In [5]:
from pickle import dump

from bayes_opt import BayesianOptimization
from bayes_opt.event import Events
from bayes_opt.logger import JSONLogger
from numpy import where
from pandas import DataFrame, Series, concat, read_csv
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM

logger = JSONLogger(path="./logs.log")

In [6]:
X_train = read_csv("../data/PAMAP2/x_train_data.csv")
X_test = read_csv("../data/PAMAP2/x_test_data.csv")
y_train = read_csv("../data/PAMAP2/y_train_data.csv")
y_test = read_csv("../data/PAMAP2/y_test_data.csv")

X_train["activity"] = y_train  # First 80% of the data
X_test["activity"] = y_test  # Last 20% of the data

In [4]:
def objective_function(nu: float, gamma: float) -> float:
    """
    Objective function to optimize F1-Score on the test set.

    Args:
        nu (float): nu param to evaluate.
        gamma (float): gamma param to evaluate.

    Returns:
        float: F1-Score on the test set of this iteration.
    """
    X_train = current_data.drop(columns=["isNovelty"])
    y_train = current_data["isNovelty"]

    sampled_data = current_data.sample(n=int(0.2 * len(current_data)), random_state=42)
    sampled_data["isNovelty"] = 0
    novelty = concat([novelty, sampled_data])

    X_test = novelty.drop(columns=["isNovelty"])  # only current activity (as novelty)
    y_test = novelty["isNovelty"]

    oc_svm = OneClassSVM(kernel="rbf", nu=nu, gamma=gamma).fit(X_train)
    y_pred_test = where(oc_svm.predict(X_test) == 1, False, True)

    return float(f1_score(y_test, y_pred_test, pos_label=True))


def eval_current_model(
    model: OneClassSVM,
    X_train: DataFrame,
    y_train: Series,
    X_test: DataFrame,
    y_test: Series,
    activity: int,
) -> None:
    """
    Evaluate the current OneClassSVM model for a specific activity.

    Args:
        model (OneClassSVM): model to evaluate
        X_train (DataFrame): training data
        y_train (Series): _description_
        X_test (DataFrame): _description_
        y_test (Series): _description_
        activity (int): _description_
    """
    y_pred_train = where(model.predict(X_train) == 1, False, True)
    y_pred_test = where(model.predict(X_test) == 1, False, True)

    print(
        f"Activity: {activity}"
        f"\nF1 Score (Train): {f1_score(y_train, y_pred_train, pos_label=True)}"
        f"\nF1 Score (Test): {f1_score(y_test, y_pred_test, pos_label=True)}"
        f"\nErrors in Train {y_pred_train[y_pred_train is False].size}"
        f"\nErrors in Test {y_pred_test[y_pred_test is False].size}"
    )

In [3]:
def train_svm_for_activity(
    current_data: DataFrame, novelty: DataFrame, activity: int
) -> OneClassSVM:
    X_train = current_data.drop(columns=["isNovelty"])
    y_train = current_data["isNovelty"]

    oc_svm = OneClassSVM(kernel="rbf").fit(X_train)

    sampled_data = current_data.sample(n=int(0.2 * len(current_data)), random_state=42)
    sampled_data["isNovelty"] = 0
    novelty = concat([novelty, sampled_data])

    X_test = novelty.drop(columns=["isNovelty"])  # only current activity (as novelty)
    y_test = novelty["isNovelty"]

    pbounds = {"nu": (0.01, 0.5), "gamma": (1e-4, 1e-1)}
    optimizer = BayesianOptimization(
        f=objective_function, pbounds=pbounds, random_state=42
    )
    optimizer.subscribe(Events.OPTMIZATION_STEP, logger)  # type: ignore
    optimizer.maximize(init_points=5, n_iter=25)

    return oc_svm

In [4]:
models: dict[int, OneClassSVM] = {}

for activity in X_train["activity"].unique():
    novelty = X_train[X_train["activity"] == activity].copy()
    current_data = X_train[~X_train.index.isin(novelty.index)].copy()

    current_data.loc[:, "isNovelty"], novelty.loc[:, "isNovelty"] = False, True

    X_train = current_data.drop(columns=["isNovelty"])
    y_train = current_data["isNovelty"]

    sampled_data = current_data.sample(n=int(0.2 * len(current_data)), random_state=42)
    sampled_data["isNovelty"] = 0
    novelty = concat([novelty, sampled_data])

    X_test = novelty.drop(columns=["isNovelty"])  # only current activity (as novelty)
    y_test = novelty["isNovelty"]

    oc_svm = train_svm_for_activity(current_data, novelty, activity)

    models[activity] = oc_svm

    with open(f"models/model_{activity}.pkl", "wb") as f:
        dump(oc_svm, f)

Activity: 1
F1 Score (Train): 0.0
F1 Score (Test): 0.6909126652577776
Errors in Train 0
Errors in Train 0
Activity: 2
F1 Score (Train): 0.0
F1 Score (Test): 0.6808703764403318
Errors in Train 0
Errors in Train 0
Activity: 3
F1 Score (Train): 0.0
F1 Score (Test): 0.665589416012321
Errors in Train 0
Errors in Train 0
Activity: 4
F1 Score (Train): 0.0
F1 Score (Test): 0.14748624670079816
Errors in Train 0
Errors in Train 0
Activity: 5
F1 Score (Train): 0.0
F1 Score (Test): 0.5105552076041705
Errors in Train 0
Errors in Train 0
Activity: 6
F1 Score (Train): 0.0
F1 Score (Test): 0.6507028440666884
Errors in Train 0
Errors in Train 0
Activity: 7
F1 Score (Train): 0.0
F1 Score (Test): 0.6801341706388238
Errors in Train 0
Errors in Train 0
Activity: 12
F1 Score (Train): 0.0
F1 Score (Test): 0.0
Errors in Train 0
Errors in Train 0
Activity: 13
F1 Score (Train): 0.0
F1 Score (Test): 0.0
Errors in Train 0
Errors in Train 0
Activity: 16
F1 Score (Train): 0.0
F1 Score (Test): 0.0
Errors in Train 0
