In [6]:
from numpy import where
from pandas import DataFrame, read_csv
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM

X_train = read_csv("../data/PAMAP2/x_train_data.csv")
X_test = read_csv("../data/PAMAP2/x_test_data.csv")
y_train = read_csv("../data/PAMAP2/y_train_data.csv")
y_test = read_csv("../data/PAMAP2/y_test_data.csv")

In [7]:
X_train["activity"] = y_train  # First 80% of the data
X_test["activity"] = y_test  # Last 20% of the data

In [8]:
def train_svm_for_activity(
    current_data: DataFrame, novelty: DataFrame, activity: int
) -> OneClassSVM:
    X_train = current_data.drop(columns=["isNovelty"])
    y_train = current_data["isNovelty"]

    # only current activity (as novelty)
    X_test = novelty.drop(columns=["isNovelty"])
    y_test = novelty["isNovelty"]

    oc_svm = OneClassSVM(kernel="rbf").fit(X_train)

    y_pred_train = where(oc_svm.predict(X_train) == 1, False, True)
    y_pred_test = where(oc_svm.predict(X_test) == 1, False, True)

    print(
        f"Activity: {activity}"
        f"\nF1 Score (Train): {f1_score(y_train, y_pred_train, pos_label=True)}"
        f"\nF1 Score (Test): {f1_score(y_test, y_pred_test, pos_label=True)}"
        f"Errors in Train {y_pred_train[y_pred_train == -1].size}"
        f"Errors in Train {y_pred_test[y_pred_test == -1].size}"
    )
    return oc_svm

In [10]:
models: dict[int, OneClassSVM] = {}

for activity in X_train["activity"].unique():
    novelty = X_train[X_train["activity"] == activity].copy()
    current_data = X_train[~X_train.index.isin(novelty.index)].copy()

    current_data.loc[:, "isNovelty"], novelty.loc[:, "isNovelty"] = False, True
    oc_svm = train_svm_for_activity(current_data, novelty, activity)

    models[activity] = oc_svm

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_data.loc[:, "isNovelty"], novelty.loc[:, "isNovelty"] = False, True
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_data.loc[:, "isNovelty"], novelty.loc[:, "isNovelty"] = False, True


Activity: 1, F1 Score: 1.0
