In [1]:
import random

import numpy as np
from pandas import DataFrame, Series, read_csv, concat

from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_recall_curve, auc, make_scorer

In [2]:
X_train = read_csv('../data/PAMAP2/x_train_data.csv')
X_valid = read_csv('../data/PAMAP2/x_val_data.csv')
X_test = read_csv('../data/PAMAP2/x_test_data.csv')
y_train = read_csv('../data/PAMAP2/y_train_data.csv')
y_valid = read_csv('../data/PAMAP2/y_val_data.csv')
y_test = read_csv('../data/PAMAP2/y_test_data.csv')

In [None]:
def filter_major_classes(y_classes:list, data: DataFrame, classes: list[int], sample_zize: int=10000):
    data['target'] = classes
    print('filtering major classes: ', y_classes)
    return (
        data[
            data['target'].isin(y_classes)
        ]
        .groupby('target')
        .apply(lambda x: x.sample(n=sample_zize, random_state=42))
        .reset_index(drop=True)
    )

y_classes: list[int] = [1, 2, 3, 4, 16, 17]

data = filter_major_classes(y_classes, X_train, y_train)
X_train_balanced, y_train_balanced = data.drop(columns=['target']), data['target']

filtering major classes:  [1, 2, 3, 4, 16, 17]


  .apply(lambda x: x.sample(n=sample_zize, random_state=42))


In [None]:
model = OneClassSVM(nu=0.01, kernel='rbf', gamma=0.01).fit(X=X_train_balanced)

In [None]:
def pick_random_classes() -> list[int]:
    """
    Choose between picking 2 or 3 random numbers and Randomly
    pick the numbers from the range 5 to 24

    Returns:
        list[int]: random classes as list
    """
    classes = (5, 6, 7, 17, 12, 16)
    return random.sample(population=classes, k=random.choice(seq=[2, 3]))

In [None]:
def gen_test_samples(X_test, y_test, new_class_percent) -> tuple[DataFrame, Series]:
    main_classes = filter_major_classes([1, 2, 3, 4], X_test, y_test, 2000)
    new_classes = filter_major_classes(pick_random_classes(), X_test, y_test, 2000)

    main_sample = main_classes.sample(n=int(main_classes.shape[0] * (1 - .25)), random_state=42)
    new_sample = new_classes.sample(n=int(main_classes.shape[0] * .25), random_state=42)

    data_balanced = concat([main_sample, new_sample])

    return data_balanced.drop(columns=['target']), data_balanced['target'].apply(lambda x: 0 if x in [1, 2, 3, 4] else 1)

In [None]:
def evaluate_ocsvm_model(model, X_test, y_test) -> tuple[float, float, float, float]:
    """
    Evaluate a One-Class SVM model using accuracy, PR-AUC, ROC-AUC, and F1 score.
    
    Parameters:
    - model: The trained One-Class SVM model.
    - X_test: The test DataFrame.
    - y_test: The corresponding labels.
    - new_class_percent: Percentage (0 to 1) of 80,000 instances that should come from the new classes.
    
    Returns:
    - Tuple of (accuracy, pr_auc, roc_auc, f1).
    """
    # Convert predictions from {1, -1} to {1 (normal), 0 (novelty)}
    predictions = np.where(model.predict(X_test) == 1, 1, 0)
    y_true = np.where(y_test, 1, 0)
    precision, recall, _ = precision_recall_curve(y_true, predictions)

    return (
        float(accuracy_score(y_true, predictions)),
        float(auc(recall, precision)),
        float(roc_auc_score(y_true, predictions)),
        float(f1_score(y_true, predictions))
    )

In [None]:
x, y = gen_test_samples(X_test, y_test, 0)
values_0 = evaluate_ocsvm_model(model, x, y)

filtering major classes:  [1, 2, 3, 4]
filtering major classes:  [5, 7]


  .apply(lambda x: x.sample(n=sample_zize, random_state=42))
  .apply(lambda x: x.sample(n=sample_zize, random_state=42))


In [None]:
x, y = gen_test_samples(X_test, y_test, 0.25)
values_25 = evaluate_ocsvm_model(model, x, y)

filtering major classes:  [1, 2, 3, 4]
filtering major classes:  [7, 12]


  .apply(lambda x: x.sample(n=sample_zize, random_state=42))
  .apply(lambda x: x.sample(n=sample_zize, random_state=42))


In [None]:
x, y = gen_test_samples(X_test, y_test, 0.70)
values_70 = evaluate_ocsvm_model(model, x, y)

filtering major classes:  [1, 2, 3, 4]
filtering major classes:  [6, 12, 17]


  .apply(lambda x: x.sample(n=sample_zize, random_state=42))
  .apply(lambda x: x.sample(n=sample_zize, random_state=42))


In [None]:
x, y = gen_test_samples(X_test, y_test, 0.90)
values_90 = evaluate_ocsvm_model(model, x, y)

filtering major classes:  [1, 2, 3, 4]
filtering major classes:  [17, 6]


  .apply(lambda x: x.sample(n=sample_zize, random_state=42))
  .apply(lambda x: x.sample(n=sample_zize, random_state=42))


In [None]:
print(values_0)
print(values_25)
print(values_70)
print(values_90)

(0.567875, 0.125, 0.3785833333333333, 0.0)
(0.567875, 0.125, 0.3785833333333333, 0.0)
(0.59875, 0.24378902582159626, 0.4403333333333333, 0.13336933045356372)
(0.613875, 0.2948219178082192, 0.4705833333333333, 0.19241830065359478)
