In [None]:
import numpy as np
from bayes_opt import BayesianOptimization
from pandas import DataFrame, concat, read_csv
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import OneClassSVM

In [None]:
# Read the PAMAP2 feature tables first (train, then validation) ...
X_train, X_valid = (
    read_csv("../data/PAMAP2/x_train_data.csv"),
    read_csv("../data/PAMAP2/x_val_data.csv"),
)
# ... followed by the matching label tables, in the same order.
y_train, y_valid = (
    read_csv("../data/PAMAP2/y_train_data.csv"),
    read_csv("../data/PAMAP2/y_val_data.csv"),
)

In [None]:
def filter_major_classes(
    y_classes: list,
    data: DataFrame,
    classes: DataFrame,
    n_samples: int = 10000,
    random_state: int = 42,
) -> DataFrame:
    """Draw an equally sized random sample from each requested class.

    The labels in ``classes`` are attached to a copy of ``data`` as a
    ``"target"`` column, rows whose label is not in ``y_classes`` are dropped,
    and ``n_samples`` rows are then sampled (without replacement) from every
    remaining class.

    Args:
        y_classes: Labels of the classes to keep.
        data: Feature table; it is left untouched.
        classes: Labels aligned with ``data``; assigned as the ``"target"``
            column.
        n_samples: Number of rows drawn from each class.
        random_state: Seed passed to every per-class ``sample`` call, so each
            class is sampled with the same seed.

    Returns:
        A new frame with a fresh ``0..n-1`` index containing the sampled rows
        grouped by class, including the ``"target"`` column. If no row matches
        ``y_classes`` the (empty) filtered frame is returned.

    Raises:
        ValueError: Propagated from ``DataFrame.sample`` when a class has
            fewer than ``n_samples`` rows.
    """
    # assign() works on a copy, so the caller's frame never gains a "target"
    # column that could later leak into model fitting.
    labelled = data.assign(target=classes)
    selected = labelled[labelled["target"].isin(values=y_classes)]

    # Sample each class explicitly instead of relying on groupby().apply(),
    # whose handling of the grouping column differs between pandas versions.
    per_class = [
        group.sample(n=n_samples, random_state=random_state)
        for _, group in selected.groupby(by="target")
    ]
    if not per_class:
        # concat() rejects an empty list; hand back the empty selection.
        return selected.reset_index(drop=True)
    return concat(per_class, ignore_index=True)


y_classes: list[int] = [1, 2, 3, 4]  # , 16, 17] -> 0 "noNovelty"

# Balanced training features: equal-sized samples of the selected classes with
# the helper "target" column removed again.
X_train_balanced = filter_major_classes(y_classes, X_train, y_train).drop(
    columns=["target"]
)

# Every balanced sample belongs to a known ("noNovelty") class. The ground
# truth must be sized like the data because score_function calls len() on it;
# ones are used because OneClassSVM.predict labels inliers as +1.
y_train_balanced = np.ones(len(X_train_balanced), dtype=int)

In [None]:
def score_function(model, X_train, y_true=None) -> float:
    """Score a fitted one-class model with F1, treating every sample as normal.

    The expected labels are a vector of ones, so the F1 score (positive label
    ``1``) measures how many of the given samples the model accepts as normal.
    The signature matches scikit-learn's scorer protocol
    ``scorer(estimator, X, y)``, so the function can be passed directly as
    ``scoring=``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_train: Samples to predict on.
        y_true: Optional ground truth; only its length is used. May be omitted
            because scikit-learn calls scorers without ``y`` when a search is
            fitted on ``X`` alone.

    Returns:
        The F1 score as a plain ``float``.
    """
    predictions = model.predict(X_train)
    # Without explicit labels, expect one "normal" label per prediction.
    n_expected = len(predictions) if y_true is None else len(y_true)
    return float(f1_score(np.ones(n_expected), predictions, pos_label=1))


def svm_target_function(nu: float, gamma: float, tol: float) -> float:
    """Objective for the Bayesian optimiser: fit an RBF OneClassSVM and score it.

    Args:
        nu: ``nu`` parameter of ``OneClassSVM``.
        gamma: RBF kernel coefficient.
        tol: Stopping tolerance of the solver.

    Returns:
        The value of ``score_function`` for the fitted model.
    """
    estimator = OneClassSVM(kernel="rbf", nu=nu, gamma=gamma, tol=tol)
    estimator.fit(X_train_balanced)
    # NOTE(review): the model is scored on the same balanced training data it
    # was fitted on, not on the validation split.
    return score_function(estimator, X_train_balanced, y_train_balanced)

In [None]:
# Bayesian optimisation of the OneClassSVM hyper-parameters.
optimizer = BayesianOptimization(
    f=svm_target_function,
    # Each bound is (lower, upper); "tol" was previously given in reverse order.
    pbounds={"nu": (0.01, 0.5), "gamma": (0.001, 1), "tol": (1e-5, 1e-3)},
    random_state=42,
)
# 5 random probes to seed the surrogate model, then 25 guided iterations.
optimizer.maximize(init_points=5, n_iter=25)
print(f"Best result: {optimizer.max['params']}; f(x) = {optimizer.max['target']}.")

In [None]:
# Define the parameter grids
param_grid: dict[str, list[float]] = {
    "nu": [0.01, 0.05, 0.1, 0.2, 0.3, 0.5],
    "gamma": [0.001, 0.01, 0.1, 0.5, 1],
}

# Grid Search
# - The annotation is the plain class: GridSearchCV cannot be subscripted at
#   runtime, and module-level annotations are evaluated.
# - score_function already has the scorer signature (estimator, X, y), so it is
#   passed directly; make_scorer expects a metric taking (y_true, y_pred).
# - The search runs on the same balanced data as the Bayesian optimisation so
#   the strategies are comparable. An all-ones label vector is supplied so the
#   scorer receives a y argument for every fold.
grid_search: GridSearchCV = GridSearchCV(
    estimator=OneClassSVM(kernel="rbf"),
    param_grid=param_grid,
    scoring=score_function,
    n_jobs=-1,
    verbose=3,
    cv=5,
).fit(X_train_balanced, np.ones(len(X_train_balanced), dtype=int))

In [None]:
# Random Search
# - score_function already has the scorer signature (estimator, X, y), so it is
#   passed directly; make_scorer expects a metric taking (y_true, y_pred).
# - Fitted on the same balanced data as the Bayesian optimisation, with an
#   all-ones label vector so the scorer receives a y argument for every fold.
# NOTE(review): param_grid holds 6 x 5 = 30 combinations and list-valued
# parameters are sampled without replacement, so n_iter=30 evaluates every
# combination; lower n_iter or use continuous distributions for a truly
# random search.
random_search: RandomizedSearchCV = RandomizedSearchCV(
    estimator=OneClassSVM(kernel="rbf"),
    param_distributions=param_grid,
    n_iter=30,
    scoring=score_function,
    n_jobs=-1,
    cv=5,
    verbose=3,
    random_state=42,
).fit(X_train_balanced, np.ones(len(X_train_balanced), dtype=int))

In [None]:
# Collect the winning hyper-parameters reported by each search strategy
bayesian_best_params: dict[str, float] = optimizer.max["params"]
grid_best_params: dict[str, float] = grid_search.best_params_
random_best_params: dict[str, float] = random_search.best_params_

# Print one summary line per strategy
for heading, best_params in (
    ("Bayesian Optimization Best Params:", bayesian_best_params),
    ("Grid Search Best Params:", grid_best_params),
    ("Random Search Best Params:", random_best_params),
):
    print(heading, best_params)

# Next step: evaluate these configurations on the held-out test set and
# compare their performance.