In [1]:
import numpy as np
from pandas import DataFrame, Series, read_csv

from sklearn.svm import OneClassSVM
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

In [2]:
X_train = read_csv('../data/PAMAP2/x_train_data.csv')
X_valid = read_csv('../data/PAMAP2/x_val_data.csv')
y_train = read_csv('../data/PAMAP2/y_train_data.csv')
y_valid = read_csv('../data/PAMAP2/y_val_data.csv')

In [3]:
def filter_major_classes(y_classes:list, data: DataFrame, classes: list[int]):
    data['target'] = classes
    return (
        data[
            data['target'].isin(y_classes)
        ]
        .groupby('target')
        .apply(lambda x: x.sample(n=10000, random_state=42))
        .reset_index(drop=True)
    )

y_classes: list[int] = [1, 2, 3, 4] #, 16, 17]

data = filter_major_classes(y_classes, X_train, y_train)
X_train_balanced, y_train_balanced = data.drop(columns=['target']), data['target']

  .apply(lambda x: x.sample(n=10000, random_state=42))


In [4]:
def score_function(model, X_train, y_true) -> float:
    return float(
        f1_score(
            np.ones(len(y_true)),
            model.predict(X_train),
            pos_label=1
        )
    )

def svm_target_function(nu:float, gamma:float) -> float:
    model = OneClassSVM(nu=nu, kernel='rbf', gamma=gamma).fit(X=X_train_balanced)
    # Calculate performance on the validation set and Convert the predictions:
    # 1 for normal and -1 for novelty. Returns the f1 score
    return score_function(model, X_train_balanced, y_train_balanced)

In [5]:
# Define bounds for nu and gamma
bounds = {
    'nu': (0.01, 0.5),
    'gamma': (0.001, 1)
}

# Run Bayesian Optimization
optimizer = BayesianOptimization(
    f=svm_target_function,
    pbounds=bounds,
    random_state=42
)
# optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)
optimizer.maximize(init_points=5, n_iter=25)

print(f"Best result: {optimizer.max['params']}; f(x) = {optimizer.max['target']}.")

|   iter    |  target   |   gamma   |    nu     |
-------------------------------------------------
| [39m1        [39m | [39m0.7236   [39m | [39m0.3752   [39m | [39m0.4759   [39m |
| [39m2        [39m | [39m0.6604   [39m | [39m0.7323   [39m | [39m0.3033   [39m |
| [35m3        [39m | [35m0.8324   [39m | [35m0.1569   [39m | [35m0.08644  [39m |
| [39m4        [39m | [39m0.7138   [39m | [39m0.05903  [39m | [39m0.4344   [39m |
| [39m5        [39m | [39m0.7672   [39m | [39m0.6015   [39m | [39m0.357    [39m |
| [35m6        [39m | [35m0.8644   [39m | [35m0.02312  [39m | [35m0.1172   [39m |
| [39m7        [39m | [39m0.8472   [39m | [39m0.03517  [39m | [39m0.01     [39m |
| [39m8        [39m | [39m0.8547   [39m | [39m0.03606  [39m | [39m0.125    [39m |
| [39m9        [39m | [39m0.739    [39m | [39m0.3811   [39m | [39m0.01     [39m |
| [39m10       [39m | [39m0.7069   [39m | [39m1.0      [39m | [39m0.01     [39m |


In [5]:
# Define the parameter grids
param_grid: dict[str, list[float]] = {
    'nu': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5],
    'gamma': [0.001, 0.01, 0.1, 0.5, 1]
}

# Grid Search
grid_search: GridSearchCV[OneClassSVM] = GridSearchCV(
    estimator=OneClassSVM(kernel='rbf'),
    param_grid=param_grid,
    scoring=make_scorer(score_func=score_function),
    n_jobs=-1,
    verbose=3,
    # cv=5,
).fit(X_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


KeyboardInterrupt: 

In [None]:
# Random Search
random_search: RandomizedSearchCV = RandomizedSearchCV(
    estimator=OneClassSVM(kernel='rbf'),
    param_distributions=param_grid,
    n_iter=30,
    scoring=make_scorer(score_func=score_function),
    n_jobs=-1,
    # cv=5,
    verbose=3,
    random_state=42,
).fit(X_train)

In [None]:
# Extract the best results
bayesian_best_params: dict[str, float] = optimizer.max['params']
grid_best_params: dict[str, float] = grid_search.best_params_
random_best_params: dict[str, float] = random_search.best_params_

print("Bayesian Optimization Best Params:", bayesian_best_params)
print("Grid Search Best Params:", grid_best_params)
print("Random Search Best Params:", random_best_params)

# You can further test these models on your test set and compare their performance