In [None]:
from bayes_opt import BayesianOptimization
from bayes_opt.event import Events
from bayes_opt.logger import JSONLogger
from bayes_opt.util import load_logs
from numpy import where
from pandas import DataFrame, concat
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM

In [None]:
def evaluate_hyperparams(nu, gamma):
    """Objective function: F1 score of a One-Class SVM on the evaluation set.

    Reads the notebook-level DataFrames ``current_data`` and ``novelty``
    (they are not parameters); both must contain an ``isNovelty`` column.

    Args:
        nu: Value forwarded as ``nu`` to ``OneClassSVM``.
        gamma: Value forwarded as ``gamma`` to ``OneClassSVM`` (RBF kernel).

    Returns:
        The F1 score, with ``True`` (novelty) as the positive label, measured
        on ``novelty`` concatenated with a fixed 20% sample of
        ``current_data`` relabelled ``isNovelty = 0``.
    """
    # Fit on the feature columns only; the label column is never seen by the model.
    X_train = current_data.drop(columns=["isNovelty"])
    oc_svm = OneClassSVM(nu=nu, kernel="rbf", gamma=gamma).fit(X_train)

    # Build the evaluation set: all novelty rows plus a reproducible 20% sample
    # of the training rows, forced to the "not a novelty" label.
    # NOTE(review): the sampled rows are also part of the training data, so the
    # score may be optimistic — confirm this is intended.
    sampled_data = current_data.sample(
        n=int(0.2 * len(current_data)), random_state=42
    ).assign(isNovelty=0)
    combined_novelty = concat([novelty, sampled_data])

    X_test = combined_novelty.drop(columns=["isNovelty"])
    y_test = combined_novelty["isNovelty"]

    # A prediction of 1 is mapped to False (not a novelty); anything else to True.
    y_pred_test = where(oc_svm.predict(X_test) == 1, False, True)

    # Only the evaluation-set score is returned, so the training-set prediction
    # and F1 that were previously computed and discarded are not evaluated.
    return f1_score(y_test, y_pred_test, pos_label=True)

In [None]:
def _build_evaluation_set(current_data: DataFrame, novelty: DataFrame) -> DataFrame:
    """Return ``novelty`` plus a fixed 20% sample of ``current_data`` relabelled ``isNovelty = 0``."""
    normal_sample = current_data.sample(
        n=int(0.2 * len(current_data)), random_state=42
    ).assign(isNovelty=0)
    return concat([novelty, normal_sample])


def _flag_novelties(model: OneClassSVM, features: DataFrame):
    """Map model predictions to booleans: False where the prediction is 1, True otherwise."""
    return where(model.predict(features) == 1, False, True)


def train_svm_for_activity(
    current_data: DataFrame, novelty: DataFrame, activity: int, log_file: str
) -> OneClassSVM:
    """Tune and fit a One-Class SVM for one activity using Bayesian optimisation.

    The search maximises the F1 score (positive label ``True`` = novelty) on an
    evaluation set made of ``novelty`` plus a fixed 20% sample of
    ``current_data`` relabelled as non-novel. The objective is built from the
    arguments passed to this function, so the search and the final model use
    the same data.

    Args:
        current_data: Training frame; must contain an ``isNovelty`` column,
            which is dropped before fitting.
        novelty: Frame of novelty examples with an ``isNovelty`` column.
        activity: Activity identifier, used only in printed messages.
        log_file: Path of the JSON optimisation log. If it exists, its points
            are loaded before optimising. If falsy, no log is read or written.

    Returns:
        The ``OneClassSVM`` fitted on ``current_data`` with the best
        hyperparameters known to the optimiser.
    """
    X_train = current_data.drop(columns=["isNovelty"])
    y_train = current_data["isNovelty"]

    # The evaluation set depends only on the inputs, so build it once instead
    # of on every objective call.
    evaluation_set = _build_evaluation_set(current_data, novelty)
    X_test = evaluation_set.drop(columns=["isNovelty"])
    y_test = evaluation_set["isNovelty"]

    def objective(nu, gamma):
        # Evaluation-set F1 of a model trained with the candidate hyperparameters.
        candidate = OneClassSVM(nu=nu, kernel="rbf", gamma=gamma).fit(X_train)
        return f1_score(y_test, _flag_novelties(candidate, X_test), pos_label=True)

    optimizer = BayesianOptimization(
        f=objective,
        pbounds={"nu": (0.01, 0.5), "gamma": (0.001, 1)},
        random_state=42,
        verbose=2,
    )

    # Load previous logs if available
    if log_file:
        try:
            load_logs(optimizer, logs=[log_file])
            print(f"Loaded logs from {log_file}. Continuing optimization...")
        except FileNotFoundError:
            print(
                f"No previous log found. Starting fresh optimization for activity {activity}."
            )

    # Remember the best target known before this run (falsy when nothing was
    # loaded) so that "no improvement" can be detected afterwards.
    best_before = optimizer.max
    previous_best_target = best_before["target"] if best_before else None

    # Add a logger to save results; skipped when no log path was given.
    # NOTE(review): depending on the installed bayes_opt version, JSONLogger may
    # delete an existing file at `path` on construction, dropping the history
    # that was just loaded from disk — confirm and pass `reset=False` if supported.
    if log_file:
        logger = JSONLogger(path=log_file)
        optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

    # Run optimization
    optimizer.maximize(init_points=5, n_iter=10)

    best = optimizer.max
    best_hyperparams = best["params"]  # type: ignore
    print(f"Best hyperparameters for Activity {activity}: {best_hyperparams}")

    # If this run did not beat the previously logged best, say so; optimizer.max
    # already points at the earlier best in that case.
    if previous_best_target is not None and best["target"] <= previous_best_target:  # type: ignore
        print("No improvement found. Using the best hyperparameters so far.")

    # Train final model using the best hyperparameters
    final_model = OneClassSVM(
        nu=best_hyperparams["nu"], kernel="rbf", gamma=best_hyperparams["gamma"]
    ).fit(X_train)

    # Evaluate and print final results
    y_pred_train = _flag_novelties(final_model, X_train)
    y_pred_test = _flag_novelties(final_model, X_test)

    print(
        f"Final Model for Activity: {activity}"
        f"\nF1 Score (Train): {f1_score(y_train, y_pred_train, pos_label=True)}"
        f"\nF1 Score (Test): {f1_score(y_test, y_pred_test, pos_label=True)}"
    )

    return final_model

In [None]:
# Tune and fit the model for activity 1; the returned estimator is the cell's output.
# NOTE(review): `current_data` and `novelty` are not defined in the cells shown
# here — confirm they are created earlier in the notebook.
train_svm_for_activity(
    current_data=current_data,
    novelty=novelty,
    activity=1,
    log_file="activity_1_logs.json",
)