In [1]:
import os
from typing import Final

from bayes_opt import BayesianOptimization
from bayes_opt.event import Events
from bayes_opt.logger import JSONLogger
from bayes_opt.util import load_logs
from numpy import ndarray, where
from pandas import DataFrame, Series, concat, read_csv, set_option
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM

set_option("display.max_columns", None)
NUM_TRIALS: Final[int] = 20
LOGS_PATH: Final[str] = "../reports/logs_bayesian.log"
logger = JSONLogger(path=LOGS_PATH, reset=False)

In [2]:
X_train = read_csv("../data/PAMAP2/x_train_data.csv")
X_test = read_csv("../data/PAMAP2/x_test_data.csv")
y_train = read_csv("../data/PAMAP2/y_train_data.csv")
y_test = read_csv("../data/PAMAP2/y_test_data.csv")

X_train["activity"] = y_train  # First 80% of the data
X_test["activity"] = y_test  # Last 20% of the data

# MIN_SAMPLES = X_train["activity"].value_counts().min()
MIN_SAMPLES = X_train["activity"].value_counts().sort_values().iloc[0]
MAXIMAZED = False

models: dict[int, dict] = {}
training_data: DataFrame
testing_data: DataFrame
train_targets: Series
test_targets: Series

In [3]:
def objective_function(nu: float, gamma: float, tol: float) -> float:
    """
    Objective function to optimize F1-Score using Bayesian Optimization.

    Args:
        nu (float): nu parameter for the OneClassSVM.
        gamma (float): gamma parameter for the OneClassSVM.
        tol (float): tol parameter for the OneClassSVM.

    Returns:
        float: Macro Average F1-Score.
    """
    oc_svm = OneClassSVM(kernel="rbf", nu=nu, gamma=gamma, tol=tol).fit(training_data)
    return float(
        f1_score(
            test_targets,
            where(oc_svm.predict(testing_data) == 1, False, True),
            average="macro",
        )
    )

In [4]:
optimizer = BayesianOptimization(
    objective_function,
    {"nu": (0.01, 0.5), "gamma": (1e-4, 1), "tol": (1e-5, 1e-1)},
    random_state=42,
)
if not os.path.exists(LOGS_PATH):
    with open(LOGS_PATH, "w") as fp:
        pass
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)


def update_train_vars(
    i: int, activities: ndarray
) -> tuple[DataFrame, Series, DataFrame, Series]:
    training = (  # picks the first n samples of each class
        X_train[X_train["activity"].isin(activities[:i])]
        .groupby("activity")
        .head(MIN_SAMPLES)
    )
    testing = X_test[X_test["activity"] == activities[i]].head(MIN_SAMPLES)
    training.loc[:, "isNovelty"], testing.loc[:, "isNovelty"] = False, True
    novelty = concat(
        [testing, training.sample(n=int(0.15 * len(training)), random_state=42)]
    )
    return (
        training.drop(columns=["isNovelty"]),
        training["isNovelty"],
        # only current activity (as novelty)
        novelty.drop(columns=["isNovelty"]),
        novelty["isNovelty"],
    )

In [5]:
for i in range(1, len((activities := X_train["activity"].unique()))):
    training_data, train_targets, testing_data, test_targets = update_train_vars(
        i, activities
    )
    print(f"Activity: {activities[:i]}, with {training_data.shape[0]} samples")
    load_logs(optimizer, logs=[LOGS_PATH])
    print("New optimizer is now aware of {} points.".format(len(optimizer.space)))

    if not MAXIMAZED:
        print("Maximizing for the first time (100 iterations)...")
        optimizer.maximize(init_points=25, n_iter=75)
        MAXIMAZED = True
    else:
        load_logs(optimizer, logs=[LOGS_PATH])
        print(f"Already maximized, sugesting new {NUM_TRIALS} points")

        iters = 0
        for j in range(NUM_TRIALS):
            MAX = optimizer.max["target"]  # type: ignore
            next_point_to_probe = optimizer.suggest()
            target = objective_function(**next_point_to_probe)
            optimizer.register(params=next_point_to_probe, target=target)
            if target > MAX:
                MAX = target
                print(
                    f"New best points found: {next_point_to_probe}. Max: {MAX}, continuing optimization"
                )
                iters += j
                j = 0
        print(f"Maximized for {iters} iterations")

    models[i] = optimizer.max  # type: ignore

Activity: [1], with 3483 samples
New optimizer is now aware of 0 points.
Maximizing for the first time (100 iterations)...
Activity: [1 2], with 6966 samples
New optimizer is now aware of 100 points.
Already maximized, sugesting new 20 points
Maximized for 0 iterations
Activity: [1 2 3], with 10449 samples
New optimizer is now aware of 120 points.
Already maximized, sugesting new 20 points
Maximized for 0 iterations
Activity: [1 2 3 4], with 13932 samples
New optimizer is now aware of 140 points.
Already maximized, sugesting new 20 points
Maximized for 0 iterations
Activity: [1 2 3 4 5], with 17415 samples
New optimizer is now aware of 160 points.
Already maximized, sugesting new 20 points
Maximized for 0 iterations
Activity: [1 2 3 4 5 6], with 20898 samples
New optimizer is now aware of 180 points.
Already maximized, sugesting new 20 points
Maximized for 0 iterations
Activity: [1 2 3 4 5 6 7], with 24381 samples
New optimizer is now aware of 200 points.
Already maximized, sugesting n