In [2]:
import os
from typing import Final

from bayes_opt import BayesianOptimization
from bayes_opt.event import Events
from bayes_opt.logger import JSONLogger
from bayes_opt.util import load_logs
from numpy import where
from pandas import DataFrame, Series, concat, read_csv, set_option
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM

set_option("display.max_columns", None)
NUM_TRIALS: Final[int] = 5
LOGS_PATH: Final[str] = "../reports/logs_bayesian.log"
logger = JSONLogger(path=LOGS_PATH, reset=False)

In [3]:
X_train = read_csv("../data/PAMAP2/x_train_data.csv")
X_test = read_csv("../data/PAMAP2/x_test_data.csv")
y_train = read_csv("../data/PAMAP2/y_train_data.csv")
y_test = read_csv("../data/PAMAP2/y_test_data.csv")

X_train["activity"] = y_train  # First 80% of the data
X_test["activity"] = y_test  # Last 20% of the data

# MIN_SAMPLES = X_train["activity"].value_counts().min()
MIN_SAMPLES = X_train["activity"].value_counts().sort_values().iloc[0]
MAXIMAZED = False

models: dict[int, dict] = {}
training_data: DataFrame
testing_data: DataFrame
train_targets: Series
test_targets: Series

In [4]:
def objective_function(nu: float, gamma: float, tol: float) -> float:
    """
    Objective function to optimize F1-Score on the train and test set.
    Evaluates the OneClassSVM model using stratified k-fold cross-validation.

    Args:
        nu (float): nu param to evaluate.
        gamma (float): gamma param to evaluate.
        tol (float): tol param to evaluate.

    Returns:
        float: F1-Score on the test set of this iteration.
    """
    oc_svm = OneClassSVM(kernel="rbf", nu=nu, gamma=gamma).fit(training_data)

    f1_train = f1_score(
        train_targets,
        where(oc_svm.predict(training_data) == 1, False, True),
        average="macro",
    )
    f1_test = f1_score(
        test_targets,
        where(oc_svm.predict(testing_data) == 1, False, True),
        average="macro",
    )
    print(f"F1 Score | Train: {f1_train} | F1 Score (Test): {f1_test}")

    return float(f1_test)
    # train_f1_scores, test_f1_scores = [], []
    # for train_idx, test_idx in StratifiedKFold(n_splits=4).split(
    #     training_data, train_targets
    # ):
    #     X_train, X_test = training_data.iloc[train_idx], training_data.iloc[test_idx]
    #     y_train, y_test = train_targets.iloc[train_idx], train_targets.iloc[test_idx]
    #     oc_svm = OneClassSVM(kernel="rbf", nu=nu, gamma=gamma, tol=tol).fit(X_train)

    #     train_f1_scores.append(
    #         f1_score(y_train, where(oc_svm.predict(X_train) == 1, False, True))
    #     )
    #     test_f1_scores.append(
    #         f1_score(X_test, where(oc_svm.predict(y_test) == 1, False, True))
    #     )
    # mean_f1_train = mean(train_f1_scores)
    # mean_f1_test = mean(test_f1_scores)

    # print(f"Mean F1 Score | Train: {mean_f1_train} | Test: {mean_f1_test}")

    # return float(mean_f1_test)

In [5]:
optimizer = BayesianOptimization(
    objective_function,
    {"nu": (0.01, 0.5), "gamma": (1e-4, 1), "tol": (1e-5, 1e-1)},
    random_state=42,
)
if not os.path.exists(LOGS_PATH):
    with open(LOGS_PATH, "w") as fp:
        pass
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [None]:
for i in range(1, len((activities := X_train["activity"].unique()))):
    training = (  # picks the first n samples of each class
        X_train[X_train["activity"].isin(activities[:i])]
        .groupby("activity")
        .head(MIN_SAMPLES)
        .copy()
    )
    print(f"Activity: {activities[:i]}, with {training.shape[0]} samples")
    testing = X_test[X_test["activity"] == activities[i]].head(MIN_SAMPLES)
    training.loc[:, "isNovelty"], testing.loc[:, "isNovelty"] = False, True
    novelty = concat(
        [testing, training.sample(n=int(0.2 * len(training)), random_state=42)]
    )
    training_data = training.drop(columns=["isNovelty"])
    train_targets = training["isNovelty"]
    testing_data = novelty.drop(columns=["isNovelty"])
    test_targets = novelty["isNovelty"]  # only current activity (as novelty)

    load_logs(optimizer, logs=[LOGS_PATH])
    print("New optimizer is now aware of {} points.".format(len(optimizer.space)))

    if not MAXIMAZED:
        optimizer.maximize(init_points=25, n_iter=75)
        MAXIMAZED = True

    else:
        load_logs(optimizer, logs=[LOGS_PATH])
        print(f"Already maximized, sugesting new {NUM_TRIALS} points")
        for i in range(NUM_TRIALS):
            MAX = optimizer.max["target"]  # type: ignore
            print(
                "Next point:",
                next_point_to_probe := optimizer.suggest(),
                "Found the target:",
                target := objective_function(**next_point_to_probe),
            )
            optimizer.register(params=next_point_to_probe, target=target)
            if target > MAX:
                print("New best points found, continuing optimization")
                i = 0

    models[i] = optimizer.max  # type: ignore

Activity: [1], with 3483 samples
New optimizer is now aware of 0 points.
F1 Score | Train: 0.34505453177886425 | F1 Score (Test): 0.8186073501173345
F1 Score | Train: 0.3893758765778401 | F1 Score (Test): 0.8727061491895177
F1 Score | Train: 0.3611518708730741 | F1 Score (Test): 0.8433346601555294
F1 Score | Train: 0.3611518708730741 | F1 Score (Test): 0.8361390704198579
F1 Score | Train: 0.362321493958257 | F1 Score (Test): 0.8413848831814261
F1 Score | Train: 0.3459154929577465 | F1 Score (Test): 0.8314926668050301
F1 Score | Train: 0.43264375305424335 | F1 Score (Test): 0.9187089612606458
F1 Score | Train: 0.4105601624640379 | F1 Score (Test): 0.8955304542713595
F1 Score | Train: 0.43904010307617974 | F1 Score (Test): 0.9313577851220737
F1 Score | Train: 0.4927177395863676 | F1 Score (Test): 0.9921215644220804
F1 Score | Train: 0.48544836755798493 | F1 Score (Test): 0.9854290332972188
F1 Score | Train: 0.4863589441085386 | F1 Score (Test): 0.9836242733288261
F1 Score | Train: 0.4867

In [7]:
for activity in X_train["activity"].unique():
    print(models[activity])

{'target': np.float64(0.9921215644220804), 'params': {'gamma': np.float64(0.0031400102729492382), 'nu': np.float64(0.023952365950400806), 'tol': np.float64(6.833590324718843e-05)}}


KeyError: np.int64(2)