## Hyperparameter optimization with hyperopt

In [None]:
%reload_kedro

In [None]:
from sklearn.linear_model import LogisticRegression
from time import time
from sklearn.metrics import accuracy_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe, space_eval
from pprint import pprint
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

In [None]:
import warnings
# Suppress every warning category from this point on in the session.
# NOTE(review): a blanket "ignore" presumably also hides scikit-learn
# convergence/data-conversion warnings raised during the search — confirm
# that this is intended.
warnings.filterwarnings("ignore")

## Base

In [None]:
# Column layout of the master tables.
INDEX_COL = "window_nbr"
TARGET_COL = ["label"]  # a list, so selecting it from a frame yields a DataFrame

# Number of folds passed to TimeSeriesSplit.
N_SPLITS = 2

# Session-wide accumulators.
scores = []  # objective() appends the accuracy of each evaluation here
time_counter_secs = []  # not filled by any code visible in this notebook section

In [None]:
# Pull the train / test master tables out of the Kedro data catalog.
mt_train_multic = catalog.load("master_table_train_multic")
mt_test_multic = catalog.load("master_table_test_multic")

# Order the training rows by INDEX_COL, then discard that index so the frame
# ends up with a plain positional index.
mt_train_multic = (
    mt_train_multic
    .set_index(INDEX_COL)
    .sort_index()
    .reset_index(drop=True)
)

# Features are every column except the target; the target stays a
# one-column DataFrame because TARGET_COL is a list.
X_train = mt_train_multic.drop(columns=TARGET_COL)
y_train = mt_train_multic[TARGET_COL]

# Out-of-sample features: indexed by INDEX_COL, target column removed.
X_test_oos = mt_test_multic.set_index(INDEX_COL).drop(columns=TARGET_COL)

In [None]:
tss = TimeSeriesSplit(n_splits=N_SPLITS)

# Only the final fold is used as the train/validation split, so take it
# directly instead of looping over every fold and comparing a counter.
train_idx, test_idx = list(tss.split(X_train))[-1]

# NOTE: X_train / y_train are overwritten with their training part here, so
# re-running this cell without reloading the data splits the already-reduced
# frames again.
X_train, X_test = X_train.iloc[train_idx], X_train.iloc[test_idx]
y_train, y_test = y_train.iloc[train_idx], y_train.iloc[test_idx]

In [None]:
def _shared_choices():
    """Build fresh hyperopt nodes for the settings common to all three spaces."""
    return {
        "tol": hp.choice("tol", np.logspace(-4, 1, 20)),
        "C": hp.choice("C", np.logspace(-2, 1, 20)),
        "max_iter": hp.choice("max_iter", list(range(100, 1100, 100))),
        "fit_intercept": hp.choice("fit_intercept", [True, False]),
        "class_weight": hp.choice("class_weight", ["balanced", None]),
    }


# elastic net: only the saga solver is offered, plus an l1_ratio choice
space = {
    "solver": hp.choice("solver", ["saga"]),
    "penalty": hp.choice("penalty", ["elasticnet"]),
    **_shared_choices(),
    "l1_ratio": hp.choice("l1_ratio", np.logspace(-2, 0, 20)),
    "random_state": 0,
}

# l1
space2 = {
    "solver": hp.choice("solver", ["saga", "liblinear"]),
    "penalty": hp.choice("penalty", ["l1"]),
    **_shared_choices(),
    "random_state": 0,
}

# l2
space3 = {
    "solver": hp.choice(
        "solver",
        ["saga", "lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag"],
    ),
    "penalty": hp.choice("penalty", ["l2"]),
    **_shared_choices(),
    "random_state": 0,
}

In [None]:
def objective(space):
    """Hyperopt objective: fit a logistic regression and score it on the validation fold.

    Args:
        space: Mapping of ``LogisticRegression`` keyword arguments sampled by
            hyperopt for this evaluation.

    Returns:
        dict with ``loss`` (the negated validation accuracy, because ``fmin``
        minimises) and ``status`` set to ``STATUS_OK``.

    Side effects:
        Appends the accuracy of this evaluation to the module-level ``scores``
        list.
    """
    clf = LogisticRegression(**space)
    clf.fit(X_train, y_train)

    # predict() already returns class labels, so they are compared with y_test
    # as-is. Thresholding them with `> 0.5` converted the labels to booleans,
    # which mis-scores every class other than 0/1 and fails on non-numeric
    # labels.
    pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, pred)

    scores.append(accuracy)

    return {"loss": -1 * accuracy, "status": STATUS_OK}

## elastic net

In [None]:
# Time the elastic-net search; a new Trials object keeps this run's history.
start = time()

trials = Trials()

best_hyperparams = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=150,
    trials=trials,
)

end = time()

In [None]:
print(f"Optimization time in seconds: {round(end-start, 2)}")
print()

# Take the best score from this run's own Trials object. The global `scores`
# list is appended to on every objective() call and never reset, so
# max(scores) would also pick up evaluations from any other fmin run
# (including a re-run of this search). The loss is the negated accuracy.
best_score = -trials.best_trial["result"]["loss"]
print(f"Best score: {round(best_score, 2)}")
print()

print("The best hyperparameters are")
# fmin returns choice indices; space_eval maps them back to actual values.
best_params = space_eval(space=space, hp_assignment=best_hyperparams)
pprint(best_params)

print()
# Refit with the winning configuration and show the distribution of labels
# predicted for the out-of-sample table.
clf = LogisticRegression(**best_params)
clf.fit(X_train, y_train)
pred = clf.predict(X_test_oos)
print(f"Predicted labels:\n{np.unique(pred, return_counts=True)}")

## l1

In [None]:
# Time the l1 search; a new Trials object keeps this run's history.
start = time()

trials = Trials()

best_hyperparams = fmin(
    objective,
    space2,
    algo=tpe.suggest,
    max_evals=150,
    trials=trials,
)

end = time()

In [None]:
print(f"Optimization time in seconds: {round(end-start, 2)}")
print()

# Take the best score from this run's own Trials object. The global `scores`
# list is appended to on every objective() call and never reset, so
# max(scores) would also include evaluations from other fmin runs and could
# report a score that no l1 configuration achieved. The loss is the negated
# accuracy.
best_score = -trials.best_trial["result"]["loss"]
print(f"Best score: {round(best_score, 2)}")
print()

print("The best hyperparameters are")
# fmin returns choice indices; space_eval maps them back to actual values.
best_params = space_eval(space=space2, hp_assignment=best_hyperparams)
pprint(best_params)

print()
# Refit with the winning configuration and show the distribution of labels
# predicted for the out-of-sample table.
clf = LogisticRegression(**best_params)
clf.fit(X_train, y_train)
pred = clf.predict(X_test_oos)
print(f"Predicted labels:\n{np.unique(pred, return_counts=True)}")

## l2

In [None]:
# Time the l2 search; a new Trials object keeps this run's history.
start = time()

trials = Trials()

best_hyperparams = fmin(
    objective,
    space3,
    algo=tpe.suggest,
    max_evals=150,
    trials=trials,
)

end = time()

In [None]:
print(f"Optimization time in seconds: {round(end-start, 2)}")
print()

# Take the best score from this run's own Trials object. The global `scores`
# list is appended to on every objective() call and never reset, so
# max(scores) would also include evaluations from other fmin runs and could
# report a score that no l2 configuration achieved. The loss is the negated
# accuracy.
best_score = -trials.best_trial["result"]["loss"]
print(f"Best score: {round(best_score, 2)}")
print()

print("The best hyperparameters are")
# fmin returns choice indices; space_eval maps them back to actual values.
best_params = space_eval(space=space3, hp_assignment=best_hyperparams)
pprint(best_params)

print()
# Refit with the winning configuration and show the distribution of labels
# predicted for the out-of-sample table.
clf = LogisticRegression(**best_params)
clf.fit(X_train, y_train)
pred = clf.predict(X_test_oos)
print(f"Predicted labels:\n{np.unique(pred, return_counts=True)}")