In [1]:
# Example from https://hyperopt.github.io/hyperopt/
# define an objective function
def objective(args):
    """Toy loss over a tagged sample.

    `args` is a 2-tuple ``(case, val)``. The loss is ``val`` itself when the
    tag is ``"case 1"``, and ``val`` squared for any other tag.
    """
    tag, value = args
    return value if tag == "case 1" else value**2


# Search space: a top-level choice labelled "a" between two tagged branches,
# each carrying its own continuous parameter ("c1" or "c2").
from hyperopt import hp

case_1_branch = ("case 1", 1 + hp.lognormal("c1", 0, 1))
case_2_branch = ("case 2", hp.uniform("c2", -10, 10))
space = hp.choice("a", [case_1_branch, case_2_branch])

# Minimize `objective` over that space with TPE, using 100 evaluations.
from hyperopt import fmin, tpe, space_eval

best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

# `best` maps each sampled label to its raw value (for "a" this is the index
# of the selected branch), e.g.
# -> {'a': 1, 'c2': 0.01420615366247227}
print(best)
# `space_eval` turns those raw values back into a concrete point of the space, e.g.
# -> ('case 2', 0.01420615366247227)
print(space_eval(space, best))

100%|██████████| 100/100 [00:00<00:00, 369.46trial/s, best loss: 0.0007949766691081164]
{'a': 1, 'c2': 0.02819533062597629}
('case 2', 0.02819533062597629)


In [2]:
# Example from
# https://medium.com/@attud_bidirt/automatic-tuning-of-hyper-parameters-of-a-xgboost-classifier-c5588bceda4
from sklearn import datasets

data = datasets.load_breast_cancer()

# NOTE(review): `random_state` is not referenced anywhere in the visible code and
# is imported from `pandas.core`, which is not part of the public pandas API.
# Kept only in case something outside this view relies on it.
from pandas.core.common import random_state
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd

# Features as a DataFrame (one column per feature name) and labels as a Series.
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Stratified shuffle split with 20% of the rows held out for testing.
ss = StratifiedShuffleSplit(2, test_size=0.2, random_state=44)

# The splitter generates two train/test partitions; only the LAST one is used
# (this is what the original overwrite-in-a-loop did, made explicit here).
*_, (tr_idx, ts_idx) = ss.split(X, y)

# `split` yields positional row indices, so index by position with `.iloc`.
# (`.loc` only happened to work because X and y carry a default RangeIndex.)
X_train, y_train = X.iloc[tr_idx], y.iloc[tr_idx]
X_test, y_test = X.iloc[ts_idx], y.iloc[ts_idx]

print(f"\nShape of X_train is {X_train.shape}")
print(f"\nShape of X_test is {X_test.shape}")
print(f"\nLength of y_train is {y_train.shape}")
print(f"\nLength of y_test is {y_test.shape}")

from sklearn.metrics import f1_score, recall_score, confusion_matrix, roc_auc_score

import xgboost as xgb

# Baseline configuration: only the learning objective is set, nothing is tuned.
params_1 = {"objective": "binary:logistic"}

# Maximum number of boosting rounds for the baseline cross-validation.
n = 1000

# xgboost-specific container for the training split; also reused by the
# hyperparameter-search objective further down.
dtrain_clf = xgb.DMatrix(data=X_train, label=y_train, enable_categorical=True)

# 5-fold cross-validation of the baseline parameters with early stopping.
results = xgb.cv(
    params=params_1,
    dtrain=dtrain_clf,
    num_boost_round=n,
    nfold=5,
    early_stopping_rounds=20,
    metrics=["logloss", "auc", "error"],
)

# Fit the baseline classifier on the training split and score the held-out split.
clf_1 = xgb.XGBClassifier(**params_1)
clf_1.fit(X_train, y_train)
pred_1 = clf_1.predict(X_test)

print(f"f1 score : {f1_score(y_test, pred_1)}\n")
print(f"confusion Matrix:\n{confusion_matrix(y_test, pred_1)}\n")


# Now for the hyperparameter tuning
import xgboost as xgb
from sklearn.model_selection import cross_val_score
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe, space_eval
from hyperopt.early_stop import no_progress_loss
import numpy as np

# Wide search space covering tree shape, regularisation and sampling parameters.
# It is not the space handed to `fmin` in the visible code (`search_space_3` is).
# `n_estimators` is deliberately not part of the space: a choice over
# np.arange(100, 1000, 10) was tried and left disabled, and the number of
# boosting rounds is controlled by `xgb.cv` inside `xgb_objective` instead.
search_space = {
    # Integer-valued parameters: pick one element of an integer range.
    "max_depth": hp.choice("max_depth", np.arange(1, 20, dtype=int)),
    # Continuous parameters: uniform over [low, high].
    "eta": hp.uniform("eta", 0, 1),
    "gamma": hp.uniform("gamma", 0, 100.0),
    "reg_alpha": hp.uniform("reg_alpha", 1e-6, 10),
    "reg_lambda": hp.uniform("reg_lambda", 0, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    "colsample_bynode": hp.uniform("colsample_bynode", 0.5, 1),
    "colsample_bylevel": hp.uniform("colsample_bylevel", 0.5, 1),
    "min_child_weight": hp.choice("min_child_weight", np.arange(1, 10, dtype=int)),
    "max_delta_step": hp.choice("max_delta_step", np.arange(1, 10, dtype=int)),
    "subsample": hp.uniform("subsample", 0.5, 1),
    # Fixed (non-searched) settings.
    "objective": "binary:logistic",
    "eval_metric": "aucpr",
    "seed": 44,
}

# Reduced space: only `eta` and `reg_lambda` are searched, `gamma` is pinned
# to 0 (a uniform(0, 1) prior on it was tried and disabled), and
# `n_estimators` is again left out.
search_space_3 = {
    "eta": hp.uniform("eta", 0, 1),
    "gamma": 0,
    "reg_lambda": hp.uniform("reg_lambda", 0, 1),
    # Fixed (non-searched) settings.
    "objective": "binary:logistic",
    "eval_metric": "auc",
    "seed": 44,
}


def xgb_objective(space):
    """Hyperopt objective: cross-validate one parameter sample and score it by AUC.

    Runs 5-fold stratified cross-validation with ``xgb.cv`` on the module-level
    ``dtrain_clf`` DMatrix, for up to 500 boosting rounds with early stopping
    after 20 rounds without improvement.

    Parameters
    ----------
    space : dict
        Booster parameters sampled by hyperopt, passed to ``xgb.cv`` as its
        ``params`` argument. Because an explicit ``metrics`` list is supplied
        below, that list (not an ``eval_metric`` entry in ``space``) decides
        which metrics are evaluated.

    Returns
    -------
    dict
        ``{"loss": -auc, "status": STATUS_OK}`` where ``auc`` is the highest
        mean test AUC across the retained boosting rounds. The sign is flipped
        because ``fmin`` minimises the loss.
    """
    cv_results = xgb.cv(
        space,
        dtrain=dtrain_clf,  # DMatrix (xgboost specific)
        num_boost_round=500,
        nfold=5,
        stratified=True,
        early_stopping_rounds=20,
        # "auc" must come LAST: xgb.cv uses the last listed metric for early
        # stopping, and AUC is the quantity this objective optimises.
        # (Previously "error" was last, so stopping tracked a different metric.)
        metrics=["logloss", "aucpr", "error", "auc"],
    )

    best_score = cv_results["test-auc-mean"].max()
    return {"loss": -best_score, "status": STATUS_OK}


# Container that records every evaluated trial of the search.
trials = Trials()

# TPE search over the reduced space: at most 500 evaluations, with
# `no_progress_loss(10)` as the early-stop criterion. `return_argmin=False`
# is used so the result can be unpacked directly as keyword parameters below.
best_hyperparams = fmin(
    xgb_objective,
    search_space_3,
    algo=tpe.suggest,
    max_evals=500,
    trials=trials,
    return_argmin=False,
    early_stop_fn=no_progress_loss(10),
)

# Build the classifier's keyword arguments from the best sample, leaving out
# `eval_metric` (it is not forwarded to the classifier here).
best_params = {
    name: value for name, value in best_hyperparams.items() if name != "eval_metric"
}

# Refit on the training split with the tuned parameters and score the held-out split.
clf_2 = xgb.XGBClassifier(**best_params)
clf_2.fit(X_train, y_train)

pred_2 = clf_2.predict(X_test)

print(f"f1 score : {f1_score(y_test, pred_2)}\n")


Shape of X_train is (455, 30)

Shape of X_test is (114, 30)

Length of y_train is (455,)

Length of y_test is (114,)
f1 score : 0.9403973509933775

confusion Matrix:
[[34  8]
 [ 1 71]]

  3%|▎         | 16/500 [00:03<01:40,  4.83trial/s, best loss: -0.9902992776057792]
f1 score : 0.9530201342281879

