# Hyper Parameter Tuning using Bayesian Optimization based HyperOpt
Why? Grid Search takes toooooo much time!

In [1]:
# What if params be like...

params = {
    "max_depth": [10, 20, 30, 40, 50],
    "num_leaves": [35, 45, 55, 65],
    "colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9],
    "subsample": [0.5, 0.6, 0.7, 0.8, 0.9],
    "min_child_weight": [10, 20, 30, 40],
    "reg_alpha": [0.01, 0.05, 0.1],
}

## Overview of Bayesian Optimization
afterwards model creates based on new inputs

- Surrogate Model: recommends inputs
- Acquisition Function: calculates optimized inputs

Bayesian Optimization Steps
1. 파라미터 샘플링 및 성능 관측
2. 대체 모델이 최적 함수를 추정 (신뢰 구간 제공)
3. 획득 함수가 다음 관측할 하이퍼 파라미터 값 계산 -> 대체 모델에 전달
4. 대체 모델은 다시 갱신되어 최적 함수 예측 추정

optimized function => generally Gaussian Process, but HyperOpt uses TPE (Tree-structure Parzen Estimator)

In [2]:
!pip install hyperopt

Collecting hyperopt
  Downloading hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting py4j
  Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.5/200.5 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: py4j, hyperopt
Successfully installed hyperopt-0.2.7 py4j-0.10.9.7


In [3]:
from hyperopt import hp

search_space = {'x': hp.quniform('x', -10, 10, 1), 'y': hp.quniform('y', -15, 15, 1)}

hp.quniform(label, low, high, q): label space will be from low to high (q steps)
hp.uniform(label, low, high): label space will be from low to high (normal distribution)
hp.randint(label, upper): label space will be from 0 to maximum upper (integer)
hp.loguniform(label, low, high): uniform(low, high), but return's log value will be normal distribution
hp.choice(label, options): use if search value contains string or character

In [4]:
from hyperopt import STATUS_OK

def objective_func(search_space):
    x = search_space['x']
    y = search_space['y']
    retval = x ** 2 - 20 * y

    return retval

fmin(objective, space, algo, max_evals, trials)
- fn: objective function
- space: search space
- algo \[default="tpe.suggest"]: Bayesian optimization algorithm
- max_evals: input trials
- trials: saves inputs and return values
- rstate: fmin() random seed value

In [19]:
import numpy as np
from hyperopt import fmin, tpe, Trials

trial_val = Trials()

best_01 = fmin(fn=objective_func, space=search_space, algo=tpe.suggest, max_evals=5,
               trials=trial_val, rstate=np.random.default_rng(seed=0))

print("best: ", best_01)

100%|██████████| 5/5 [00:00<00:00, 1155.77trial/s, best loss: -224.0]
best:  {'x': -4.0, 'y': 12.0}


In [20]:
# 20 times evaluations
best_02 = fmin(fn=objective_func, space=search_space, algo=tpe.suggest, max_evals=20,
               trials=trial_val, rstate=np.random.default_rng(seed=0))

print("best: ", best_02)

100%|██████████| 20/20 [00:00<00:00, 1323.43trial/s, best loss: -296.0]
best:  {'x': 2.0, 'y': 15.0}


In [13]:
best_03 = fmin(fn=objective_func, space=search_space, algo=tpe.suggest, max_evals=1000,
               trials=trial_val, rstate=np.random.default_rng(seed=0))

print("best: ", best_03)

100%|██████████| 1000/1000 [00:15<00:00, 58.34trial/s, best loss: -300.0]
best:  {'x': -0.0, 'y': 15.0}


In [21]:
print(trial_val.results)

[{'loss': -64.0, 'status': 'ok'}, {'loss': -184.0, 'status': 'ok'}, {'loss': 56.0, 'status': 'ok'}, {'loss': -224.0, 'status': 'ok'}, {'loss': 61.0, 'status': 'ok'}, {'loss': -64.0, 'status': 'ok'}, {'loss': -184.0, 'status': 'ok'}, {'loss': 56.0, 'status': 'ok'}, {'loss': -224.0, 'status': 'ok'}, {'loss': 61.0, 'status': 'ok'}, {'loss': -296.0, 'status': 'ok'}, {'loss': -40.0, 'status': 'ok'}, {'loss': 281.0, 'status': 'ok'}, {'loss': 64.0, 'status': 'ok'}, {'loss': 100.0, 'status': 'ok'}, {'loss': 60.0, 'status': 'ok'}, {'loss': -39.0, 'status': 'ok'}, {'loss': 1.0, 'status': 'ok'}, {'loss': -164.0, 'status': 'ok'}, {'loss': 21.0, 'status': 'ok'}]


In [22]:
print(trial_val.vals)

{'x': [-6.0, -4.0, 4.0, -4.0, 9.0, -6.0, -4.0, 4.0, -4.0, 9.0, 2.0, 10.0, -9.0, -8.0, -0.0, -0.0, 1.0, 9.0, 6.0, 9.0], 'y': [5.0, 10.0, -2.0, 12.0, 1.0, 5.0, 10.0, -2.0, 12.0, 1.0, 15.0, 7.0, -10.0, 0.0, -5.0, -3.0, 2.0, 4.0, 10.0, 3.0]}


In [23]:
import pandas as pd

losses = [loss_dict["loss"] for loss_dict in trial_val.results]

result_df = pd.DataFrame({'x': trial_val.vals['x'], 'y': trial_val.vals['y'], 'losses': losses})
result_df

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,-6.0,5.0,-64.0
6,-4.0,10.0,-184.0
7,4.0,-2.0,56.0
8,-4.0,12.0,-224.0
9,9.0,1.0,61.0


## Hyper parameter Optimization using HyperOpt

In [2]:
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

dataset = load_breast_cancer()

cancer_df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
cancer_df["target"] = dataset.target
X_features = cancer_df.iloc[:, :-1]
y_label = cancer_df.iloc[:, -1]

# 80% train set, 20% test set
X_train, X_test, y_train, y_test = train_test_split(X_features, y_label, test_size=0.2, random_state=156)

X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=156)

In [3]:
from hyperopt import hp

# max_depth: 5~20, 1 step
# min_child_weight: 1~2, 1 step
# colsample_bytree: 0.5~1
#learning_rate: 0.01~0.3, normal-distributed
xgb_search_space = {
    "max_depth": hp.quniform("max_depth", 5, 20, 1),
    "min_child_weight": hp.quniform("min_child_weight", 1, 2, 1),
    "learning_rate": hp.uniform("learning_rate", 0.01, 0.2),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
}

caution
- quniform searches number with real number; it should be like integer
- Hyperopt's objective function should return minimum value -> if bigger value is better, it should be returned after multiply -1

In [4]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from hyperopt import STATUS_OK

def objective_func(search_space):
    xgb_clf = XGBClassifier(n_estimators=100, max_depth=int(search_space["max_depth"]),
                            min_child_weight=int(search_space["min_child_weight"]),
                            learning_rate=search_space["learning_rate"],
                            colsample_bytree=search_space["colsample_bytree"],
                            eval_metric="logloss")
    accuracy = cross_val_score(xgb_clf, X_train, y_train, scoring="accuracy", cv=3)

    # accuracy will be an array (that has roc-auc)
    return {"loss":-1 * np.mean(accuracy), "status": STATUS_OK}

In [6]:
# search hyper parameter
import numpy as np
from hyperopt import fmin, tpe, Trials

trial_val = Trials()

best = fmin(fn=objective_func,
            space=xgb_search_space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trial_val,
            rstate=np.random.default_rng(seed=9))

print("best:", best)

100%|██████████| 50/50 [00:22<00:00,  2.22trial/s, best loss: -0.9692401533635412]
best: {'colsample_bytree': 0.548301545497125, 'learning_rate': 0.1840281762576621, 'max_depth': 18.0, 'min_child_weight': 2.0}


In [7]:
print("colsample_bytree:{0}, learning_rate:{1}, max_depth:{2}, min_child_weight:{3}".format(round(best["colsample_bytree"], 5), round(best["learning_rate"], 5),
                                                                                            int(best["max_depth"]), int(best["min_child_weight"])))

colsample_bytree:0.5483, learning_rate:0.18403, max_depth:18, min_child_weight:2


In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


def get_clf_eval(y_test, pred=None, pred_proba=None):
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    roc_auc = roc_auc_score(y_test, pred_proba)
    print("confusion matrix: \n", confusion)
    print("accuracy: {0:.4f}, precision: {1:.4f}, recall: {2:.4f}, f1: {3:.4f}, ROC-AUC: {4:.4f}"
          .format(accuracy, precision, recall, f1, roc_auc))

In [10]:
# XGBClassifier re-learning
xgb_wrapper = XGBClassifier(n_estimators=400,
                            learning_rate=round(best["learning_rate"], 5),
                            max_depth=int(best["max_depth"]),
                            min_child_weight=int(best["min_child_weight"]),
                            colsample_bytree=round(best["colsample_bytree"], 5))

evals = [(X_tr, y_tr), (X_val, y_val)]
xgb_wrapper.fit(X_tr, y_tr, early_stopping_rounds=50, eval_metric="logloss",
                eval_set=evals, verbose=True)

prds = xgb_wrapper.predict(X_test)
prd_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

get_clf_eval(y_test, prds, prd_proba)

[0]	validation_0-logloss:0.54472	validation_1-logloss:0.58675
[1]	validation_0-logloss:0.44155	validation_1-logloss:0.52577
[2]	validation_0-logloss:0.36542	validation_1-logloss:0.48906
[3]	validation_0-logloss:0.30756	validation_1-logloss:0.45704
[4]	validation_0-logloss:0.26142	validation_1-logloss:0.41671
[5]	validation_0-logloss:0.22616	validation_1-logloss:0.39605
[6]	validation_0-logloss:0.19465	validation_1-logloss:0.37095
[7]	validation_0-logloss:0.16951	validation_1-logloss:0.36066
[8]	validation_0-logloss:0.14718	validation_1-logloss:0.34686
[9]	validation_0-logloss:0.13006	validation_1-logloss:0.33716
[10]	validation_0-logloss:0.11635	validation_1-logloss:0.32332
[11]	validation_0-logloss:0.10455	validation_1-logloss:0.32074
[12]	validation_0-logloss:0.09388	validation_1-logloss:0.31916
[13]	validation_0-logloss:0.08434	validation_1-logloss:0.30987
[14]	validation_0-logloss:0.07702	validation_1-logloss:0.30469
[15]	validation_0-logloss:0.07144	validation_1-logloss:0.30293
[1



[76]	validation_0-logloss:0.01782	validation_1-logloss:0.26490
[77]	validation_0-logloss:0.01773	validation_1-logloss:0.26706
[78]	validation_0-logloss:0.01764	validation_1-logloss:0.26707
[79]	validation_0-logloss:0.01755	validation_1-logloss:0.26798
[80]	validation_0-logloss:0.01745	validation_1-logloss:0.26496
[81]	validation_0-logloss:0.01736	validation_1-logloss:0.26459
[82]	validation_0-logloss:0.01728	validation_1-logloss:0.26434
[83]	validation_0-logloss:0.01719	validation_1-logloss:0.26527
[84]	validation_0-logloss:0.01710	validation_1-logloss:0.26244
[85]	validation_0-logloss:0.01702	validation_1-logloss:0.26336
[86]	validation_0-logloss:0.01694	validation_1-logloss:0.26229
[87]	validation_0-logloss:0.01686	validation_1-logloss:0.26193
[88]	validation_0-logloss:0.01678	validation_1-logloss:0.26243
[89]	validation_0-logloss:0.01671	validation_1-logloss:0.26481
[90]	validation_0-logloss:0.01663	validation_1-logloss:0.26362
[91]	validation_0-logloss:0.01656	validation_1-logloss: