In [21]:
import hyperopt 
# Show which hyperopt release is installed in this kernel.
print(hyperopt.__version__)

0.2.7


In [22]:
from hyperopt import hp 

# Search space for the toy problem. Both variables use quniform with step 1,
# so samples land on whole-number values: x within [-10, 10], y within [-15, 15].
search_space = dict(
    x=hp.quniform('x', -10, 10, 1),
    y=hp.quniform('y', -15, 15, 1),
)

In [23]:
from hyperopt import STATUS_OK

def objective_func(search_space):
    """Toy objective: evaluate x**2 - 20*y at one sampled point.

    Args:
        search_space: mapping holding the sampled values under the keys
            'x' and 'y'.

    Returns:
        The scalar x**2 - 20*y. It is returned as a bare number rather than
        a dict with 'loss'/'status' entries.
    """
    x_val, y_val = search_space['x'], search_space['y']
    return x_val**2 - 20*y_val

In [24]:
from hyperopt import fmin, tpe, Trials
import numpy as np 

# Fresh Trials container: it records every evaluation made by this search.
trial_val = Trials()

# Minimize the toy objective with TPE for 5 evaluations. The seeded generator
# passed as rstate fixes the sequence of sampled points.
best_01 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)
print('best:', best_01)

100%|██████████| 5/5 [00:00<00:00, 1666.13trial/s, best loss: -224.0]
best: {'x': np.float64(-4.0), 'y': np.float64(12.0)}


In [25]:
# Start from an empty Trials object so this run's history is not mixed with
# the previous 5-evaluation search.
trial_val = Trials()

# Same search as before, but with a budget of 20 evaluations.
best_02 = fmin(
    fn=objective_func,
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trial_val,
    rstate=np.random.default_rng(seed=0),
)
# One result dict per evaluation, each carrying 'loss' and 'status'.
print(trial_val.results)

100%|██████████| 20/20 [00:00<00:00, 1538.60trial/s, best loss: -296.0]
[{'loss': -64.0, 'status': 'ok'}, {'loss': -184.0, 'status': 'ok'}, {'loss': 56.0, 'status': 'ok'}, {'loss': -224.0, 'status': 'ok'}, {'loss': 61.0, 'status': 'ok'}, {'loss': -296.0, 'status': 'ok'}, {'loss': -40.0, 'status': 'ok'}, {'loss': 281.0, 'status': 'ok'}, {'loss': 64.0, 'status': 'ok'}, {'loss': 100.0, 'status': 'ok'}, {'loss': 60.0, 'status': 'ok'}, {'loss': -39.0, 'status': 'ok'}, {'loss': 1.0, 'status': 'ok'}, {'loss': -164.0, 'status': 'ok'}, {'loss': 21.0, 'status': 'ok'}, {'loss': -56.0, 'status': 'ok'}, {'loss': 284.0, 'status': 'ok'}, {'loss': 176.0, 'status': 'ok'}, {'loss': -171.0, 'status': 'ok'}, {'loss': 0.0, 'status': 'ok'}]


In [26]:
# Sampled inputs per hyperparameter name, one list entry per evaluation.
print(trial_val.vals)

{'x': [np.float64(-6.0), np.float64(-4.0), np.float64(4.0), np.float64(-4.0), np.float64(9.0), np.float64(2.0), np.float64(10.0), np.float64(-9.0), np.float64(-8.0), np.float64(-0.0), np.float64(-0.0), np.float64(1.0), np.float64(9.0), np.float64(6.0), np.float64(9.0), np.float64(2.0), np.float64(-2.0), np.float64(-4.0), np.float64(7.0), np.float64(-0.0)], 'y': [np.float64(5.0), np.float64(10.0), np.float64(-2.0), np.float64(12.0), np.float64(1.0), np.float64(15.0), np.float64(7.0), np.float64(-10.0), np.float64(0.0), np.float64(-5.0), np.float64(-3.0), np.float64(2.0), np.float64(4.0), np.float64(10.0), np.float64(3.0), np.float64(3.0), np.float64(-14.0), np.float64(-8.0), np.float64(11.0), np.float64(-0.0)]}


In [27]:
import pandas as pd 

# Pull the scalar loss out of every per-trial result dict.
losses = [trial_result['loss'] for trial_result in trial_val.results]

# One row per trial: the sampled x and y next to the loss they produced.
result_df = pd.DataFrame(
    {
        'x': trial_val.vals['x'],
        'y': trial_val.vals['y'],
        'losses': losses,
    }
)
result_df

Unnamed: 0,x,y,losses
0,-6.0,5.0,-64.0
1,-4.0,10.0,-184.0
2,4.0,-2.0,56.0
3,-4.0,12.0,-224.0
4,9.0,1.0,61.0
5,2.0,15.0,-296.0
6,10.0,7.0,-40.0
7,-9.0,-10.0,281.0
8,-8.0,0.0,64.0
9,-0.0,-5.0,100.0


In [28]:
from sklearn.datasets import load_breast_cancer 
from sklearn.model_selection import train_test_split 

dataset = load_breast_cancer()

# Feature matrix as a DataFrame, with the label appended as the last column.
cancer_df = pd.DataFrame(
    dataset.data, columns=dataset.feature_names
).assign(target=dataset.target)

# Everything except the last column is a feature; the last column is the label.
X_features = cancer_df.iloc[:, :-1]
y_label = cancer_df.iloc[:, -1]

In [29]:
# Hold out 20% of the samples as the final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_label, test_size=0.2, random_state=156
)

# Carve 10% of the remaining training data off as a validation set.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=156
)

In [30]:
from hyperopt import hp 

# Hyperparameter search space for the XGBoost classifier.
# max_depth and min_child_weight are sampled with quniform (step 1) and cast
# to int by the objective; learning_rate and colsample_bytree are continuous.
xgb_search_space = dict(
    max_depth=hp.quniform('max_depth', 5, 20, 1),
    min_child_weight=hp.quniform('min_child_weight', 1, 2, 1),
    learning_rate=hp.uniform('learning_rate', 0.01, 0.2),
    colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
)

In [31]:
from sklearn.model_selection import cross_val_score 
from xgboost import XGBClassifier 

def objective_func(search_space):
    """Score one sampled XGBoost configuration by 3-fold cross-validation.

    Note: this definition replaces the earlier toy objective of the same name.

    Args:
        search_space: mapping with the sampled 'max_depth',
            'min_child_weight', 'learning_rate' and 'colsample_bytree'.

    Returns:
        A dict with 'loss' set to the negated mean accuracy over the folds
        (negated so that minimizing the loss maximizes accuracy) and
        'status' set to STATUS_OK.
    """
    # max_depth and min_child_weight are cast to int before being passed on.
    sampled_params = {
        'max_depth': int(search_space['max_depth']),
        'min_child_weight': int(search_space['min_child_weight']),
        'learning_rate': search_space['learning_rate'],
        'colsample_bytree': search_space['colsample_bytree'],
    }
    xgb_clf = XGBClassifier(n_estimators=100, eval_metric='logloss', **sampled_params)

    # Evaluated on the X_tr / y_tr split defined at notebook level.
    fold_scores = cross_val_score(xgb_clf, X_tr, y_tr, scoring='accuracy', cv=3)

    return {'loss': -1 * np.mean(fold_scores), 'status': STATUS_OK}


In [32]:
# Fresh history for the XGBoost search.
trial_val = Trials()

# 50 TPE evaluations of the cross-validated objective, with a seeded generator.
best = fmin(
    fn=objective_func,
    space=xgb_search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trial_val,
    rstate=np.random.default_rng(seed=9),
)
print('best:', best)

100%|██████████| 50/50 [00:06<00:00,  8.13trial/s, best loss: -0.9780127379418921]
best: {'colsample_bytree': np.float64(0.8637740285716389), 'learning_rate': np.float64(0.10657919742273766), 'max_depth': np.float64(8.0), 'min_child_weight': np.float64(2.0)}


In [35]:
# Report each tuned hyperparameter on its own line as "name: value".
for param_name in ('colsample_bytree', 'learning_rate', 'max_depth', 'min_child_weight'):
    print(f'{param_name}: {best[param_name]}')


colsample_bytree: 0.8637740285716389
learning_rate: 0.10657919742273766
max_depth: 8.0
min_child_weight: 2.0


In [33]:
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score

def get_clf_eval(y_test, pred=None, pred_proba=None):
    """Print the confusion matrix and summary metrics for a classifier.

    Precision, recall and F1 use scikit-learn's default averaging, so this is
    intended for binary labels.

    Args:
        y_test: true labels.
        pred: predicted class labels. Required, even though the signature
            keeps the historical None default.
        pred_proba: scores or probabilities handed to roc_auc_score. When
            None, ROC-AUC is skipped and left out of the printed summary.

    Raises:
        ValueError: if pred is None.
    """
    # Fail early with a clear message instead of an error from inside sklearn.
    if pred is None:
        raise ValueError('pred (predicted labels) is required')

    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    # ROC-AUC needs scores, so it is only computed when they are supplied.
    roc_auc = roc_auc_score(y_test, pred_proba) if pred_proba is not None else None

    print('오차 행렬')
    print(confusion)
    # The four spaces before "F1" reproduce the original output exactly.
    summary = '정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f},    F1: {3:.4f}'.format(
        accuracy, precision, recall, f1)
    # Append the ROC-AUC figure only when it was computed.
    if roc_auc is not None:
        summary += ', AUC:{0:.4f}'.format(roc_auc)
    print(summary)

In [None]:
from lightgbm import early_stopping


# Rebuild the classifier from the hyperopt result. The integer-like
# parameters are cast to int, and the continuous ones are rounded to 5 decimals.
xgb_wrapper = XGBClassifier(
    n_estimators=400,
    learning_rate=round(best['learning_rate'], 5),
    eval_metric='logloss',
    max_depth=int(best['max_depth']),
    min_child_weight=int(best['min_child_weight']),
    colsample_bytree=round(best['colsample_bytree'], 5),
    early_stopping_rounds=50,
)

# Log loss is tracked on the training split (validation_0 in the log) and on
# the validation split (validation_1).
# NOTE(review): XGBoost is documented to early-stop on the last eval_set entry,
# i.e. the validation split here; confirm for the installed version.
evals = [(X_tr, y_tr), (X_val, y_val)]
xgb_wrapper.fit(X_tr, y_tr, eval_set=evals, verbose=True)

# Hard labels for the threshold metrics, second predict_proba column for ROC-AUC.
preds = xgb_wrapper.predict(X_test)
pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

get_clf_eval(y_test, preds, pred_proba)

[0]	validation_0-logloss:0.58148	validation_1-logloss:0.60735
[1]	validation_0-logloss:0.51293	validation_1-logloss:0.56948
[2]	validation_0-logloss:0.45447	validation_1-logloss:0.52227
[3]	validation_0-logloss:0.40544	validation_1-logloss:0.48525
[4]	validation_0-logloss:0.36422	validation_1-logloss:0.45206
[5]	validation_0-logloss:0.32900	validation_1-logloss:0.42505
[6]	validation_0-logloss:0.29748	validation_1-logloss:0.40527
[7]	validation_0-logloss:0.27132	validation_1-logloss:0.38587
[8]	validation_0-logloss:0.24638	validation_1-logloss:0.36906
[9]	validation_0-logloss:0.22635	validation_1-logloss:0.35576
[10]	validation_0-logloss:0.20691	validation_1-logloss:0.34055
[11]	validation_0-logloss:0.19028	validation_1-logloss:0.33020
[12]	validation_0-logloss:0.17568	validation_1-logloss:0.32350
[13]	validation_0-logloss:0.16282	validation_1-logloss:0.31550
[14]	validation_0-logloss:0.15096	validation_1-logloss:0.30866
[15]	validation_0-logloss:0.14009	validation_1-logloss:0.30236
[1