In [35]:
import optuna
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn import ensemble
from sklearn import datasets
from sklearn import model_selection

from xgboost import XGBClassifier

In [3]:
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int64

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [37]:
params = {"n_estimators": 100, "max_depth": 3, "learning_rate": 0.0001}
xgb_class = XGBClassifier(**params)

xgb_class.fit(X_train, y_train)

fitted = xgb_class.predict(X_test)

print(pd.crosstab(fitted, y_test))
print("Accuracy: %.3f" % np.mean(fitted == y_test))

target   0   1
row_0         
0       38   1
1        3  72
Accuracy: 0.965


In [47]:
def _xgb_params(trial):
    params = {"n_estimators": trial.suggest_int('n_estimators', 20, 100),
              "max_depth": trial.suggest_int('max_depth', 2, 5),
              "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.2, 1),
              "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1)
             }
    return params

def objective(trial):
    params = _xgb_params(trial)
    classifier = XGBClassifier(**params)
    score = model_selection.cross_val_score(classifier, X, y, n_jobs=-1, cv=5)
    return score.mean()

In [48]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, )

[I 2020-09-26 10:12:47,862] Finished trial#0 with value: 0.9490607048594939 with parameters: {'n_estimators': 33, 'max_depth': 2, 'colsample_bylevel': 0.8649097091371967, 'learning_rate': 0.05070932125430598}. Best is trial#0 with value: 0.9490607048594939.
[I 2020-09-26 10:12:48,043] Finished trial#1 with value: 0.9648812296227295 with parameters: {'n_estimators': 45, 'max_depth': 5, 'colsample_bylevel': 0.2545295585110771, 'learning_rate': 0.015641426023596507}. Best is trial#1 with value: 0.9648812296227295.
[I 2020-09-26 10:12:48,335] Finished trial#2 with value: 0.9631113181183046 with parameters: {'n_estimators': 91, 'max_depth': 3, 'colsample_bylevel': 0.892098353946494, 'learning_rate': 0.07590543544866524}. Best is trial#1 with value: 0.9648812296227295.
[I 2020-09-26 10:12:48,500] Finished trial#3 with value: 0.956078248719143 with parameters: {'n_estimators': 40, 'max_depth': 3, 'colsample_bylevel': 0.25554365173799987, 'learning_rate': 0.0702568807505721}. Best is trial#1 w

[I 2020-09-26 10:12:56,250] Finished trial#31 with value: 0.9666200900481291 with parameters: {'n_estimators': 74, 'max_depth': 4, 'colsample_bylevel': 0.438472730226608, 'learning_rate': 0.06611739296855408}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:12:56,528] Finished trial#32 with value: 0.9666200900481291 with parameters: {'n_estimators': 84, 'max_depth': 4, 'colsample_bylevel': 0.5820627180075566, 'learning_rate': 0.08862235828270829}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:12:56,777] Finished trial#33 with value: 0.9666200900481291 with parameters: {'n_estimators': 80, 'max_depth': 5, 'colsample_bylevel': 0.35982550189060597, 'learning_rate': 0.08448874357761149}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:12:57,058] Finished trial#34 with value: 0.9648657040832169 with parameters: {'n_estimators': 93, 'max_depth': 4, 'colsample_bylevel': 0.29761402451760044, 'learning_rate': 0.05643261999124647}. Best is 

[I 2020-09-26 10:13:04,323] Finished trial#62 with value: 0.9666200900481291 with parameters: {'n_estimators': 87, 'max_depth': 4, 'colsample_bylevel': 0.4181311674850686, 'learning_rate': 0.0644680251498802}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:04,562] Finished trial#63 with value: 0.9631113181183046 with parameters: {'n_estimators': 97, 'max_depth': 4, 'colsample_bylevel': 0.2836244136375592, 'learning_rate': 0.04613399566260796}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:04,807] Finished trial#64 with value: 0.9648657040832169 with parameters: {'n_estimators': 83, 'max_depth': 5, 'colsample_bylevel': 0.37168263173943145, 'learning_rate': 0.08310712516819627}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:05,084] Finished trial#65 with value: 0.9683744760130415 with parameters: {'n_estimators': 88, 'max_depth': 4, 'colsample_bylevel': 0.46976029255112983, 'learning_rate': 0.06291391544943739}. Best is 

[I 2020-09-26 10:13:11,774] Finished trial#93 with value: 0.9613569321533924 with parameters: {'n_estimators': 53, 'max_depth': 4, 'colsample_bylevel': 0.9115228556410152, 'learning_rate': 0.09446112915001516}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:12,008] Finished trial#94 with value: 0.9613414066138798 with parameters: {'n_estimators': 75, 'max_depth': 4, 'colsample_bylevel': 0.3538594248598275, 'learning_rate': 0.09125430972253816}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:12,252] Finished trial#95 with value: 0.9701133364384411 with parameters: {'n_estimators': 81, 'max_depth': 4, 'colsample_bylevel': 0.40605149470680957, 'learning_rate': 0.0845869613323636}. Best is trial#27 with value: 0.9718987734823784.
[I 2020-09-26 10:13:12,471] Finished trial#96 with value: 0.9666045645086166 with parameters: {'n_estimators': 61, 'max_depth': 4, 'colsample_bylevel': 0.4259326427316849, 'learning_rate': 0.09003389439673593}. Best is t

In [49]:
best_params = study.best_params
print(best_params)

xgb_class = XGBClassifier(**best_params)

xgb_class.fit(X_train, y_train)

fitted = xgb_class.predict(X_test)

print(pd.crosstab(fitted, y_test))
print("Accuracy: %.3f" % np.mean(fitted == y_test))

{'n_estimators': 80, 'max_depth': 5, 'colsample_bylevel': 0.20950277453382715, 'learning_rate': 0.08864525921048523}
target   0   1
row_0         
0       39   1
1        2  72
Accuracy: 0.974


In [51]:
study.best_value

0.9718987734823784

In [25]:
pd.crosstab(fitted, y_test)

target,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,38,1
1,3,72
