# Import

## Modules

In [56]:
import numpy as np
import xgboost as xgb
from hyperopt import fmin, tpe, STATUS_OK, STATUS_FAIL, Trials
from hyperopt import hp
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import roc_auc_score

## Data

In [57]:
data = load_breast_cancer()
X = data['data']
y = data['target']

# Setup parameters

In [74]:
xgb_cls_params = {
    'learning_rate':    hp.choice('learning_rate',    np.arange(0.05, 0.31, 0.05)),
    'max_depth':        hp.choice('max_depth',        np.arange(5, 16, 1, dtype=int)),
    'min_child_weight': hp.choice('min_child_weight', np.arange(1, 8, 1, dtype=int)),
    'colsample_bytree': hp.choice('colsample_bytree', np.arange(0.3, 0.8, 0.1)),
    'gamma':            hp.choice('gamma', np.arange(0.3, 0.8, 0.1)),
    'subsample':        hp.uniform('subsample', 0.8, 1),
    'n_estimators':     hp.choice('n_estimators', np.arange(100, 600, 100)),
}

xgb_fit_params = {
    'eval_metric': 'auc',
    'early_stopping_rounds': 10,
    'verbose': False
}
xgb_para = dict()
xgb_para['cls_params'] = xgb_cls_params
xgb_para['fit_params'] = xgb_fit_params
xgb_para['loss_func'] = lambda y, pred: abs(1.0 - roc_auc_score(y, pred))


In [75]:
class HPOpt(object):

    def __init__(self, x_train, x_test, y_train, y_test):
        self.x_train = x_train
        self.x_test  = x_test
        self.y_train = y_train
        self.y_test  = y_test

    def process(self, fn_name, space, trials, algo, max_evals):
        fn = getattr(self, fn_name)
        try:
            result = fmin(fn=fn, space=space, algo=algo, max_evals=max_evals, trials=trials)
        except Exception as e:
            return {'status': STATUS_FAIL,
                    'exception': str(e)}
        return result, trials

    def xgb_cls(self, para):
        estimator = xgb.XGBClassifier(**para['cls_params'], use_label_encoder=False)
        return self.train_estimator(estimator, para)

    def train_estimator(self, estimator, para):
        estimator.fit(self.x_train, self.y_train,
                eval_set=[(self.x_train, self.y_train), (self.x_test, self.y_test)],
                **para['fit_params'])
        pred = estimator.predict(self.x_test)
        loss = para['loss_func'](self.y_test, pred)
        return {'loss': loss, 'status': STATUS_OK}

In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

obj = HPOpt(X_train, X_test, y_train, y_test)

xgb_opt = obj.process(fn_name='xgb_cls', space=xgb_para, trials=Trials(), algo=tpe.suggest, max_evals=1000)

100%|██████████| 1000/1000 [00:42<00:00, 23.46trial/s, best loss: 0.00925925925925919]


In [77]:
xgb_opt

({'colsample_bytree': 4,
  'learning_rate': 4,
  'max_depth': 5,
  'min_child_weight': 6,
  'n_estimators': 4,
  'subsample': 0.8140332408402652},
 <hyperopt.base.Trials at 0x7ff714d499a0>)