In [1]:
%matplotlib inline

import numpy as np
import xgboost as xgb
from sklearn import datasets
from sklearn.metrics import auc 
from matplotlib import pylab as plt 
from sklearn.model_selection import KFold 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import make_scorer, mean_squared_error
from seqmml import SeqUD, SeqRand, GPEIOPT, SMACOPT, TPEOPT,\
                    RandSearch, LHSSearch, SobolSearch, UDSearch

# Load the wine classification data set and min-max scale every feature
# (MinMaxScaler's default target range is [0, 1]).
dt = datasets.load_wine()
sx = MinMaxScaler()
x = sx.fit_transform(dt.data)
# Class labels stay a 1-D vector of shape (n_samples,). Classifiers and
# metrics that follow the scikit-learn API expect 1-D targets; a column vector
# of shape (n_samples, 1) can raise a DataConversionWarning on every CV fit.
y = dt.target.ravel()

# Hyperparameter search space handed to every seqmml search object below.
# - 'categorical' / 'integer' entries enumerate their candidates in 'Mapping'.
# - 'continuous' entries are sampled inside 'Range' and then passed through
#   'Wrapper'; the [-5, 0] ranges with a 10**e wrapper therefore span
#   1e-5 .. 1 on a log10 scale.
# NOTE(review): 'subsample' and 'colsample_bytree' allow a lower bound of 0;
# XGBoost documents both as (0, 1] -- confirm the search never emits exactly 0.
ParaSpace = {
    'booster': {'Type': 'categorical', 'Mapping': ['gbtree', 'gblinear']},
    'max_depth': {'Type': 'integer', 'Mapping': np.linspace(2, 10, 9)},
    'n_estimators': {'Type': 'integer', 'Mapping': np.linspace(100, 300, 201)},
    'min_child_weight': {'Type': 'integer', 'Mapping': np.linspace(1, 10, 10)},
    # Sampled on a linear scale (identity wrapper).
    'subsample': {'Type': 'continuous', 'Range': [0, 1], 'Wrapper': lambda v: v},
    'colsample_bytree': {'Type': 'continuous', 'Range': [0, 1], 'Wrapper': lambda v: v},
    # Sampled as a base-10 exponent, then mapped back to the raw value.
    'learning_rate': {'Type': 'continuous', 'Range': [-5, 0], 'Wrapper': lambda e: 10**e},
    'gamma': {'Type': 'continuous', 'Range': [-5, 0], 'Wrapper': lambda e: 10**e},
    'reg_lambda': {'Type': 'continuous', 'Range': [-5, 0], 'Wrapper': lambda e: 10**e},
    'reg_alpha': {'Type': 'continuous', 'Range': [-5, 0], 'Wrapper': lambda e: 10**e},
}

# Base model whose hyperparameters are tuned; the searches override the
# settings listed in ParaSpace, everything else stays at XGBoost's defaults.
estimator = xgb.XGBClassifier()
# Shared 3-fold splitter: shuffled with a fixed seed so that every search
# method is scored on the same folds.
cv = KFold(
    n_splits=3,
    shuffle=True,
    random_state=0,
)

# Display name -> seqmml search class for every method compared below.
model_zoo = dict(
    Rand=RandSearch,
    LHS=LHSSearch,
    Sobol=SobolSearch,
    UD=UDSearch,
    SeqUD=SeqUD,
    SeqRand=SeqRand,
    SMAC=SMACOPT,
    TPE=TPEOPT,
    GPEI=GPEIOPT,
)

In [2]:
# RandSearch over ParaSpace: 100 runs, scored by cross-validated accuracy on
# the shared `cv` splitter, with a fixed seed for repeatability.
rand_clf = RandSearch(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    n_jobs=10,
    refit=True,
    rand_seed=0,
    verbose=True,
)
rand_clf.fit(x, y)

HBox(children=(IntProgress(value=0), HTML(value='')))


Search completed (100/100) with best score: 0.98324.
Search completed in 12.83 seconds.
The best score is: 0.98324.
The best configurations are:
booster             : gbtree
max_depth           : 2
n_estimators        : 231
min_child_weight    : 1
subsample           : 0.77058
colsample_bytree    : 0.01561
learning_rate       : 3e-05
gamma               : 0.00192
reg_lambda          : 6e-05
reg_alpha           : 0.00061


In [3]:
# LHSSearch over the same space, with the same run budget, scorer, folds and
# seed as the other searches.
lhs_clf = LHSSearch(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    n_jobs=10,
    refit=True,
    rand_seed=0,
    verbose=True,
)
lhs_clf.fit(x, y)

HBox(children=(IntProgress(value=0), HTML(value='')))


Search completed (100/100) with best score: 0.98315.
Search completed in 9.67 seconds.
The best score is: 0.98315.
The best configurations are:
booster             : gbtree
max_depth           : 2
n_estimators        : 165
min_child_weight    : 1
subsample           : 0.315
colsample_bytree    : 0.095
learning_rate       : 0.18836
gamma               : 0.00944
reg_lambda          : 0.00033
reg_alpha           : 1e-05


In [4]:
# SobolSearch over the same space, with the same run budget, scorer, folds
# and seed as the other searches.
sobol_clf = SobolSearch(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    n_jobs=10,
    refit=True,
    rand_seed=0,
    verbose=True,
)
sobol_clf.fit(x, y)

HBox(children=(IntProgress(value=0), HTML(value='')))


Search completed (100/100) with best score: 0.98305.
Search completed in 9.62 seconds.
The best score is: 0.98305.
The best configurations are:
booster             : gbtree
max_depth           : 6
n_estimators        : 104
min_child_weight    : 2
subsample           : 0.89844
colsample_bytree    : 0.30469
learning_rate       : 0.00028
gamma               : 0.00048
reg_lambda          : 0.00048
reg_alpha           : 6e-05


In [5]:
# UDSearch over the same space; unlike the searches above it also takes a
# `level_number` (20 here).
# NOTE(review): the recorded output of this cell is
#   TypeError: design_query() got an unexpected keyword argument 'ShowCrit'
# which is raised inside the library call, not by this notebook's code. It
# looks like a version mismatch between seqmml and the package providing
# design_query() -- confirm the installed versions before re-running.
ud_clf = UDSearch(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    level_number=20,
    n_jobs=10,
    refit=True,
    rand_seed=0,
    verbose=True,
)
ud_clf.fit(x, y)

TypeError: design_query() got an unexpected keyword argument 'ShowCrit'

In [None]:
# SeqUD over the same space, with the same budget, scorer, folds and seed;
# `level_number` is 20 as in the UDSearch cell.
sequd_clf = SeqUD(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    level_number=20,
    n_jobs=10,
    refit=True,
    rand_seed=0,
    verbose=True,
)
sequd_clf.fit(x, y)

In [None]:
# SeqRand over the same space, with the same budget, scorer, folds and seed.
# No `n_jobs` is passed here, so the library default applies.
seqrand_clf = SeqRand(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    refit=True,
    rand_seed=0,
    verbose=True,
)
seqrand_clf.fit(x, y)

In [None]:
# GPEIOPT over the same space, with the same budget, scorer, folds and seed.
# No `n_jobs` is passed here, so the library default applies.
gpei_clf = GPEIOPT(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    refit=True,
    rand_seed=0,
    verbose=True,
)
gpei_clf.fit(x, y)

In [None]:
# SMACOPT over the same space, with the same budget, scorer, folds and seed.
# No `n_jobs` is passed here, so the library default applies.
smac_clf = SMACOPT(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    refit=True,
    rand_seed=0,
    verbose=True,
)
smac_clf.fit(x, y)

In [None]:
# TPEOPT over the same space, with the same budget, scorer, folds and seed.
# No `n_jobs` is passed here, so the library default applies.
tpe_clf = TPEOPT(
    ParaSpace,
    estimator=estimator,
    cv=cv,
    scoring="accuracy",
    max_runs=100,
    refit=True,
    rand_seed=0,
    verbose=True,
)
tpe_clf.fit(x, y)

In [None]:
# Compare the search methods: for each one, plot the running maximum of the
# logged scores (best accuracy found so far).
#
# Every curve carries its own label. Previously the legend text was taken from
# model_zoo.keys(), whose order (..., SMAC, TPE, GPEI) differs from the order
# the curves are drawn in (..., GPEI, SMAC, TPE), so the last three curves
# were mislabelled.
plt.plot(rand_clf.logs["score"].cummax(), label="Rand")
plt.plot(lhs_clf.logs["score"].cummax(), label="LHS")
plt.plot(sobol_clf.logs["score"].cummax(), label="Sobol")
plt.plot(ud_clf.logs["score"].cummax(), label="UD")
plt.plot(sequd_clf.logs["score"].cummax(), label="SeqUD")
plt.plot(seqrand_clf.logs["score"].cummax(), label="SeqRand")
plt.plot(gpei_clf.logs["score"].cummax(), label="GPEI")
plt.plot(smac_clf.logs["score"].cummax(), label="SMAC")
plt.plot(tpe_clf.logs["score"].cummax(), label="TPE")
plt.xlabel("Row in search log")
plt.ylabel("Best score so far (accuracy)")
plt.legend()