In [1]:
from xgboost import XGBClassifier
from sklearn.datasets import fetch_mldata
from sklearn.metrics import confusion_matrix, accuracy_score
from skopt import BayesSearchCV
from skopt.space import Integer, Real

In [2]:
# Fetch MNIST (cached under data_home) and split it positionally:
# the first 60000 rows are used for training, everything after for testing.
# NOTE(review): fetch_mldata was deprecated and later removed from
# scikit-learn; on newer versions this cell presumably needs fetch_openml
# instead -- confirm against the installed version.
mnist = fetch_mldata('MNIST original', data_home='~/pd/mnist_data')
X_train, X_test = mnist.data[:60000], mnist.data[60000:]
y_train, y_test = mnist.target[:60000], mnist.target[60000:]

In [3]:
# Base classifier handed to the hyperparameter search; every setting other
# than n_jobs is left at the library default.
# n_jobs=-1 presumably lets XGBoost use all available cores -- confirm for
# the installed xgboost version.
xgbc = XGBClassifier(
    n_jobs=-1,
)

In [5]:
# Ranges explored by the Bayesian optimiser, keyed by XGBClassifier
# parameter name.
search_spaces = {
    'learning_rate': Real(0.01, 0.2),
    'n_estimators': Integer(50, 500),
    'subsample': Real(0.5, 1.0),
    'min_child_weight': Integer(1, 10),
}
# Number of parameter settings the search will sample.
n_iter = 5

# NOTE(review): the recorded output further down reports 64 candidates and a
# best learning_rate of ~0.2026 (above the 0.2 bound defined here), so it
# appears to have been produced with a different configuration than this
# cell. Restart the kernel and re-run to refresh the outputs.

# random_state is pinned so the sequence of sampled candidates is repeatable
# between runs; without it every execution explores different points and the
# reported best parameters cannot be reproduced.
search = BayesSearchCV(
    estimator=xgbc,
    search_spaces=search_spaces,
    n_iter=n_iter,
    n_jobs=-1,
    verbose=3,
    random_state=0,
)

In [None]:
# Run the hyperparameter search on the training split; the %time line magic
# reports how long the fit took. This is the expensive cell of the notebook.
%time search.fit(X_train, y_train)



Fitting 3 folds for each of 64 candidates, totalling 192 fits
[CV] learning_rate=0.141425493041, min_child_weight=3, n_estimators=92, subsample=0.734394629061 
[CV] learning_rate=0.141425493041, min_child_weight=3, n_estimators=92, subsample=0.734394629061 
[CV] learning_rate=0.141425493041, min_child_weight=3, n_estimators=92, subsample=0.734394629061 
[CV] learning_rate=0.0737123720929, min_child_weight=4, n_estimators=322, subsample=0.731544025582 
[CV] learning_rate=0.0737123720929, min_child_weight=4, n_estimators=322, subsample=0.731544025582 
[CV] learning_rate=0.0737123720929, min_child_weight=4, n_estimators=322, subsample=0.731544025582 
[CV] learning_rate=0.0154082330195, min_child_weight=5, n_estimators=462, subsample=0.581441554617 
[CV] learning_rate=0.0154082330195, min_child_weight=5, n_estimators=462, subsample=0.581441554617 
[CV] learning_rate=0.0154082330195, min_child_weight=5, n_estimators=462, subsample=0.581441554617 
[CV] learning_rate=0.0115187197784, min_chil

In [8]:
# Display the best estimator found by the search alongside the parameter
# values that selected it.
(
    search.best_estimator_,
    search.best_params_,
)

(XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
        colsample_bytree=1, gamma=0, learning_rate=0.2026360833258897,
        max_delta_step=0, max_depth=3, min_child_weight=6, missing=None,
        n_estimators=437, n_jobs=-1, nthread=None,
        objective='multi:softprob', random_state=0, reg_alpha=0,
        reg_lambda=1, scale_pos_weight=1, seed=None, silent=True,
        subsample=0.52847818048127726),
 {'learning_rate': 0.2026360833258897,
  'min_child_weight': 6,
  'n_estimators': 437,
  'subsample': 0.52847818048127726})

In [11]:
# Confusion matrix on the held-out test split. Per scikit-learn's documented
# convention, rows are true labels and columns are predicted labels.
confusion_matrix(
    y_true=y_test,
    y_pred=search.predict(X_test),
)

array([[ 968,    0,    1,    0,    0,    2,    5,    1,    3,    0],
       [   0, 1126,    2,    1,    0,    1,    3,    0,    2,    0],
       [   3,    0, 1006,    6,    4,    0,    1,    6,    6,    0],
       [   1,    0,    5,  988,    0,    3,    0,    5,    4,    4],
       [   2,    0,    5,    1,  959,    0,    2,    0,    2,   11],
       [   2,    0,    1,    8,    0,  869,    7,    1,    3,    1],
       [   7,    2,    0,    0,    1,    3,  939,    1,    5,    0],
       [   1,    1,   11,    5,    1,    0,    0,  996,    2,   11],
       [   5,    1,    4,    3,    3,    1,    1,    3,  949,    4],
       [   2,    6,    1,    5,   11,    1,    0,    3,    3,  977]])

In [12]:
# Overall fraction of test samples whose predicted label matches the truth.
accuracy_score(
    y_true=y_test,
    y_pred=search.predict(X_test),
)

0.97770000000000001