Source: [this Kaggle notebook](https://www.kaggle.com/tanitter/grid-search-xgboost-with-scikit-learn)

### Import

In [1]:
# Check this gist for xgboost wrapper: https://gist.github.com/slaypni/b95cb69fd1c82ca4c2ff
 
import sys
import math
import pandas as pd
import xgboost as xgb 
import numpy as np
from sklearn.model_selection import GridSearchCV

### XGBoostClassifier

In [9]:
class XGBoostClassifier():
    """Small scikit-learn style wrapper around ``xgb.train`` for multi-class tasks.

    Labels are mapped to consecutive integers (in sorted order) for training
    and mapped back again in ``predict``. The objective is always forced to
    ``multi:softprob`` and ``verbosity`` to 0.

    Parameters
    ----------
    num_boost_round : int, default=10
        Number of boosting rounds passed to ``xgb.train``.
    **params
        Booster parameters forwarded unchanged to ``xgb.train``
        (for example ``eta``, ``max_depth``, ``num_class``).
    """

    def __init__(self, num_boost_round=10, **params):
        self.clf = None        # trained booster; populated by fit()
        self.label2num = {}    # label -> integer class index; populated by fit()
        self.num_boost_round = num_boost_round
        self.params = params
        # Applied last so caller-supplied values for these keys are overridden.
        self.params.update({'objective': 'multi:softprob', 'verbosity':0})

    def fit(self, X, y, num_boost_round=None):
        """Train the booster on ``X`` with labels ``y``.

        ``num_boost_round`` overrides the instance setting for this call only
        when it is truthy. Returns ``self`` so calls can be chained, as
        scikit-learn estimators do.
        """
        num_boost_round = num_boost_round or self.num_boost_round
        # Sorted so the label -> index mapping is deterministic.
        self.label2num = {label: i for i, label in enumerate(sorted(set(y)))}
        dtrain = xgb.DMatrix(X, label=[self.label2num[label] for label in y])
        self.clf = xgb.train(params=self.params, dtrain=dtrain, num_boost_round=num_boost_round)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the original label with the highest probability."""
        Y = self.predict_proba(X)
        num2label = {i: label for label, i in self.label2num.items()}
        return np.array([num2label[i] for i in np.argmax(Y, axis=1)])

    def predict_proba(self, X):
        """Return the booster's prediction for ``X``.

        With the ``multi:softprob`` objective this is expected to be one row of
        class probabilities per sample, ordered like ``label2num``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.clf is None:
            raise RuntimeError('XGBoostClassifier is not fitted yet; call fit() first.')
        return self.clf.predict(xgb.DMatrix(X))

    def score(self, X, y):
        """Return the reciprocal of the mean log loss on ``(X, y)`` (higher is better).

        The column for each true label is looked up through the mapping learned
        in ``fit``, so the result stays correct even when ``y`` does not contain
        every class. A loss of exactly zero yields ``inf`` instead of raising
        ``ZeroDivisionError``; an infinite loss yields ``0.0``.
        """
        Y = self.predict_proba(X)
        loss = self._mean_log_loss(y, Y)
        if loss == 0:
            return float('inf')
        return 1 / loss

    def _mean_log_loss(self, y, Y):
        """Mean negative log-probability assigned to the true labels ``y`` in ``Y``."""
        n_samples = len(Y)
        if n_samples == 0:
            raise ValueError('cannot compute a score on zero samples')
        total = 0.0
        for row, label in zip(Y, y):
            # KeyError here means `label` was never seen during fit().
            p = row[self.label2num[label]]
            # A zero probability for the true class makes the loss infinite.
            total += math.log(p) if p > 0 else -np.inf
        return -total / n_samples

    def get_params(self, deep=True):
        """Return a copy of all parameters, including ``num_boost_round``.

        Including ``num_boost_round`` lets ``XGBoostClassifier(**est.get_params())``
        (which is what ``sklearn.base.clone`` does) rebuild an equivalent
        estimator. ``deep`` is accepted for API compatibility and ignored.
        """
        params = dict(self.params)
        params['num_boost_round'] = self.num_boost_round
        return params

    def set_params(self, **params):
        """Update parameters in place and return ``self``.

        ``num_boost_round`` is stored on the instance; ``objective`` is ignored
        because the wrapper only supports ``multi:softprob``; everything else is
        merged into the booster parameters.
        """
        if 'num_boost_round' in params:
            self.num_boost_round = params.pop('num_boost_round')
        if 'objective' in params:
            del params['objective']
        self.params.update(params)
        return self

### Utilities

In [3]:
def logloss(y_true, Y_pred, labels=None):
    """Mean negative log-probability assigned to the true labels.

    Parameters
    ----------
    y_true : sequence
        True label of each sample.
    Y_pred : sequence of sequences
        One row of per-class probabilities for each sample.
    labels : sequence, optional
        Class labels in the column order of ``Y_pred``. When omitted, the
        order is inferred as ``sorted(set(y_true))``, which is only correct
        if ``y_true`` contains every class.

    Returns
    -------
    float
        The mean log loss; ``inf`` if any true class was given a probability
        of zero (or less).

    Raises
    ------
    ValueError
        If there are no samples, or ``y_true`` and ``Y_pred`` differ in length.
    """
    n_samples = len(Y_pred)
    if n_samples == 0:
        raise ValueError('logloss needs at least one sample')
    if len(y_true) != n_samples:
        raise ValueError('y_true and Y_pred must have the same length')
    classes = sorted(set(y_true)) if labels is None else labels
    label2num = {name: i for i, name in enumerate(classes)}
    total = 0.0
    for row, label in zip(Y_pred, y_true):
        p = row[label2num[label]]
        # log(0) is undefined; treat it as -inf so the loss becomes +inf.
        total += math.log(p) if p > 0 else -np.inf
    return -total / n_samples

### Start grid search

In [10]:
# Base estimator: booster settings that stay fixed for every grid point.
# 'silent' is not passed: XGBoost replaced it with 'verbosity', which the
# wrapper already sets to 0.
base_clf = XGBoostClassifier(
    eval_metric = 'auc',
    num_class = 2,
    nthread = 4,
)

# Every combination of these values is evaluated (3 * 3 * 3 * 2 * 2 = 108).
parameters = {
    'num_boost_round': [100, 250, 500],
    'eta': [0.05, 0.1, 0.3],
    'max_depth': [6, 9, 12],
    'subsample': [0.9, 1.0],
    'colsample_bytree': [0.9, 1.0],
}

# `clf` is the search object from here on; later cells read its results.
clf = GridSearchCV(base_clf, parameters, n_jobs=1, cv=2)

# Tiny toy dataset: two features, two string-labelled classes.
X = pd.DataFrame(data=[[1,2], [3,4], [2,1], [4,3], [1,0], [4,5]], columns=['f1', 'f2'])
y = np.array(['a', 'b', 'a', 'b', 'a', 'b'])
clf.fit(X , y)

GridSearchCV(cv=2,
             estimator=<__main__.XGBoostClassifier object at 0x00000169699323C8>,
             n_jobs=1,
             param_grid={'colsample_bytree': [0.9, 1.0],
                         'eta': [0.05, 0.1, 0.3], 'max_depth': [6, 9, 12],
                         'num_boost_round': [100, 250, 500],
                         'subsample': [0.9, 1.0]})

### Show best parameters

In [5]:
# Peek at the mean cross-validated score of the first ten parameter combinations.
clf.cv_results_['mean_test_score'][:10]

array([1.18379705, 1.16964695, 1.16694019, 1.15287301, 1.15943199,
       1.15265319, 1.18379705, 1.16964695, 1.16694019, 1.15287301])

In [6]:
# Pull the tried parameter sets and their mean CV scores out of the search;
# the two sequences are index-aligned.
grid_parameters = clf.cv_results_['params']
grid_scores = clf.cv_results_['mean_test_score']

In [7]:
# Pair every parameter set with its score and keep the highest-scoring pair.
# The result is a (params, score) tuple.
best_parameters = max(
    zip(grid_parameters, grid_scores),
    key=lambda candidate: candidate[1],
)
best_parameters

({'colsample_bytree': 0.9,
  'eta': 0.05,
  'max_depth': 6,
  'num_boost_round': 100,
  'subsample': 0.9},
 1.183797049705599)

### Making predictions

In [8]:
# Build the frame column-wise; it holds the same six rows used for fitting.
test = pd.DataFrame({
    'f1': [1, 3, 2, 4, 1, 4],
    'f2': [2, 4, 1, 3, 0, 5],
})
# GridSearchCV delegates predict() to the estimator refit on the best parameters.
print('predicted:', clf.predict(test))

predicted: ['a' 'b' 'a' 'b' 'a' 'b']
