In [1]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from xgboost import XGBClassifier

Generate 20,000 synthetic samples for classification (6 features, 2 of them informative)

In [2]:
# Build a seeded synthetic classification set: 6 features in total,
# 2 of them informative and none redundant.
X, y = make_classification(
    n_samples=20000,
    n_features=6,
    n_informative=2,
    n_redundant=0,
    random_state=0,
    shuffle=True,
)

# Wrap the raw arrays in labelled DataFrames so columns carry names.
X = pd.DataFrame(
    X,
    columns=["X1", "X2", "X3", "X4", "X5", "X6"],
)
y = pd.DataFrame(
    y,
    columns=["Y"],
)

# Show both frames as the cell's output.
X, y

(             X1        X2        X3        X4        X5        X6
 0      0.294746  1.495717  1.302720 -1.345010  0.479917  0.439403
 1      1.215171  1.581750 -0.812745  1.747872 -1.253870  0.278178
 2      2.142549 -0.408630 -0.837329 -0.625856 -0.525033 -1.399407
 3     -1.235311 -0.496558  1.366129 -0.082971 -0.916224 -0.135516
 4     -0.512206 -0.585146 -1.043216 -1.024099 -1.571402  0.817799
 ...         ...       ...       ...       ...       ...       ...
 19995 -1.129966 -1.196311 -0.465549  2.006302  0.055211  0.360194
 19996 -1.988456  2.431868 -0.704884 -0.051441  0.284882  0.425476
 19997  0.648715 -3.079677 -0.349184  1.059276 -1.451799  2.245202
 19998  0.489563 -1.222035 -0.700792  0.019612 -1.817856 -0.440240
 19999  0.457218  0.751796 -0.446726 -1.092428 -0.331496  1.092609
 
 [20000 rows x 6 columns],
        Y
 0      1
 1      1
 2      0
 3      0
 4      1
 ...   ..
 19995  1
 19996  1
 19997  0
 19998  0
 19999  1
 
 [20000 rows x 1 columns])

With Grid Search + XGB Classifier

In [3]:
# Exhaustive search: 2 (n_estimators) x 3 (learning_rate) = 6 candidates.
# With 3-fold cross-validation that is 6 x 3 = 18 model fits.
clf = GridSearchCV(
    estimator=XGBClassifier(),
    param_grid=dict(
        n_estimators=[10, 50],
        learning_rate=[0.01, 0.1, 0.2]),
    cv=3,
    verbose=10)  # verbose=10 logs the start/end of every individual fit
clf.fit(X, y)

# Report the winning parameter combination and its cross-validated score.
print("Best params:", clf.best_params_)
print("Best score:", clf.best_score_)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV 1/3; 1/6] START learning_rate=0.01, n_estimators=10.........................
[CV 1/3; 1/6] END learning_rate=0.01, n_estimators=10;, score=0.876 total time=   0.3s
[CV 2/3; 1/6] START learning_rate=0.01, n_estimators=10.........................
[CV 2/3; 1/6] END learning_rate=0.01, n_estimators=10;, score=0.876 total time=   0.3s
[CV 3/3; 1/6] START learning_rate=0.01, n_estimators=10.........................
[CV 3/3; 1/6] END learning_rate=0.01, n_estimators=10;, score=0.877 total time=   0.4s
[CV 1/3; 2/6] START learning_rate=0.01, n_estimators=50.........................
[CV 1/3; 2/6] END learning_rate=0.01, n_estimators=50;, score=0.883 total time=   2.5s
[CV 2/3; 2/6] START learning_rate=0.01, n_estimators=50.........................
[CV 2/3; 2/6] END learning_rate=0.01, n_estimators=50;, score=0.880 total time=   2.9s
[CV 3/3; 2/6] START learning_rate=0.01, n_estimators=50.........................
[CV 3/3; 2/6] END l

With Random Search + XGB Classifier

In [4]:
# Search space: 6 x 8 x 4 x 5 x 4 = 3,840 possible candidates
# (x 3 CV folds = 11,520 fits if searched exhaustively).
# Randomized search only samples n_iter=10 of them, i.e. 10 x 3 = 30 fits.
clf = RandomizedSearchCV(
    estimator=XGBClassifier(),
    param_distributions=dict(
        learning_rate=[0.05, 0.10, 0.15, 0.20, 0.25, 0.30],
        max_depth=[3, 4, 5, 6, 8, 10, 12, 15],
        min_child_weight=[1, 3, 5, 7],
        gamma=[0.0, 0.1, 0.2 , 0.3, 0.4],
        colsample_bytree=[0.3, 0.4, 0.5 , 0.7]),
    n_iter=10,        # number of sampled candidates (the library default, made explicit)
    cv=3,
    random_state=0,   # fix the candidate sampling so a re-run picks the same 10 candidates
    verbose=10)       # verbose=10 logs the start/end of every individual fit
clf.fit(X, y)

# Report the best sampled parameter combination and its cross-validated score.
print("Best params:", clf.best_params_)
print("Best score:", clf.best_score_)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV 1/3; 1/10] START colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3
[CV 1/3; 1/10] END colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3;, score=0.870 total time=   1.3s
[CV 2/3; 1/10] START colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3
[CV 2/3; 1/10] END colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3;, score=0.863 total time=   1.3s
[CV 3/3; 1/10] START colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3
[CV 3/3; 1/10] END colsample_bytree=0.4, gamma=0.1, learning_rate=0.05, max_depth=5, min_child_weight=3;, score=0.866 total time=   1.1s
[CV 1/3; 2/10] START colsample_bytree=0.3, gamma=0.4, learning_rate=0.1, max_depth=3, min_child_weight=7
[CV 1/3; 2/10] END colsample_bytree=0.3, gamma=0.4, learning_rate=0.1, max_depth=3, min_child_weight=7;, 