## Imports

In [1]:
from utils import get_unsplit_data, get_test_data
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
import numpy as np
import xgboost
import time
import pickle
import warnings
# Suppress DeprecationWarning messages for the rest of the notebook.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Load the training features/labels and the test features via the helpers in utils.
X_train, y_train = get_unsplit_data()
X_test = get_test_data()

## Crossvalidation Function

In [86]:
def crossvalidate_XGBoost(X_t, y_t, params, K_folds, n_jobs=8):
    """Grid-search XGBClassifier hyper-parameters with cross-validation.

    Fits a ``GridSearchCV`` over ``params`` (scored by accuracy) and prints the
    elapsed wall-clock time, the mean test score (+/- two standard deviations)
    of every candidate, and the best parameter set.

    Parameters
    ----------
    X_t, y_t
        Training features and labels, passed unchanged to ``GridSearchCV.fit``.
    params : dict or list of dict
        Parameter grid forwarded to ``GridSearchCV``.
    K_folds
        Forwarded to ``GridSearchCV`` as ``cv``.
    n_jobs : int, default 8
        Number of parallel workers forwarded to ``GridSearchCV``.

    Returns
    -------
    The fitted ``GridSearchCV`` object, so ``best_params_`` / ``cv_results_``
    can be reused programmatically instead of being copied from the printed
    report. When the call is the last line of a notebook cell, end it with
    ``;`` to suppress the object's repr.
    """
    start = time.time()
    # Perform cross validation
    clf = GridSearchCV(xgboost.XGBClassifier(), params, cv=K_folds,
                       scoring='accuracy', n_jobs=n_jobs, verbose=2)
    clf.fit(X_t, y_t)
    end = time.time()

    print("Cross-validation Training Time = ", (end - start))
    print()

    print("Grid scores:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    # Use a distinct loop name so the `params` argument is not shadowed.
    for mean, std, candidate in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, candidate))
    print()

    print("Best parameter set:")
    print(clf.best_params_)
    print()

    return clf

## Hand-tuning

Initial hand-tuning indicates that max_depth = 5 works well with the other parameters left at their defaults. Early stopping indicated that overfitting begins at ~350 estimators. Start with these parameters.  

First try to determine colsample_bytree.

In [5]:
# Sweep colsample_bytree only; every other hyper-parameter is pinned to one value.
K_folds = 3
params = [{
    'max_depth': [5],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0],
    'subsample': [1],
    'colsample_bytree': [.7, .75, .8, .85, .9, .95, 1.0],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 7 candidates, totalling 21 fits
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0, total= 1.3min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0 


  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0, total= 1.4min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.7, subsample=1, learning_rate=0.1, gamma=0, total= 1.4min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0, total= 1.4min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0, total= 1.4min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.75, subsample=1, learning_rate=0.1, gamma=0, total= 1.5min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0, total= 1.5min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0, total= 1.6min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.8, subsample=1, learning_rate=0.1, gamma=0, total= 1.5min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0, total= 1.7min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0, total= 1.6min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=1, learning_rate=0.1, gamma=0, total= 1.6min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0, total= 1.6min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0, total= 1.8min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.9, subsample=1, learning_rate=0.1, gamma=0, total= 1.7min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0, total= 1.8min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0, total= 1.7min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0 


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.95, subsample=1, learning_rate=0.1, gamma=0, total= 1.8min


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0, total= 1.9min


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0, total= 1.9min


  if diff:
  if diff:


[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=1.0, subsample=1, learning_rate=0.1, gamma=0, total= 1.7min


[Parallel(n_jobs=4)]: Done  21 out of  21 | elapsed:  9.6min finished


Cross-validation Training Time =  713.4897327423096

Grid scores:

0.840 (+/-0.004) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.7, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.839 (+/-0.008) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.75, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.839 (+/-0.004) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.8, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.005) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.839 (+/-0.005) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.9, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.840 (+/-0.006) for {'subsample': 1, 'max_depth': 5, 'reg_lambda': 1, 

Found that colsample_bytree = .85 works best.  

Now try subsample.

In [8]:
# Sweep subsample only, with colsample_bytree fixed at .85.
K_folds = 3
params = [{
    'max_depth': [5],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0],
    'subsample': [.7, .75, .8, .85, .9, .95, 1.0],
    'colsample_bytree': [.85],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 7 candidates, totalling 21 fits
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.7, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.7, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.7, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.75, learning_rate=0.1, gamma=0 
[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.75, learning_rate=0.1, gamma=0, total= 1.9min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.75, learning_rate=0.1, gamma=0 
[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.7, learning_rate=0.1, gamma=

[Parallel(n_jobs=4)]: Done  21 out of  21 | elapsed: 10.6min finished


Cross-validation Training Time =  784.4040081501007

Grid scores:

0.842 (+/-0.011) for {'subsample': 0.7, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.009) for {'subsample': 0.75, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.010) for {'subsample': 0.8, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.841 (+/-0.009) for {'subsample': 0.85, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.843 (+/-0.007) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.840 (+/-0.007) for {'subsample': 0.95, 'max_depth': 5,

Found that subsample = .9 works best.

Now try gamma.

In [9]:
# Sweep gamma only, with subsample fixed at .9 and colsample_bytree at .85.
K_folds = 3
params = [{
    'max_depth': [5],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0, .2, .4, .6, .8, 1.0, 1.2, 1.4],
    'subsample': [.9],
    'colsample_bytree': [.85],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0 
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0.2 
[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0, total= 1.7min
[CV] booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma=0.2 
[CV]  booster=gbtree, max_depth=5, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, learning_rate=0.1, gamma

[Parallel(n_jobs=4)]: Done  24 out of  24 | elapsed: 10.7min finished


Cross-validation Training Time =  795.1574280261993

Grid scores:

0.843 (+/-0.007) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.005) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0.2, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.004) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0.4, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.841 (+/-0.008) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0.6, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.005) for {'subsample': 0.9, 'max_depth': 5, 'reg_lambda': 1, 'gamma': 0.8, 'colsample_bytree': 0.85, 'booster': 'gbtree', 'learning_rate': 0.1, 'n_estimators': 350}
0.842 (+/-0.008) for {'subsample': 0.9, 'max_depth

Found that gamma = 0 works best.

Now return to tuning of max_depth and min_child_weight. These are the most important parameters, so they will be evaluated together.

In [11]:
# Joint 4x4 grid over max_depth and min_child_weight; the rest stays fixed.
K_folds = 3
params = [{
    'max_depth': [4, 5, 6, 7],
    'min_child_weight': [1, 3, 5, 7],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0],
    'subsample': [.9],
    'colsample_bytree': [.85],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0 
[CV] max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0 
[CV] max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0 
[CV] max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=3, learning_rate=0.1, gamma=0 
[CV]  max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0, total= 1.4min
[CV] max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=3, learning_rate=0.1, gamma=0 
[CV]  max_depth=4, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1,

[CV] max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=3, learning_rate=0.1, gamma=0, total= 2.1min
[CV] max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=3, learning_rate=0.1, gamma=0, total= 2.0min
[CV] max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0, total= 2.0min
[CV] max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=7, learning_rate=0.1, gamma=0 
[CV

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed: 15.8min


[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=7, learning_rate=0.1, gamma=0, total= 2.1min
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0 
[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=7, learning_rate=0.1, gamma=0, total= 2.1min
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0 
[CV]  max_depth=6, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=7, learning_rate=0.1, gamma=0, total= 2.1min
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=3, learning_rate=0.1, gamma=0 
[CV]  max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=1, learning_rate=0.1, gamma=0, to

[Parallel(n_jobs=4)]: Done  48 out of  48 | elapsed: 23.1min finished


Cross-validation Training Time =  1592.945865869522

Grid scores:

0.840 (+/-0.004) for {'max_depth': 4, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 1, 'learning_rate': 0.1, 'n_estimators': 350}
0.836 (+/-0.007) for {'max_depth': 4, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 3, 'learning_rate': 0.1, 'n_estimators': 350}
0.835 (+/-0.008) for {'max_depth': 4, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 5, 'learning_rate': 0.1, 'n_estimators': 350}
0.838 (+/-0.007) for {'max_depth': 4, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 7, 'learning_rate': 0.1, 'n_estimators': 350}
0.843 (+/-0.007) for {'max_depth': 5, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 1, 'learning_rate': 0.1, 'n_estimators': 350}
0.839 (+/-0.005) for {'max_depth': 5, 'reg_lambd

Found that max_depth = 7 and min_child_weight = 5 work best. The best max_depth was at the top of the search range, so expand the search upward.

In [12]:
# Extend max_depth upward (7-10) and narrow min_child_weight to 4-6.
K_folds = 3
params = [{
    'max_depth': [7, 8, 9, 10],
    'min_child_weight': [4, 5, 6],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0],
    'subsample': [.9],
    'colsample_bytree': [.85],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0 
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0 
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0 
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0, total= 2.4min
[CV] max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=7, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4,

[CV] max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0, total= 3.4min
[CV] max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0, total= 3.4min
[CV] max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=5, learning_rate=0.1, gamma=0 
[CV]  max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=4, learning_rate=0.1, gamma=0, total= 3.4min
[CV] max_depth=10, reg_lambda=1, n_estimators=350, colsample_bytree=0.85, subsample=0.9, min_child_weight=6, learning_rate=0.1, gamma

[Parallel(n_jobs=4)]: Done  36 out of  36 | elapsed: 26.8min finished


Cross-validation Training Time =  1896.5005655288696

Grid scores:

0.844 (+/-0.005) for {'max_depth': 7, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 4, 'learning_rate': 0.1, 'n_estimators': 350}
0.845 (+/-0.008) for {'max_depth': 7, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 5, 'learning_rate': 0.1, 'n_estimators': 350}
0.843 (+/-0.004) for {'max_depth': 7, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 6, 'learning_rate': 0.1, 'n_estimators': 350}
0.844 (+/-0.008) for {'max_depth': 8, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 4, 'learning_rate': 0.1, 'n_estimators': 350}
0.845 (+/-0.006) for {'max_depth': 8, 'reg_lambda': 1, 'gamma': 0, 'colsample_bytree': 0.85, 'subsample': 0.9, 'min_child_weight': 5, 'learning_rate': 0.1, 'n_estimators': 350}
0.846 (+/-0.006) for {'max_depth': 8, 'reg_lamb

Take a second pass over the other parameters. Search the parameter space more narrowly, tuning two parameters jointly in each grid.

In [3]:
# Joint 5x5 grid over subsample and colsample_bytree at max_depth=10, min_child_weight=6.
K_folds = 3
params = [{
    'max_depth': [10],
    'min_child_weight': [6],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0],
    'subsample': [.85, .88, .9, .92, .95],
    'colsample_bytree': [.80, .83, .85, .87, .90],
    'reg_lambda': [1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV] colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV] colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV] colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1 
[CV]  colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.4min
[CV] colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1 
[CV]  colsample_bytree=0.8, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate

[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.95, reg_lambda=1, total= 3.3min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.95, reg_lambda=1, total= 3.2min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.95, reg_lambda=1, total= 3.2min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed: 31.2min


[CV]  colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.6min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_lambda=1 
[CV]  colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.6min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_lambda=1 
[CV]  colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.6min
[CV] colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.92, reg_lambda=1 
[CV]  colsample_bytree=0.85, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_l

[CV] colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1 
[CV]  colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.8min
[CV] colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_lambda=1 
[CV]  colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.85, reg_lambda=1, total= 4.0min
[CV] colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_lambda=1 
[CV]  colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.8min
[CV] colsample_bytree=0.9, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.9, reg_lambda=1 

[Parallel(n_jobs=4)]: Done  75 out of  75 | elapsed: 67.9min finished


Cross-validation Training Time =  4361.710444927216

Grid scores:

0.845 (+/-0.003) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.85, 'reg_lambda': 1}
0.847 (+/-0.006) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 1}
0.847 (+/-0.005) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 1}
0.847 (+/-0.004) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.92, 'reg_lambda': 1}
0.848 (+/-0.003) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.95, 'reg_lambda': 1}
0.846 (+/-0.008) for {'n_estimators': 350, '

In [4]:
# Joint 5x5 grid over gamma and reg_lambda with subsample=.88, colsample_bytree=.83.
K_folds = 3
params = [{
    'max_depth': [10],
    'min_child_weight': [6],
    'learning_rate': [.1],
    'n_estimators': [350],
    'gamma': [0, .05, .10, .15, .20],
    'subsample': [.88],
    'colsample_bytree': [.83],
    'reg_lambda': [.01, .05, .1, .5, 1],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_esti

[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.05, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.05, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.5, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.05, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.05, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=1, total= 3.3min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rat

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed: 30.5min


[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05, total= 3.3min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05, total= 3.3min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.5 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.1, n_estimators=350, learning

[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.01, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.1 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learning_rate=0.1, subsample=0.88, reg_lambda=0.05, total= 3.4min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0.2, n_estimators=350, learnin

[Parallel(n_jobs=4)]: Done  75 out of  75 | elapsed: 64.4min finished


Cross-validation Training Time =  4151.135474443436

Grid scores:

0.849 (+/-0.005) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.847 (+/-0.003) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.05}
0.848 (+/-0.005) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.1}
0.848 (+/-0.006) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.5}
0.849 (+/-0.006) for {'n_estimators': 350, 'max_depth': 10, 'min_child_weight': 6, 'learning_rate': 0.1, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 1}
0.848 (+/-0.003) for {'n_est

The tuned values are gamma = 0, subsample = .88, colsample_bytree = .83, reg_lambda = 0.01.

## Optimal Learning Rate

Now determine optimal learning rate and number of estimators.

In [73]:
def crossvalidateLearningRate(X_train, y_train, maxEstimators, otherparams, cv_folds, early_stopping_rounds, metrics=None):
    """Run xgboost's built-in cross-validation with early stopping.

    Parameters
    ----------
    X_train, y_train
        Features and labels; wrapped together in one ``xgboost.DMatrix``.
    maxEstimators : int
        Upper bound on the number of boosting rounds (``num_boost_round``).
    otherparams : dict or list of (key, value) pairs
        Booster parameters, forwarded to ``xgboost.cv`` unchanged.
    cv_folds : int
        Number of folds (``nfold``).
    early_stopping_rounds : int
        Forwarded to ``xgboost.cv``.
    metrics : str or list of str, optional
        Evaluation metric(s) to record. When omitted, ``'error'`` is used
        unless ``otherparams`` already carries an ``eval_metric``.

    Returns
    -------
    The evaluation history produced by ``xgboost.cv(..., as_pandas=False)``;
    callers in this notebook index it with ``'test-error-mean'`` and
    ``'test-error-std'``.
    """
    if metrics is None:
        # Callers read 'test-error-*' keys, so pin the metric explicitly instead of
        # relying on the library default (which differs between xgboost releases).
        # An eval_metric the caller put in otherparams is still honoured: () is
        # xgboost.cv's own default and lets it fall back to that parameter.
        metrics = () if dict(otherparams).get('eval_metric') else 'error'
    dTrain = xgboost.DMatrix(X_train, label=y_train)
    return xgboost.cv(otherparams, dTrain, num_boost_round=maxEstimators, nfold=cv_folds,
                      metrics=metrics, early_stopping_rounds=early_stopping_rounds, as_pandas=False)

# Sweep candidate learning rates; for each, cross-validate with early stopping
# and record the last reported test error and the number of rounds kept.
rates = [.3, .1, .03, .01]
errorArray = []
nEstArray = []
for rate in rates:
    t1 = time.time()
    # The sklearn wrapper is used here only to turn keyword settings into booster params.
    clf = xgboost.XGBClassifier(
        learning_rate=rate,
        max_depth=10,
        min_child_weight=6,
        gamma=0,
        subsample=.88,
        colsample_bytree=.83,
        reg_lambda=.01,
        n_jobs=4)
    params = clf.get_xgb_params()
    cvOUT = crossvalidateLearningRate(X_train, y_train, 10000, params, 3, 50)
    testErrors, errorSTDs = cvOUT['test-error-mean'], cvOUT['test-error-std']
    numIterations = len(testErrors)
    finalError, finalSTD = testErrors[-1], errorSTDs[-1]
    errorArray.append(finalError)
    nEstArray.append(numIterations)
    t2 = time.time()
    # One print call, one line per item, followed by a blank separator line.
    print("Learning rate = %s" % rate,
          "Cross-validation time = %s" % (t2-t1),
          "Number of iterations = %s" % numIterations,
          "Test Error = %s +/- %s" % (finalError, finalSTD),
          "",
          sep="\n")

# Keep the rate with the lowest recorded error, and its matching round count
ind = np.argmin(errorArray)
optimal_rate, optimal_n = rates[ind], nEstArray[ind]

Learning rate = 0.3
Cross-validation time = 78.36275434494019
Number of iterations = 122
Test Error = 0.15399966666666667 +/- 0.0017764808533227178

Learning rate = 0.1
Cross-validation time = 142.0630099773407
Number of iterations = 260
Test Error = 0.153 +/- 0.0024515970305088865

Learning rate = 0.03
Cross-validation time = 455.487779378891
Number of iterations = 951
Test Error = 0.15155 +/- 0.0027175931753422255

Learning rate = 0.01
Cross-validation time = 810.1910254955292
Number of iterations = 1741
Test Error = 0.15714999999999998 +/- 0.0009138533799248164



NameError: name 'y_label' is not defined

## Final Optimizations

Take a final pass through the parameters and optimize the model.

In [78]:
# 5-fold grid search over tree depth and minimum child weight; all other settings fixed
K_folds = 5
params = [{
    'max_depth': [8, 9, 10, 11, 12],
    'min_child_weight': [4, 5, 6, 7, 8],
    'learning_rate': [optimal_rate],
    'n_estimators': [optimal_n],
    'gamma': [0],
    'subsample': [.88],
    'colsample_bytree': [.83],
    'reg_lambda': [.01],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 5 folds for each of 25 candidates, totalling 125 fits


Exception ignored in: <bound method DMatrix.__del__ of <xgboost.core.DMatrix object at 0x7fbf896297b8>>
Traceback (most recent call last):
  File "/home/tony/MachineLearning/env/lib/python3.5/site-packages/xgboost/core.py", line 368, in __del__
    if self.handle is not None:
AttributeError: 'DMatrix' object has no attribute 'handle'


[CV] colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 8.8min
[CV] colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=8, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.0

[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 9.9min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=5, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 9.9min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=5, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 9.8min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=5, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=4, gamma=0, n_estimators=951, learning_rate=0.03, 

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed: 81.5min


[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=5, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 9.9min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=5, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total= 9.9min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=10.0min
[CV] colsample_bytree=0.83, max_depth=9, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=9, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, 

[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=11.2min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=10.9min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=10, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=11.0min
[CV] colsample_bytree=0.83, max_depth=10, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=

[CV]  colsample_bytree=0.83, max_depth=11, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=12.2min
[CV] colsample_bytree=0.83, max_depth=11, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=11, min_child_weight=6, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=12.1min
[CV] colsample_bytree=0.83, max_depth=11, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=11, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=12.1min
[CV] colsample_bytree=0.83, max_depth=11, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=11, min_child_weight=7, gamma=0, n_estimators=951, learning_rate

[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=13.4min
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=8, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=13.4min
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=8, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.88, reg_lambda=0.01, total=13.2min
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=8, gamma=0, n_estimators=951, learning_rate=

[Parallel(n_jobs=4)]: Done 125 out of 125 | elapsed: 353.5min finished


Cross-validation Training Time =  22144.150705099106

Grid scores:

0.848 (+/-0.007) for {'n_estimators': 951, 'max_depth': 8, 'min_child_weight': 4, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.849 (+/-0.006) for {'n_estimators': 951, 'max_depth': 8, 'min_child_weight': 5, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.850 (+/-0.006) for {'n_estimators': 951, 'max_depth': 8, 'min_child_weight': 6, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.851 (+/-0.006) for {'n_estimators': 951, 'max_depth': 8, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.850 (+/-0.006) for {'n_estimators': 951, 'max_depth': 8, 'min_child_weight': 8, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.88, 'reg_lambda': 0.01}
0.851 (+/-0.007) for {

Found max_depth = 12 and min_child_weight = 7.

Final pass for subsample.

In [82]:
# 4-fold grid search over subsample only; all other settings fixed
K_folds = 4
params = [{
    'max_depth': [12],
    'min_child_weight': [7],
    'learning_rate': [optimal_rate],
    'n_estimators': [optimal_n],
    'gamma': [0],
    'subsample': [.85, .875, .90],
    'colsample_bytree': [.83],
    'reg_lambda': [.01],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 4 folds for each of 3 candidates, totalling 12 fits
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.85, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.85, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.85, reg_lambda=0.01 
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.85, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.85, reg_lambda=0.01, total=12.7min
[CV] colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.875, reg_lambda=0.01 
[CV]  colsample_bytree=0.83, max_depth=12, min_child_weight=7, gamma=0, 

[Parallel(n_jobs=4)]: Done  12 out of  12 | elapsed: 37.9min remaining:    0.0s
[Parallel(n_jobs=4)]: Done  12 out of  12 | elapsed: 37.9min finished


Cross-validation Training Time =  3214.3882310390472

Grid scores:

0.851 (+/-0.008) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.85, 'reg_lambda': 0.01}
0.852 (+/-0.012) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.875, 'reg_lambda': 0.01}
0.852 (+/-0.013) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}

Best parameter set:
{'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.83, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}



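Found subsample = .9 (it ties with .875 at 0.852 to three decimals, so the difference is within the cross-validation noise).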

Final pass for colsample_bytree.

In [83]:
# 4-fold grid search over colsample_bytree only; all other settings fixed
K_folds = 4
params = [{
    'max_depth': [12],
    'min_child_weight': [7],
    'learning_rate': [optimal_rate],
    'n_estimators': [optimal_n],
    'gamma': [0],
    'subsample': [.9],
    'colsample_bytree': [.80, .825, .85],
    'reg_lambda': [.01],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 4 folds for each of 3 candidates, totalling 12 fits
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01, total=12.0min
[CV] colsample_bytree=0.825, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators

[Parallel(n_jobs=4)]: Done  12 out of  12 | elapsed: 37.2min remaining:    0.0s
[Parallel(n_jobs=4)]: Done  12 out of  12 | elapsed: 37.2min finished


Cross-validation Training Time =  3136.1603841781616

Grid scores:

0.852 (+/-0.011) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}
0.852 (+/-0.011) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.825, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}
0.851 (+/-0.012) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.85, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}

Best parameter set:
{'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}



Found colsample_bytree = .8 (tied with .825 at 0.852 to three decimals).

Final pass for reg_lambda.

In [85]:
# 4-fold grid search over reg_lambda only; all other settings fixed
K_folds = 4
params = [{
    'max_depth': [12],
    'min_child_weight': [7],
    'learning_rate': [optimal_rate],
    'n_estimators': [optimal_n],
    'gamma': [0],
    'subsample': [.9],
    'colsample_bytree': [.8],
    'reg_lambda': [.005, .01, .05],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 4 folds for each of 3 candidates, totalling 12 fits
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learnin

[Parallel(n_jobs=6)]: Done   8 out of  12 | elapsed: 31.8min remaining: 15.9min


[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.05, total=15.9min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.01, total=16.0min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.05, total=15.9min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.05, total=15.9min


[Parallel(n_jobs=6)]: Done  12 out of  12 | elapsed: 32.1min finished


Cross-validation Training Time =  2829.668581724167

Grid scores:

0.852 (+/-0.010) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.852 (+/-0.011) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.01}
0.852 (+/-0.010) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.05}

Best parameter set:
{'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}



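Found reg_lambda = .005 (all three candidates score 0.852 to three decimals, so this choice is within the cross-validation noise).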

Final pass for gamma.

In [87]:
# 4-fold grid search over gamma only; all other settings fixed
K_folds = 4
params = [{
    'max_depth': [12],
    'min_child_weight': [7],
    'learning_rate': [optimal_rate],
    'n_estimators': [optimal_n],
    'gamma': [0, .01, .05],
    'subsample': [.9],
    'colsample_bytree': [.8],
    'reg_lambda': [.005],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 4 folds for each of 3 candidates, totalling 12 fits
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.01, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.01, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.01, n_estimators=95

[Parallel(n_jobs=8)]: Done   4 out of  12 | elapsed: 19.0min remaining: 38.1min


[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=19.0min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=19.1min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.01, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=19.1min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.01, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=19.1min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.05, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=11.9min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0.05, n_estimators=951, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=11.9min
[CV]  colsample_bytree=0.8, max_depth=12, min_child_

[Parallel(n_jobs=8)]: Done  12 out of  12 | elapsed: 31.0min finished


Cross-validation Training Time =  2762.3977806568146

Grid scores:

0.852 (+/-0.010) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.852 (+/-0.012) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0.01, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.011) for {'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0.05, 'subsample': 0.9, 'reg_lambda': 0.005}

Best parameter set:
{'n_estimators': 951, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}



Found gamma = 0.

Re-optimize number of estimators.

In [88]:
# NOTE(review): this re-defines the helper already defined in the "Optimal Learning Rate"
# cell; the later definition shadows the earlier one. Prefer keeping a single copy.
def crossvalidateLearningRate(X_train, y_train, maxEstimators, otherparams, cv_folds, early_stopping_rounds, metrics=None):
    """Run xgboost's built-in cross-validation with early stopping.

    Parameters
    ----------
    X_train, y_train
        Features and labels; wrapped together in one ``xgboost.DMatrix``.
    maxEstimators : int
        Upper bound on the number of boosting rounds (``num_boost_round``).
    otherparams : dict or list of (key, value) pairs
        Booster parameters, forwarded to ``xgboost.cv`` unchanged.
    cv_folds : int
        Number of folds (``nfold``).
    early_stopping_rounds : int
        Forwarded to ``xgboost.cv``.
    metrics : str or list of str, optional
        Evaluation metric(s) to record. When omitted, ``'error'`` is used
        unless ``otherparams`` already carries an ``eval_metric``.

    Returns
    -------
    The evaluation history produced by ``xgboost.cv(..., as_pandas=False)``;
    the code below indexes it with ``'test-error-mean'`` and ``'test-error-std'``.
    """
    if metrics is None:
        # The caller reads 'test-error-*' keys, so pin the metric explicitly instead
        # of relying on the library default (which differs between xgboost releases).
        # An eval_metric supplied in otherparams is still honoured: () is
        # xgboost.cv's own default and lets it fall back to that parameter.
        metrics = () if dict(otherparams).get('eval_metric') else 'error'
    dTrain = xgboost.DMatrix(X_train, label=y_train)
    return xgboost.cv(otherparams, dTrain, num_boost_round=maxEstimators, nfold=cv_folds,
                      metrics=metrics, early_stopping_rounds=early_stopping_rounds, as_pandas=False)

# Re-run early-stopped cross-validation (10 folds) with the tuned settings
# to get an updated number of boosting rounds.
t1 = time.time()
# The sklearn wrapper is used here only to turn keyword settings into booster params.
clf = xgboost.XGBClassifier(
    learning_rate=.03,
    max_depth=12,
    min_child_weight=7,
    gamma=0,
    subsample=.9,
    colsample_bytree=.8,
    reg_lambda=.005,
    n_jobs=8)
params = clf.get_xgb_params()
cvOUT = crossvalidateLearningRate(X_train, y_train, 10000, params, 10, 50)
testErrors, errorSTDs = cvOUT['test-error-mean'], cvOUT['test-error-std']
numIterations = len(testErrors)
finalError, finalSTD = testErrors[-1], errorSTDs[-1]
t2 = time.time()
# One print call, one line per item, followed by a blank separator line.
print("Cross-validation time = %s" % (t2-t1),
      "Number of iterations = %s" % numIterations,
      "Test Error = %s +/- %s" % (finalError, finalSTD),
      "",
      sep="\n")

Learning rate = 0.01
Cross-validation time = 1949.3256273269653
Number of iterations = 915
Test Error = 0.14725000000000002 +/- 0.004702393007820591



Final cross-validation of the number of estimators.

In [90]:
# 10-fold grid search over n_estimators around the early-stopping estimate; all else fixed
K_folds = 10
params = [{
    'max_depth': [12],
    'min_child_weight': [7],
    'learning_rate': [.03],
    'n_estimators': [900, 905, 910, 915, 920, 925, 930],
    'gamma': [0],
    'subsample': [.9],
    'colsample_bytree': [.8],
    'reg_lambda': [.005],
}]
crossvalidate_XGBoost(X_train, y_train, params, K_folds)

Fitting 10 folds for each of 7 candidates, totalling 70 fits
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=900, learn

[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=910, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=21.7min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=915, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 


[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed: 86.4min


[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=910, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=21.8min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=915, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=910, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=21.7min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=915, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=910, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=21.7min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=915, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=910, learning_rate=0.03, 

[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=925, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=22.2min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=930, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=925, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=22.1min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=930, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=925, learning_rate=0.03, subsample=0.9, reg_lambda=0.005, total=22.2min
[CV] colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=930, learning_rate=0.03, subsample=0.9, reg_lambda=0.005 
[CV]  colsample_bytree=0.8, max_depth=12, min_child_weight=7, gamma=0, n_estimators=925, learning_rate=0.03, 

[Parallel(n_jobs=8)]: Done  70 out of  70 | elapsed: 193.8min finished


Cross-validation Training Time =  12526.923834085464

Grid scores:

0.851 (+/-0.013) for {'n_estimators': 900, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.013) for {'n_estimators': 905, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.015) for {'n_estimators': 910, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.015) for {'n_estimators': 915, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.014) for {'n_estimators': 920, 'max_depth': 12, 'min_child_weight': 7, 'learning_rate': 0.03, 'colsample_bytree': 0.8, 'gamma': 0, 'subsample': 0.9, 'reg_lambda': 0.005}
0.851 (+/-0.015) for {

Found n_estimators = 930.

Train final model and make test predictions.

In [91]:
# Train the final model on the full training set with the tuned hyper-parameters
final_model = xgboost.XGBClassifier(learning_rate = .03,
                                    n_estimators = 930,
                                    max_depth = 12, 
                                    min_child_weight = 7, 
                                    gamma = 0, 
                                    subsample = .9,
                                    colsample_bytree = .8,
                                    reg_lambda = .005,
                                    n_jobs=8)
final_model.fit(X_train, y_train)

# Predict labels for the test set and shape them as a single column
y_label = final_model.predict(X_test)
y_label = y_label.reshape((len(y_label), 1))
# 1-based row ids, one per prediction, as the first output column
result_col_1 = np.arange(1, len(y_label) + 1).reshape((len(y_label), 1))
result = np.concatenate((result_col_1, y_label), axis=1)
# comments='' is required: by default np.savetxt prefixes the header with '# ',
# which would write '# Id,Prediction' instead of a plain 'Id,Prediction' CSV header.
np.savetxt("XGBoost_refined_pred_labels.txt", result, fmt="%d", delimiter=',',
           header='Id,Prediction', comments='')