In [1]:
import json
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC, LinearSVC
from sklearn.multiclass import OneVsRestClassifier

In [2]:
train = json.load(open('../_data/train.json'))
test = json.load(open('../_data/test.json'))

In [3]:
train_as_text = [' '.join(sample['ingredients']).lower() for sample in train]
train_cuisine = [sample['cuisine'] for sample in train]

test_as_text = [' '.join(sample['ingredients']).lower() for sample in test]

In [4]:
train_as_text[283]

'bertolli® classico olive oil boneless skinless chicken breast halves eggs linguine chicken broth bacon, crisp-cooked and crumbled bertolli vineyard premium collect marinara with burgundi wine sauc bread crumb fresh shredded mozzarella cheese'

In [5]:
from time import strftime

def print_time():
    print(strftime('%y%m%d-%H%M%S'))
    return

In [6]:
from sklearn.pipeline import FeatureUnion

In [7]:
def itself(x):
    return x

In [9]:
import re

In [10]:
SPEC_REMOVE = re.compile(r'(\'|\’|\(.*oz.*\)|(\()|(\)))')
SPEC_AND = re.compile(r'\&')
SPEC_ELSE = re.compile(r'[^\w\s\%_]')

def clean_ingr(ingr):
    ingr = re.sub(SPEC_REMOVE, '', ingr)
    ingr = re.sub(SPEC_AND, 'and', ingr)
    ingr = re.sub(SPEC_ELSE, ' ', ingr)
    return ' '.join(ingr.split())

In [11]:
def get_ingrs(given):
    ingrs = [[clean_ingr(i).lower() for i in recipe['ingredients']] for recipe in given]
    return ingrs

def get_labels(given):
    return [r['cuisine'] for r in given]

In [12]:
%%time
print_time()
train_ingrs = get_ingrs(train)
print_time()

180410-160646
180410-160648
CPU times: user 1.77 s, sys: 21.2 ms, total: 1.79 s
Wall time: 1.79 s


In [13]:
def combine_words(ilist):
    return ' '.join(ilist)

In [14]:
%%time
dvec_all = FeatureUnion([
        ("ingrs", TfidfVectorizer(strip_accents='unicode',
                                  tokenizer=itself,
                                  preprocessor=itself)),
        ("words", TfidfVectorizer(strip_accents='unicode',
                                  preprocessor=combine_words,
                                  #stop_words='english',
                                  )),
        ]).fit(get_ingrs(train+test))

CPU times: user 3.69 s, sys: 42.1 ms, total: 3.73 s
Wall time: 3.73 s


In [14]:
%%time
train_mat = dvec_all.transform(train_ingrs)
train_mat

CPU times: user 1.01 s, sys: 7.65 ms, total: 1.02 s
Wall time: 1.02 s


In [15]:
import xgboost as xgb
from xgboost import XGBClassifier

In [16]:
train_mat.shape

(39774, 10228)

In [17]:
feats = np.array(dvec_all.get_feature_names())

In [18]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

In [19]:
train_labels0 = [x['cuisine'] for x in train]
train_labels = le.fit_transform(train_labels0)

In [20]:
le.classes_

array(['brazilian', 'british', 'cajun_creole', 'chinese', 'filipino',
       'french', 'greek', 'indian', 'irish', 'italian', 'jamaican',
       'japanese', 'korean', 'mexican', 'moroccan', 'russian',
       'southern_us', 'spanish', 'thai', 'vietnamese'],
      dtype='<U12')

In [21]:
train_labels

array([ 6, 16,  4, ...,  8,  3, 13])

In [30]:
#Choose all predictors except target & IDcols
#predictors = [x for x in train.columns if x not in [target, IDcol]]

xgb1 = XGBClassifier(
    learning_rate=0.3,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20,
    n_jobs=8,
    seed=27)

In [31]:
xgb_param = xgb1.get_xgb_params()

In [32]:
xgtrain = xgb.DMatrix(data=train_mat,
                      label=train_labels,
                      feature_names=feats)

In [33]:
%%time
print_time()

cvresult = xgb.cv(xgb_param,
                  xgtrain,
                  num_boost_round=xgb1.get_params()['n_estimators'],
                  nfold=3,
                  metrics='merror',
                  early_stopping_rounds=50,
                  verbose_eval=True)

print_time()

180407-023007
[0]	train-merror:0.401996+0.00735423	test-merror:0.4249+0.00199022
[1]	train-merror:0.333534+0.00286543	test-merror:0.365968+0.00513347
[2]	train-merror:0.304281+0.00488144	test-merror:0.343692+0.00335155
[3]	train-merror:0.287663+0.00429593	test-merror:0.330115+0.00256661
[4]	train-merror:0.274137+0.00269431	test-merror:0.320989+0.00428141
[5]	train-merror:0.264809+0.00360944	test-merror:0.313949+0.00271957
[6]	train-merror:0.255418+0.00319988	test-merror:0.307965+0.00411684
[7]	train-merror:0.248089+0.00346089	test-merror:0.302534+0.00474215
[8]	train-merror:0.240446+0.00288062	test-merror:0.298059+0.00509927
[9]	train-merror:0.2344+0.00293955	test-merror:0.294388+0.00491602
[10]	train-merror:0.227511+0.00361409	test-merror:0.289863+0.00501726
[11]	train-merror:0.221363+0.00219797	test-merror:0.287374+0.00491201
[12]	train-merror:0.215656+0.00175087	test-merror:0.283703+0.00544241
[13]	train-merror:0.210213+0.0017172	test-merror:0.28046+0.00605561
[14]	train-merror:0.20

[117]	train-merror:0.04425+0.00152584	test-merror:0.224091+0.00438118
[118]	train-merror:0.043747+0.00137123	test-merror:0.223865+0.00396425
[119]	train-merror:0.043282+0.00149147	test-merror:0.223563+0.00367371
[120]	train-merror:0.0426533+0.00144318	test-merror:0.223915+0.0041952
[121]	train-merror:0.0424147+0.00160279	test-merror:0.223915+0.00416775
[122]	train-merror:0.041824+0.00175517	test-merror:0.223563+0.00393997
[123]	train-merror:0.0412707+0.00168268	test-merror:0.223714+0.00365837
[124]	train-merror:0.0409563+0.00162705	test-merror:0.223286+0.00408003
[125]	train-merror:0.040428+0.00155439	test-merror:0.223211+0.00388406
[126]	train-merror:0.0399003+0.00159702	test-merror:0.222985+0.00381901
[127]	train-merror:0.039423+0.00157695	test-merror:0.223161+0.00376494
[128]	train-merror:0.0390583+0.00168696	test-merror:0.222935+0.00370341
[129]	train-merror:0.0384673+0.00156638	test-merror:0.222859+0.00369779
[130]	train-merror:0.0379017+0.00162355	test-merror:0.22306+0.00369574
[

[231]	train-merror:0.0110373+9.89759e-05	test-merror:0.221351+0.0050389
[232]	train-merror:0.0108617+0.000106537	test-merror:0.221225+0.00510113
[233]	train-merror:0.0107357+9.92718e-05	test-merror:0.22125+0.0048456
[234]	train-merror:0.0107353+0.000108149	test-merror:0.22125+0.00476633
[235]	train-merror:0.010648+8.90955e-05	test-merror:0.221325+0.00476722
[236]	train-merror:0.0104843+0.000134026	test-merror:0.221175+0.00469102
[237]	train-merror:0.010346+0.000222573	test-merror:0.221451+0.00471389
[238]	train-merror:0.0102203+0.000294094	test-merror:0.221426+0.00468878
[239]	train-merror:0.0100067+0.000223043	test-merror:0.221426+0.00462125
[240]	train-merror:0.00994367+0.00018833	test-merror:0.221677+0.00425903
[241]	train-merror:0.00989333+0.000174727	test-merror:0.221476+0.00443341
[242]	train-merror:0.00973033+0.000349774	test-merror:0.221175+0.00436083
[243]	train-merror:0.00965433+0.000282317	test-merror:0.221401+0.00407779
[244]	train-merror:0.00954133+0.000325734	test-merror:

In [34]:
cvresult

Unnamed: 0,test-merror-mean,test-merror-std,train-merror-mean,train-merror-std
0,0.424900,0.001990,0.401996,0.007354
1,0.365968,0.005133,0.333534,0.002865
2,0.343692,0.003352,0.304281,0.004881
3,0.330115,0.002567,0.287663,0.004296
4,0.320989,0.004281,0.274137,0.002694
5,0.313949,0.002720,0.264809,0.003609
6,0.307965,0.004117,0.255418,0.003200
7,0.302534,0.004742,0.248089,0.003461
8,0.298059,0.005099,0.240446,0.002881
9,0.294388,0.004916,0.234400,0.002940


In [35]:
cvresult.shape[0]

211

In [36]:
xgb1.set_params(n_estimators=cvresult.shape[0])

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.8, gamma=0, learning_rate=0.3, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=211,
       n_jobs=8, nthread=None, num_class=20, objective='multi:softmax',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=27, silent=True, subsample=0.8)

In [37]:
from sklearn.model_selection import GridSearchCV

In [45]:
param_test1 = {
    'max_depth': range(3, 11, 1),
    'min_child_weight': range(1, 6, 2)
}
gsearch1 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=211,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test1,
                        scoring='accuracy',
                        n_jobs=-1,
                        iid=False,
                        cv=5,
                        verbose=50)

In [46]:
%%time
print_time()

gsearch1.fit(train_mat, train_labels)

print_time()

180407-024930
Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] max_depth=3, min_child_weight=1 .................................
[CV] max_depth=3, min_child_weight=1 .................................
[CV] max_depth=3, min_child_weight=1 .................................
[CV] max_depth=3, min_child_weight=1 .................................
[CV] max_depth=3, min_child_weight=1 .................................
[CV] max_depth=3, min_child_weight=3 .................................
[CV] max_depth=3, min_child_weight=3 .................................
[CV] max_depth=3, min_child_weight=3 .................................
[CV]  max_depth=3, min_child_weight=3, score=0.7817998994469583, total=10.4min
[CV] max_depth=3, min_child_weight=3 .................................
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 10.8min
[CV]  max_depth=3, min_child_weight=3, score=0.7815875408188897, total=10.4min
[CV] max_depth=3, min_child_weight=3 ............................

[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed: 65.0min
[CV]  max_depth=5, min_child_weight=1, score=0.7854449472096531, total=16.0min
[CV] max_depth=5, min_child_weight=5 .................................
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 65.2min
[CV]  max_depth=5, min_child_weight=1, score=0.786460299484082, total=15.7min
[CV] max_depth=6, min_child_weight=1 .................................
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 65.3min
[CV]  max_depth=5, min_child_weight=3, score=0.7751226260847692, total=15.1min
[CV] max_depth=6, min_child_weight=1 .................................
[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 67.8min
[CV]  max_depth=5, min_child_weight=3, score=0.7903611425695231, total=14.9min
[CV] max_depth=6, min_child_weight=1 .................................
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 69.9min
[CV]  max_depth=5, min_child_weight=5, score=0.7823411203215273, total=14.6min
[CV] max_dep

[CV] max_depth=8, min_child_weight=3 .................................
[Parallel(n_jobs=-1)]: Done  75 tasks      | elapsed: 155.1min
[CV]  max_depth=8, min_child_weight=1, score=0.7808339613162522, total=22.3min
[CV] max_depth=8, min_child_weight=3 .................................
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed: 159.4min
[CV]  max_depth=8, min_child_weight=1, score=0.7929127921588339, total=22.2min
[CV] max_depth=8, min_child_weight=3 .................................
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed: 162.0min
[CV]  max_depth=8, min_child_weight=1, score=0.7839366515837104, total=22.4min
[CV] max_depth=8, min_child_weight=5 .................................
[Parallel(n_jobs=-1)]: Done  78 tasks      | elapsed: 165.7min
[CV]  max_depth=8, min_child_weight=1, score=0.7721041378442963, total=22.3min
[CV] max_depth=8, min_child_weight=5 .................................
[Parallel(n_jobs=-1)]: Done  79 tasks      | elapsed: 167.6min
[CV]  max_dept

[Parallel(n_jobs=-1)]: Done 117 out of 120 | elapsed: 282.1min remaining:  7.2min
[CV]  max_depth=10, min_child_weight=5, score=0.7831113345061573, total=22.6min
[CV]  max_depth=10, min_child_weight=5, score=0.7799169497923745, total=19.2min
[CV]  max_depth=10, min_child_weight=5, score=0.7670733241101748, total=21.0min
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 285.1min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 285.1min finished
180407-074554
CPU times: user 11min 34s, sys: 3.11 s, total: 11min 37s
Wall time: 4h 56min 23s


In [47]:
gsearch1.best_params_

{'max_depth': 7, 'min_child_weight': 1}

In [49]:
gsearch1.best_score_

0.78561274309478224

In [50]:
gsearch1.cv_results_



{'mean_fit_time': array([  642.73628664,   620.32630491,   610.20008245,   795.06631241,
          761.16714473,   736.69665003,   934.5931356 ,   888.02642112,
          862.31046548,  1070.54939513,  1017.09788384,   988.94769073,
         1200.4282516 ,  1144.77520609,  1108.63498764,  1329.17790799,
         1265.39946551,  1228.72782454,  1454.07945151,  1378.95135436,
         1342.56531854,  1564.9937315 ,  1484.94002166,  1241.00404072]),
 'mean_score_time': array([  8.00048122,   5.67665977,   7.59882894,   9.86657515,
          7.43226314,   8.07996292,   9.97451515,   8.15625772,
          9.05713754,  10.89853811,   9.66295781,   9.56487074,
         11.63484101,  10.78250852,  10.31252189,  12.60945606,
         11.94687705,  10.80729427,  13.88318534,  12.65755172,
         11.58593607,  14.73599043,  12.58740435,   7.3504014 ]),
 'mean_test_score': array([ 0.78317396,  0.78226922,  0.77993017,  0.78521043,  0.78420481,
         0.78191696,  0.78380319,  0.78438218,  0.78

In [52]:
param_test2 = {
    'gamma': np.linspace(0, 0.5, 11),
}
gsearch2 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=211,
    max_depth=7,
    min_child_weight=1,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test2,
                        scoring='accuracy',
                        n_jobs=-1,
                        iid=False,
                        cv=5,
                        verbose=50)

In [53]:
%%time
print_time()

gsearch2.fit(train_mat, train_labels)

print_time()

180407-092212
Fitting 5 folds for each of 11 candidates, totalling 55 fits
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.05 ......................................................
[CV] gamma=0.05 ......................................................
[CV] gamma=0.05 ......................................................
[CV] .............. gamma=0.0, score=0.7951746670017592, total=20.6min
[CV] gamma=0.05 ......................................................
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 22.1min
[CV] .............. gamma=0.0, score=0.7847274554132128, total=20.7min
[CV] gamma=0.05 ......................................................
[Parallel(n

[CV] .............. gamma=0.3, score=0.7862086321882471, total=20.4min
[CV] gamma=0.45 ......................................................
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed: 108.6min
[CV] ............. gamma=0.35, score=0.7907386435132755, total=20.4min
[CV] gamma=0.45 ......................................................
[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed: 108.8min
[CV] ............. gamma=0.35, score=0.7856963298139769, total=20.8min
[CV] gamma=0.45 ......................................................
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 109.0min
[CV] .............. gamma=0.4, score=0.7856066314996232, total=20.3min
[CV] gamma=0.45 ......................................................
[CV] .............. gamma=0.4, score=0.7859477124183006, total=20.2min
[CV] gamma=0.45 ......................................................
[Parallel(n_jobs=-1)]: Done  42 out of  55 | elapsed: 129.5min remaining: 40.1min
[CV] ..............

In [54]:
gsearch2.best_params_

{'gamma': 0.35000000000000003}

In [56]:
gsearch2.grid_scores_



[mean: 0.78561, std: 0.00610, params: {'gamma': 0.0},
 mean: 0.78521, std: 0.00669, params: {'gamma': 0.050000000000000003},
 mean: 0.78418, std: 0.00523, params: {'gamma': 0.10000000000000001},
 mean: 0.78478, std: 0.00650, params: {'gamma': 0.15000000000000002},
 mean: 0.78596, std: 0.00538, params: {'gamma': 0.20000000000000001},
 mean: 0.78627, std: 0.00677, params: {'gamma': 0.25},
 mean: 0.78531, std: 0.00701, params: {'gamma': 0.30000000000000004},
 mean: 0.78735, std: 0.00708, params: {'gamma': 0.35000000000000003},
 mean: 0.78581, std: 0.00680, params: {'gamma': 0.40000000000000002},
 mean: 0.78511, std: 0.00805, params: {'gamma': 0.45000000000000001},
 mean: 0.78677, std: 0.00642, params: {'gamma': 0.5}]

In [59]:
param_test3 = {
    'gamma': np.linspace(0, 0.5, 21),
}
gsearch3 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=211,
    max_depth=7,
    min_child_weight=1,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test3,
                        scoring='accuracy',
                        n_jobs=7,
                        iid=False,
                        cv=5,
                        verbose=50)

In [60]:
%%time
print_time()

gsearch3.fit(train_mat, train_labels)

print_time()

180407-142417
Fitting 5 folds for each of 21 candidates, totalling 105 fits
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.0 .......................................................
[CV] gamma=0.025 .....................................................
[CV] gamma=0.025 .....................................................
[CV] .............. gamma=0.0, score=0.7879703032590915, total=14.9min
[CV] gamma=0.025 .....................................................
[Parallel(n_jobs=7)]: Done   1 tasks      | elapsed: 15.4min
[CV] ............ gamma=0.025, score=0.7929127921588339, total=17.3min
[CV] gamma=0.025 .....................................................
[Parallel(n_jobs=7)]: Done   2 tasks      | elapsed: 17.8min
[CV] .............. g

[Parallel(n_jobs=7)]: Done  38 tasks      | elapsed: 125.1min
[CV] ............ gamma=0.175, score=0.7843137254901961, total=21.8min
[CV] gamma=0.225 .....................................................
[Parallel(n_jobs=7)]: Done  39 tasks      | elapsed: 125.8min
[CV] ............ gamma=0.175, score=0.7739906929945919, total=21.5min
[CV] gamma=0.225 .....................................................
[Parallel(n_jobs=7)]: Done  40 tasks      | elapsed: 126.6min
[CV] ............... gamma=0.2, score=0.787490580256217, total=21.5min
[CV] gamma=0.225 .....................................................
[Parallel(n_jobs=7)]: Done  41 tasks      | elapsed: 127.5min
[CV] ............ gamma=0.175, score=0.7896061406820184, total=21.9min
[CV] gamma=0.225 .....................................................
[Parallel(n_jobs=7)]: Done  42 tasks      | elapsed: 127.9min
[CV] .............. gamma=0.2, score=0.7930384518723297, total=21.8min
[CV] gamma=0.225 ..................................

[CV] ............. gamma=0.375, score=0.796305604423222, total=18.4min
[CV] gamma=0.425 .....................................................
[Parallel(n_jobs=7)]: Done  79 tasks      | elapsed: 230.6min
[CV] ............ gamma=0.375, score=0.7773864922651239, total=19.9min
[CV] gamma=0.425 .....................................................
[Parallel(n_jobs=7)]: Done  80 tasks      | elapsed: 233.1min
[CV] .............. gamma=0.4, score=0.7738649226512389, total=13.0min
[CV] gamma=0.425 .....................................................
[Parallel(n_jobs=7)]: Done  81 tasks      | elapsed: 241.2min
[CV] .............. gamma=0.4, score=0.7856066314996232, total=19.9min
[CV] gamma=0.425 .....................................................
[Parallel(n_jobs=7)]: Done  82 tasks      | elapsed: 241.5min
[CV] .............. gamma=0.4, score=0.7859477124183006, total=17.2min
[CV] gamma=0.425 .....................................................
[Parallel(n_jobs=7)]: Done  83 tasks      

In [61]:
gsearch3.best_params_

{'gamma': 0.35000000000000003}

In [62]:
gsearch3.grid_scores_



[mean: 0.78561, std: 0.00610, params: {'gamma': 0.0},
 mean: 0.78350, std: 0.00569, params: {'gamma': 0.025000000000000001},
 mean: 0.78521, std: 0.00669, params: {'gamma': 0.050000000000000003},
 mean: 0.78496, std: 0.00635, params: {'gamma': 0.075000000000000011},
 mean: 0.78418, std: 0.00523, params: {'gamma': 0.10000000000000001},
 mean: 0.78478, std: 0.00709, params: {'gamma': 0.125},
 mean: 0.78478, std: 0.00650, params: {'gamma': 0.15000000000000002},
 mean: 0.78594, std: 0.00666, params: {'gamma': 0.17500000000000002},
 mean: 0.78596, std: 0.00538, params: {'gamma': 0.20000000000000001},
 mean: 0.78571, std: 0.00706, params: {'gamma': 0.22500000000000001},
 mean: 0.78627, std: 0.00677, params: {'gamma': 0.25},
 mean: 0.78554, std: 0.00577, params: {'gamma': 0.27500000000000002},
 mean: 0.78531, std: 0.00701, params: {'gamma': 0.30000000000000004},
 mean: 0.78664, std: 0.00739, params: {'gamma': 0.32500000000000001},
 mean: 0.78735, std: 0.00708, params: {'gamma': 0.350000000000

## Re-search best n estimators
* max_depth 7
* min_child_weight 1
* gamma 0.35

In [69]:
#Choose all predictors except target & IDcols
#predictors = [x for x in train.columns if x not in [target, IDcol]]

xgb4 = XGBClassifier(
    learning_rate=0.3,
    n_estimators=1000,
    max_depth=7,
    min_child_weight=1,
    gamma=0.35,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20,
    n_jobs=8,
    seed=27)

In [70]:
xgb_param4 = xgb4.get_xgb_params()

In [71]:
xgtrain4 = xgb.DMatrix(data=train_mat,
                      label=train_labels,
                      feature_names=feats)

In [72]:
%%time
print_time()

cvresult = xgb.cv(xgb_param4,
                  xgtrain4,
                  num_boost_round=xgb4.get_params()['n_estimators'],
                  nfold=3,
                  metrics='merror',
                  early_stopping_rounds=50,
                  verbose_eval=True)

print_time()

180407-201950
[0]	train-merror:0.359066+0.00750225	test-merror:0.391914+0.00191514
[1]	train-merror:0.29249+0.00580362	test-merror:0.341228+0.00432246
[2]	train-merror:0.262923+0.00634197	test-merror:0.322095+0.00309737
[3]	train-merror:0.247888+0.00750713	test-merror:0.312541+0.00227912
[4]	train-merror:0.235782+0.00625224	test-merror:0.304344+0.00489874
[5]	train-merror:0.22394+0.00613141	test-merror:0.297883+0.00349578
[6]	train-merror:0.213934+0.00540562	test-merror:0.292226+0.00443771
[7]	train-merror:0.204568+0.00565858	test-merror:0.286971+0.00449594
[8]	train-merror:0.194939+0.0052391	test-merror:0.282974+0.00460129
[9]	train-merror:0.187283+0.0056751	test-merror:0.279102+0.00355724
[10]	train-merror:0.180017+0.00526976	test-merror:0.275859+0.00404864
[11]	train-merror:0.173417+0.00522945	test-merror:0.273143+0.00363775
[12]	train-merror:0.16732+0.00471622	test-merror:0.270327+0.00409868
[13]	train-merror:0.161475+0.00404006	test-merror:0.267713+0.00321238
[14]	train-merror:0.1

[117]	train-merror:0.016594+0.000687989	test-merror:0.222356+0.00498108
[118]	train-merror:0.0162417+0.000542808	test-merror:0.222608+0.00496597
[119]	train-merror:0.0158143+0.000774029	test-merror:0.222356+0.00489663
[120]	train-merror:0.0155503+0.00066953	test-merror:0.222356+0.00441133
[121]	train-merror:0.0152487+0.000660727	test-merror:0.22208+0.00458076
[122]	train-merror:0.0150347+0.000572081	test-merror:0.222155+0.00448426
[123]	train-merror:0.0149593+0.000729452	test-merror:0.222205+0.0047977
[124]	train-merror:0.0145447+0.000595811	test-merror:0.22213+0.00454135
[125]	train-merror:0.0141927+0.000529791	test-merror:0.22213+0.00464622
[126]	train-merror:0.014067+0.000617898	test-merror:0.221703+0.00449289
[127]	train-merror:0.0137527+0.00068142	test-merror:0.221778+0.00427216
[128]	train-merror:0.0135013+0.000679782	test-merror:0.221879+0.00420212
[129]	train-merror:0.0131997+0.000655383	test-merror:0.221552+0.00410926
[130]	train-merror:0.0131367+0.00057287	test-merror:0.22137

[230]	train-merror:0.00290367+0.000373319	test-merror:0.220244+0.00587802
[231]	train-merror:0.002816+0.000337737	test-merror:0.220094+0.00579271
[232]	train-merror:0.00279067+0.000349774	test-merror:0.219968+0.00573159
[233]	train-merror:0.00276533+0.000339146	test-merror:0.219993+0.00587864
[234]	train-merror:0.002728+0.000339418	test-merror:0.219842+0.00603156
[235]	train-merror:0.00272767+0.000291095	test-merror:0.21954+0.00567262
[236]	train-merror:0.00269033+0.000258325	test-merror:0.219591+0.00560959
[237]	train-merror:0.00267767+0.000268613	test-merror:0.219792+0.00569374
[238]	train-merror:0.00260233+0.000293624	test-merror:0.219968+0.00553901
[239]	train-merror:0.00258967+0.000275737	test-merror:0.219918+0.00568771
[240]	train-merror:0.002539+0.0002393	test-merror:0.220068+0.00563925
[241]	train-merror:0.00252667+0.000268613	test-merror:0.220018+0.00564485
[242]	train-merror:0.00243867+0.00022278	test-merror:0.219918+0.00567966
[243]	train-merror:0.00242633+0.000248733	test-m

In [73]:
cvresult

Unnamed: 0,test-merror-mean,test-merror-std,train-merror-mean,train-merror-std
0,0.391914,0.001915,0.359066,0.007502
1,0.341228,0.004322,0.292490,0.005804
2,0.322095,0.003097,0.262923,0.006342
3,0.312541,0.002279,0.247888,0.007507
4,0.304344,0.004899,0.235782,0.006252
5,0.297883,0.003496,0.223940,0.006131
6,0.292226,0.004438,0.213934,0.005406
7,0.286971,0.004496,0.204568,0.005659
8,0.282974,0.004601,0.194939,0.005239
9,0.279102,0.003557,0.187283,0.005675


In [74]:
cvresult.shape[0]

263

In [75]:
xgb4.set_params(n_estimators=cvresult.shape[0])

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.8, gamma=0.35, learning_rate=0.3,
       max_delta_step=0, max_depth=7, min_child_weight=1, missing=None,
       n_estimators=263, n_jobs=8, nthread=None, num_class=20,
       objective='multi:softmax', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=27, silent=True,
       subsample=0.8)

subsample  
colsample by tree

In [81]:
param_test5 = {
    'subsample': np.linspace(0.5, 1, 6),
    'colsample_bytree': np.linspace(0.5, 1, 6)
}
gsearch5 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=263,
    max_depth=7,
    min_child_weight=1,
    gamma=0.35,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test5,
                        scoring='accuracy',
                        n_jobs=8,
                        iid=False,
                        cv=5,
                        verbose=50)

In [82]:
%%time
print_time()

gsearch5.fit(train_mat, train_labels)

print_time()

180408-003019
Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] colsample_bytree=0.5, subsample=0.5 .............................
[CV] colsample_bytree=0.5, subsample=0.5 .............................
[CV] colsample_bytree=0.5, subsample=0.5 .............................
[CV] colsample_bytree=0.5, subsample=0.5 .............................
[CV] colsample_bytree=0.5, subsample=0.5 .............................
[CV] colsample_bytree=0.5, subsample=0.6 .............................
[CV] colsample_bytree=0.5, subsample=0.6 .............................
[CV] colsample_bytree=0.5, subsample=0.6 .............................
[CV]  colsample_bytree=0.5, subsample=0.5, score=0.788514702186479, total=16.7min
[CV] colsample_bytree=0.5, subsample=0.6 .............................
[Parallel(n_jobs=8)]: Done   1 tasks      | elapsed: 18.0min
[CV]  colsample_bytree=0.5, subsample=0.5, score=0.783943626525733, total=16.7min
[CV] colsample_bytree=0.5, subsample=0.6 ...................

[CV]  colsample_bytree=0.6, subsample=0.6, score=0.7900226187484293, total=19.3min
[CV] colsample_bytree=0.6, subsample=0.7 .............................
[CV] colsample_bytree=0.6, subsample=0.7 .............................
[Parallel(n_jobs=8)]: Done  36 tasks      | elapsed: 95.3min
[Parallel(n_jobs=8)]: Done  37 tasks      | elapsed: 95.3min
[CV]  colsample_bytree=0.6, subsample=0.6, score=0.7850678733031674, total=19.3min
[CV] colsample_bytree=0.6, subsample=0.8 .............................
[Parallel(n_jobs=8)]: Done  38 tasks      | elapsed: 95.7min
[CV]  colsample_bytree=0.6, subsample=0.6, score=0.7697145013205886, total=19.3min
[CV] colsample_bytree=0.6, subsample=0.8 .............................
[Parallel(n_jobs=8)]: Done  39 tasks      | elapsed: 96.1min
[CV]  colsample_bytree=0.6, subsample=0.6, score=0.7855794639486599, total=19.4min
[CV] colsample_bytree=0.6, subsample=0.8 .............................
[Parallel(n_jobs=8)]: Done  40 tasks      | elapsed: 96.7min
[CV]  co

[CV] colsample_bytree=0.7, subsample=0.9 .............................
[Parallel(n_jobs=8)]: Done  74 tasks      | elapsed: 202.2min
[CV]  colsample_bytree=0.7, subsample=0.7, score=0.7901094752736881, total=22.2min
[CV] colsample_bytree=0.7, subsample=0.9 .............................
[Parallel(n_jobs=8)]: Done  75 tasks      | elapsed: 203.7min
[CV]  colsample_bytree=0.7, subsample=0.8, score=0.7803315749811605, total=22.5min
[CV] colsample_bytree=0.7, subsample=0.9 .............................
[Parallel(n_jobs=8)]: Done  76 tasks      | elapsed: 204.5min
[CV]  colsample_bytree=0.7, subsample=0.8, score=0.7921588338778588, total=22.6min
[CV] colsample_bytree=0.7, subsample=0.9 .............................
[Parallel(n_jobs=8)]: Done  77 tasks      | elapsed: 204.8min
[CV]  colsample_bytree=0.7, subsample=0.8, score=0.7859477124183006, total=22.5min
[CV] colsample_bytree=0.7, subsample=1.0 .............................
[Parallel(n_jobs=8)]: Done  78 tasks      | elapsed: 205.6min
[CV

[CV] colsample_bytree=0.8, subsample=1.0 .............................
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed: 307.1min
[CV]  colsample_bytree=0.8, subsample=0.9, score=0.7858220211161387, total=25.6min
[CV] colsample_bytree=0.9, subsample=0.5 .............................
[Parallel(n_jobs=8)]: Done 113 tasks      | elapsed: 324.6min
[CV]  colsample_bytree=0.8, subsample=0.9, score=0.7771349515784178, total=25.2min
[CV] colsample_bytree=0.9, subsample=0.5 .............................
[Parallel(n_jobs=8)]: Done 114 tasks      | elapsed: 326.8min
[CV]  colsample_bytree=0.8, subsample=0.9, score=0.7879703032590915, total=25.2min
[CV] colsample_bytree=0.9, subsample=0.5 .............................
[Parallel(n_jobs=8)]: Done 115 tasks      | elapsed: 328.1min
[CV]  colsample_bytree=0.8, subsample=1.0, score=0.785983421250942, total=26.0min
[CV] colsample_bytree=0.9, subsample=0.5 .............................
[Parallel(n_jobs=8)]: Done 116 tasks      | elapsed: 328.9min
[CV]

[CV] colsample_bytree=1.0, subsample=0.6 .............................
[Parallel(n_jobs=8)]: Done 150 tasks      | elapsed: 444.3min
[CV]  colsample_bytree=0.9, subsample=1.0, score=0.7914936454007802, total=28.5min
[CV] colsample_bytree=1.0, subsample=0.6 .............................
[Parallel(n_jobs=8)]: Done 151 tasks      | elapsed: 444.6min
[CV]  colsample_bytree=1.0, subsample=0.5, score=0.7814777582307112, total=27.0min
[CV] colsample_bytree=1.0, subsample=0.6 .............................
[Parallel(n_jobs=8)]: Done 152 tasks      | elapsed: 446.1min
[CV]  colsample_bytree=1.0, subsample=0.5, score=0.7801659125188537, total=27.2min
[CV] colsample_bytree=1.0, subsample=0.7 .............................
[Parallel(n_jobs=8)]: Done 153 tasks      | elapsed: 462.5min
[CV]  colsample_bytree=1.0, subsample=0.5, score=0.7747453150547101, total=27.1min
[CV] colsample_bytree=1.0, subsample=0.7 .............................
[Parallel(n_jobs=8)]: Done 154 tasks      | elapsed: 465.2min
[CV

In [83]:
gsearch5.best_params_

{'colsample_bytree': 0.5, 'subsample': 1.0}

In [84]:
gsearch5.grid_scores_



[mean: 0.77986, std: 0.00636, params: {'colsample_bytree': 0.5, 'subsample': 0.5},
 mean: 0.78327, std: 0.00738, params: {'colsample_bytree': 0.5, 'subsample': 0.59999999999999998},
 mean: 0.78506, std: 0.00682, params: {'colsample_bytree': 0.5, 'subsample': 0.69999999999999996},
 mean: 0.78370, std: 0.00721, params: {'colsample_bytree': 0.5, 'subsample': 0.80000000000000004},
 mean: 0.78825, std: 0.00840, params: {'colsample_bytree': 0.5, 'subsample': 0.90000000000000002},
 mean: 0.78921, std: 0.00623, params: {'colsample_bytree': 0.5, 'subsample': 1.0},
 mean: 0.77938, std: 0.00824, params: {'colsample_bytree': 0.59999999999999998, 'subsample': 0.5},
 mean: 0.78287, std: 0.00689, params: {'colsample_bytree': 0.59999999999999998, 'subsample': 0.59999999999999998},
 mean: 0.78519, std: 0.00672, params: {'colsample_bytree': 0.59999999999999998, 'subsample': 0.69999999999999996},
 mean: 0.78586, std: 0.00589, params: {'colsample_bytree': 0.59999999999999998, 'subsample': 0.80000000000000

## Regularization parameters

reg_alpha  
reg_lambda

In [89]:
param_test6 = {
    'reg_alpha': np.logspace(-4, 2, 7),
    'reg_lambda': np.logspace(-4, 2, 7)
}
gsearch6 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=263,
    max_depth=7,
    min_child_weight=1,
    gamma=0.35,
    subsample=1.0,
    colsample_bytree=0.5,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test6,
                        scoring='accuracy',
                        n_jobs=7,
                        iid=False,
                        cv=5,
                        verbose=50)

In [90]:
%%time
print_time()

gsearch6.fit(train_mat, train_labels)

print_time()

180408-212258
Fitting 5 folds for each of 49 candidates, totalling 245 fits
[CV] reg_alpha=0.0001, reg_lambda=0.0001 .............................
[CV] reg_alpha=0.0001, reg_lambda=0.0001 .............................
[CV] reg_alpha=0.0001, reg_lambda=0.0001 .............................
[CV] reg_alpha=0.0001, reg_lambda=0.0001 .............................
[CV] reg_alpha=0.0001, reg_lambda=0.0001 .............................
[CV] reg_alpha=0.0001, reg_lambda=0.001 ..............................
[CV] reg_alpha=0.0001, reg_lambda=0.001 ..............................
[CV]  reg_alpha=0.0001, reg_lambda=0.001, score=0.7842250690781211, total=17.3min
[CV] reg_alpha=0.0001, reg_lambda=0.001 ..............................
[Parallel(n_jobs=7)]: Done   1 tasks      | elapsed: 18.3min
[CV]  reg_alpha=0.0001, reg_lambda=0.001, score=0.796305604423222, total=17.4min
[CV] reg_alpha=0.0001, reg_lambda=0.001 ..............................
[Parallel(n_jobs=7)]: Done   2 tasks      | elapsed: 18.4min


[CV]  reg_alpha=0.001, reg_lambda=0.0001, score=0.7941693892937924, total=18.8min
[CV] reg_alpha=0.001, reg_lambda=0.001 ...............................
[Parallel(n_jobs=7)]: Done  37 tasks      | elapsed: 112.1min
[CV]  reg_alpha=0.001, reg_lambda=0.0001, score=0.7898441427853192, total=19.2min
[CV] reg_alpha=0.001, reg_lambda=0.001 ...............................
[Parallel(n_jobs=7)]: Done  38 tasks      | elapsed: 117.1min
[CV]  reg_alpha=0.001, reg_lambda=0.0001, score=0.7795245881021255, total=19.5min
[CV] reg_alpha=0.001, reg_lambda=0.01 ................................
[Parallel(n_jobs=7)]: Done  39 tasks      | elapsed: 117.9min
[CV]  reg_alpha=0.001, reg_lambda=0.0001, score=0.7904869762174406, total=19.3min
[CV] reg_alpha=0.001, reg_lambda=0.01 ................................
[Parallel(n_jobs=7)]: Done  40 tasks      | elapsed: 119.8min
[CV]  reg_alpha=0.001, reg_lambda=0.001, score=0.79454636843428, total=19.1min
[CV] reg_alpha=0.001, reg_lambda=0.01 .......................

[Parallel(n_jobs=7)]: Done  75 tasks      | elapsed: 214.2min
[CV]  reg_alpha=0.01, reg_lambda=0.001, score=0.7913678117528627, total=11.6min
[CV] reg_alpha=0.01, reg_lambda=0.01 .................................
[Parallel(n_jobs=7)]: Done  76 tasks      | elapsed: 223.3min
[CV]  reg_alpha=0.01, reg_lambda=0.001, score=0.7857322280833962, total=19.1min
[CV] reg_alpha=0.01, reg_lambda=0.01 .................................
[Parallel(n_jobs=7)]: Done  77 tasks      | elapsed: 224.6min
[CV]  reg_alpha=0.01, reg_lambda=0.001, score=0.796305604423222, total=19.1min
[CV] reg_alpha=0.01, reg_lambda=0.01 .................................
[Parallel(n_jobs=7)]: Done  78 tasks      | elapsed: 224.9min
[CV]  reg_alpha=0.01, reg_lambda=0.001, score=0.7858220211161387, total=19.0min
[CV] reg_alpha=0.01, reg_lambda=0.1 ..................................
[Parallel(n_jobs=7)]: Done  79 tasks      | elapsed: 225.5min
[CV]  reg_alpha=0.01, reg_lambda=0.001, score=0.774368004024651, total=19.0min
[CV] reg

[CV] reg_alpha=0.1, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 114 tasks      | elapsed: 320.7min
[CV]  reg_alpha=0.1, reg_lambda=0.01, score=0.7830946998241648, total=18.5min
[CV] reg_alpha=0.1, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 115 tasks      | elapsed: 323.4min
[CV]  reg_alpha=0.1, reg_lambda=0.001, score=0.7908644771611929, total=19.0min
[CV] reg_alpha=0.1, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 116 tasks      | elapsed: 323.7min
[CV]  reg_alpha=0.1, reg_lambda=0.01, score=0.7768834108917118, total=11.3min
[CV] reg_alpha=0.1, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 117 tasks      | elapsed: 324.6min
[CV]  reg_alpha=0.1, reg_lambda=0.01, score=0.7955516461422468, total=19.1min
[CV] reg_alpha=0.1, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 118 tasks      | elapsed: 326.9min
[CV]  reg_alpha=0.1, r

[CV] reg_alpha=1.0, reg_lambda=0.1 ...................................
[Parallel(n_jobs=7)]: Done 153 tasks      | elapsed: 420.2min
[CV]  reg_alpha=1.0, reg_lambda=0.01, score=0.7865861331319995, total=15.4min
[CV] reg_alpha=1.0, reg_lambda=1.0 ...................................
[Parallel(n_jobs=7)]: Done 154 tasks      | elapsed: 420.6min
[CV]  reg_alpha=1.0, reg_lambda=0.01, score=0.774368004024651, total=19.4min
[CV] reg_alpha=1.0, reg_lambda=1.0 ...................................
[Parallel(n_jobs=7)]: Done 155 tasks      | elapsed: 423.6min
[CV]  reg_alpha=1.0, reg_lambda=0.1, score=0.7809595579000251, total=19.4min
[CV] reg_alpha=1.0, reg_lambda=1.0 ...................................
[Parallel(n_jobs=7)]: Done 156 tasks      | elapsed: 431.7min
[CV]  reg_alpha=1.0, reg_lambda=0.1, score=0.7762545591749466, total=15.3min
[CV] reg_alpha=1.0, reg_lambda=1.0 ...................................
[Parallel(n_jobs=7)]: Done 157 tasks      | elapsed: 433.7min
[CV]  reg_alpha=1.0, reg_l

[CV] reg_alpha=10.0, reg_lambda=1.0 ..................................
[Parallel(n_jobs=7)]: Done 192 tasks      | elapsed: 505.9min
[CV]  reg_alpha=10.0, reg_lambda=0.1, score=0.7571374669852848, total=12.6min
[CV] reg_alpha=10.0, reg_lambda=1.0 ..................................
[Parallel(n_jobs=7)]: Done 193 tasks      | elapsed: 510.8min
[CV]  reg_alpha=10.0, reg_lambda=0.1, score=0.7655857214680744, total=13.3min
[CV] reg_alpha=10.0, reg_lambda=10.0 .................................
[Parallel(n_jobs=7)]: Done 194 tasks      | elapsed: 511.0min
[CV]  reg_alpha=10.0, reg_lambda=1.0, score=0.76525998492841, total=10.0min
[CV] reg_alpha=10.0, reg_lambda=10.0 .................................
[Parallel(n_jobs=7)]: Done 195 tasks      | elapsed: 514.4min
[CV]  reg_alpha=10.0, reg_lambda=0.1, score=0.770605259846483, total=13.4min
[CV] reg_alpha=10.0, reg_lambda=10.0 .................................
[Parallel(n_jobs=7)]: Done 196 tasks      | elapsed: 515.3min
[CV]  reg_alpha=10.0, reg_

[CV]  reg_alpha=100.0, reg_lambda=0.1, score=0.6582358122561973, total= 7.8min
[CV] reg_alpha=100.0, reg_lambda=10.0 ................................
[Parallel(n_jobs=7)]: Done 231 tasks      | elapsed: 563.2min
[CV]  reg_alpha=100.0, reg_lambda=1.0, score=0.6493343381060035, total= 7.8min
[CV] reg_alpha=100.0, reg_lambda=10.0 ................................
[Parallel(n_jobs=7)]: Done 232 tasks      | elapsed: 563.5min
[CV]  reg_alpha=100.0, reg_lambda=1.0, score=0.653376933719029, total= 4.8min
[CV] reg_alpha=100.0, reg_lambda=10.0 ................................
[CV]  reg_alpha=100.0, reg_lambda=1.0, score=0.6493212669683258, total= 7.9min
[CV] reg_alpha=100.0, reg_lambda=100.0 ...............................
[CV]  reg_alpha=100.0, reg_lambda=1.0, score=0.6599974833270417, total= 7.8min
[CV] reg_alpha=100.0, reg_lambda=100.0 ...............................
[CV]  reg_alpha=100.0, reg_lambda=10.0, score=0.6507159005275056, total= 7.8min
[CV] reg_alpha=100.0, reg_lambda=100.0 ........

In [91]:
gsearch6.best_params_

{'reg_alpha': 0.001, 'reg_lambda': 1.0}

In [92]:
gsearch6.grid_scores_



[mean: 0.78556, std: 0.00566, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.0001},
 mean: 0.78690, std: 0.00620, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.001},
 mean: 0.78687, std: 0.00685, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.01},
 mean: 0.78612, std: 0.00647, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.10000000000000001},
 mean: 0.78933, std: 0.00604, params: {'reg_alpha': 0.0001, 'reg_lambda': 1.0},
 mean: 0.78813, std: 0.00596, params: {'reg_alpha': 0.0001, 'reg_lambda': 10.0},
 mean: 0.78134, std: 0.00522, params: {'reg_alpha': 0.0001, 'reg_lambda': 100.0},
 mean: 0.78747, std: 0.00529, params: {'reg_alpha': 0.001, 'reg_lambda': 0.0001},
 mean: 0.78697, std: 0.00574, params: {'reg_alpha': 0.001, 'reg_lambda': 0.001},
 mean: 0.78657, std: 0.00496, params: {'reg_alpha': 0.001, 'reg_lambda': 0.01},
 mean: 0.78667, std: 0.00636, params: {'reg_alpha': 0.001, 'reg_lambda': 0.10000000000000001},
 mean: 0.78949, std: 0.00700, params: {'reg_alpha': 0.001, 'reg_lambda': 1.0}

In [93]:
param_test7 = {
    'reg_alpha': np.logspace(-4, -2, 7),
    'reg_lambda': np.logspace(-1, 1, 7)
}
gsearch7 = GridSearchCV(estimator=XGBClassifier(
    learning_rate=0.3,
    n_estimators=263,
    max_depth=7,
    min_child_weight=1,
    gamma=0.35,
    subsample=1.0,
    colsample_bytree=0.5,
    objective='multi:softmax',
    scale_pos_weight=1,
    num_class=20),
                        param_grid=param_test7,
                        scoring='accuracy',
                        n_jobs=7,
                        iid=False,
                        cv=5,
                        verbose=50)

In [94]:
%%time
print_time()

gsearch7.fit(train_mat, train_labels)

print_time()

180409-101350
Fitting 5 folds for each of 49 candidates, totalling 245 fits
[CV] reg_alpha=0.0001, reg_lambda=0.1 ................................
[CV] reg_alpha=0.0001, reg_lambda=0.1 ................................
[CV] reg_alpha=0.0001, reg_lambda=0.1 ................................
[CV] reg_alpha=0.0001, reg_lambda=0.1 ................................
[CV] reg_alpha=0.0001, reg_lambda=0.1 ................................
[CV] reg_alpha=0.0001, reg_lambda=0.215443469003 .....................
[CV] reg_alpha=0.0001, reg_lambda=0.215443469003 .....................
[CV]  reg_alpha=0.0001, reg_lambda=0.215443469003, score=0.7832202964079377, total=11.9min
[CV] reg_alpha=0.0001, reg_lambda=0.215443469003 .....................
[Parallel(n_jobs=7)]: Done   1 tasks      | elapsed: 12.6min
[CV]  reg_alpha=0.0001, reg_lambda=0.1, score=0.7937924101533048, total=15.5min
[CV] reg_alpha=0.0001, reg_lambda=0.215443469003 .....................
[Parallel(n_jobs=7)]: Done   2 tasks      | elapsed: 

[CV]  reg_alpha=0.0001, reg_lambda=10.0, score=0.7899836416257707, total=19.3min
[CV] reg_alpha=0.000215443469003, reg_lambda=0.215443469003 ..........
[Parallel(n_jobs=7)]: Done  36 tasks      | elapsed: 97.3min
[CV]  reg_alpha=0.000215443469003, reg_lambda=0.1, score=0.7934154310128173, total=15.3min
[CV] reg_alpha=0.000215443469003, reg_lambda=0.215443469003 ..........
[Parallel(n_jobs=7)]: Done  37 tasks      | elapsed: 98.4min
[CV]  reg_alpha=0.000215443469003, reg_lambda=0.1, score=0.7797761287888316, total=16.3min
[CV] reg_alpha=0.000215443469003, reg_lambda=0.215443469003 ..........
[Parallel(n_jobs=7)]: Done  38 tasks      | elapsed: 107.3min
[CV]  reg_alpha=0.000215443469003, reg_lambda=0.1, score=0.7869532428355958, total=18.4min
[CV] reg_alpha=0.000215443469003, reg_lambda=0.464158883361 ..........
[Parallel(n_jobs=7)]: Done  39 tasks      | elapsed: 108.4min
[CV]  reg_alpha=0.000215443469003, reg_lambda=0.1, score=0.7879703032590915, total=20.4min
[CV] reg_alpha=0.00021544

[Parallel(n_jobs=7)]: Done  71 tasks      | elapsed: 203.3min
[CV]  reg_alpha=0.000215443469003, reg_lambda=10.0, score=0.788096136907009, total=20.7min
[CV] reg_alpha=0.000464158883361, reg_lambda=0.215443469003 ..........
[Parallel(n_jobs=7)]: Done  72 tasks      | elapsed: 203.3min
[CV]  reg_alpha=0.000464158883361, reg_lambda=0.1, score=0.7875816993464052, total=20.5min
[CV] reg_alpha=0.000464158883361, reg_lambda=0.215443469003 ..........
[Parallel(n_jobs=7)]: Done  73 tasks      | elapsed: 214.2min
[CV]  reg_alpha=0.000464158883361, reg_lambda=0.1, score=0.7758772481448875, total=20.6min
[CV] reg_alpha=0.000464158883361, reg_lambda=0.464158883361 ..........
[Parallel(n_jobs=7)]: Done  74 tasks      | elapsed: 214.3min
[CV]  reg_alpha=0.000464158883361, reg_lambda=0.215443469003, score=0.7791472770720664, total=12.7min
[CV] reg_alpha=0.000464158883361, reg_lambda=0.464158883361 ..........
[Parallel(n_jobs=7)]: Done  75 tasks      | elapsed: 216.5min
[CV]  reg_alpha=0.0004641588833

[Parallel(n_jobs=7)]: Done 107 tasks      | elapsed: 301.1min
[CV]  reg_alpha=0.001, reg_lambda=0.1, score=0.796305604423222, total=18.7min
[CV] reg_alpha=0.001, reg_lambda=0.215443469003 ......................
[Parallel(n_jobs=7)]: Done 108 tasks      | elapsed: 302.9min
[CV]  reg_alpha=0.001, reg_lambda=0.1, score=0.7771349515784178, total=18.8min
[CV] reg_alpha=0.001, reg_lambda=0.464158883361 ......................
[Parallel(n_jobs=7)]: Done 109 tasks      | elapsed: 307.2min
[CV]  reg_alpha=0.001, reg_lambda=0.1, score=0.7891028060903486, total=18.8min
[CV] reg_alpha=0.001, reg_lambda=0.464158883361 ......................
[Parallel(n_jobs=7)]: Done 110 tasks      | elapsed: 307.9min
[CV]  reg_alpha=0.001, reg_lambda=0.215443469003, score=0.7842250690781211, total=18.8min
[CV] reg_alpha=0.001, reg_lambda=0.464158883361 ......................
[Parallel(n_jobs=7)]: Done 111 tasks      | elapsed: 308.4min
[CV]  reg_alpha=0.001, reg_lambda=0.215443469003, score=0.7782668846685952, tota

[CV] reg_alpha=0.00215443469003, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 145 tasks      | elapsed: 396.3min
[CV]  reg_alpha=0.00215443469003, reg_lambda=0.215443469003, score=0.7818387339864356, total=18.9min
[CV] reg_alpha=0.00215443469003, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 146 tasks      | elapsed: 402.2min
[CV]  reg_alpha=0.00215443469003, reg_lambda=0.215443469003, score=0.7872046254399195, total=17.9min
[CV] reg_alpha=0.00215443469003, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 147 tasks      | elapsed: 402.6min
[CV]  reg_alpha=0.00215443469003, reg_lambda=0.215443469003, score=0.7961799447097261, total=18.9min
[CV] reg_alpha=0.00215443469003, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 148 tasks      | elapsed: 402.9min
[CV]  reg_alpha=0.00215443469003, reg_lambda=0.215443469003, score=0.7891028060903486, total=11.7min
[CV] reg_alpha=0.00215443469003, reg_lambda=1.0 ................

[CV] reg_alpha=0.00464158883361, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 181 tasks      | elapsed: 490.1min
[CV]  reg_alpha=0.00464158883361, reg_lambda=0.215443469003, score=0.7835970861592565, total=18.9min
[CV] reg_alpha=0.00464158883361, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 182 tasks      | elapsed: 494.8min
[CV]  reg_alpha=0.00464158883361, reg_lambda=0.215443469003, score=0.7984418195526514, total=18.6min
[CV] reg_alpha=0.00464158883361, reg_lambda=0.464158883361 ...........
[Parallel(n_jobs=7)]: Done 183 tasks      | elapsed: 495.5min
[CV]  reg_alpha=0.00464158883361, reg_lambda=0.215443469003, score=0.7788957363853604, total=18.9min
[CV] reg_alpha=0.00464158883361, reg_lambda=1.0 ......................
[Parallel(n_jobs=7)]: Done 184 tasks      | elapsed: 497.5min
[CV]  reg_alpha=0.00464158883361, reg_lambda=0.215443469003, score=0.7874669686674217, total=16.7min
[CV] reg_alpha=0.00464158883361, reg_lambda=1.0 ................

[Parallel(n_jobs=7)]: Done 217 tasks      | elapsed: 589.1min
[CV]  reg_alpha=0.01, reg_lambda=0.464158883361, score=0.7985674792661472, total=13.2min
[CV] reg_alpha=0.01, reg_lambda=0.464158883361 .......................
[Parallel(n_jobs=7)]: Done 218 tasks      | elapsed: 591.6min
[CV]  reg_alpha=0.01, reg_lambda=0.215443469003, score=0.7869532428355958, total=18.7min
[CV] reg_alpha=0.01, reg_lambda=1.0 ..................................
[Parallel(n_jobs=7)]: Done 219 tasks      | elapsed: 591.9min
[CV]  reg_alpha=0.01, reg_lambda=0.215443469003, score=0.7773864922651239, total=19.3min
[CV] reg_alpha=0.01, reg_lambda=1.0 ..................................
[Parallel(n_jobs=7)]: Done 220 tasks      | elapsed: 593.7min
[CV]  reg_alpha=0.01, reg_lambda=0.215443469003, score=0.7912419781049452, total=19.7min
[CV] reg_alpha=0.01, reg_lambda=1.0 ..................................
[Parallel(n_jobs=7)]: Done 221 tasks      | elapsed: 595.8min
[CV]  reg_alpha=0.01, reg_lambda=0.464158883361, s

In [95]:
gsearch7.best_params_

{'reg_alpha': 0.001, 'reg_lambda': 1.0}

In [96]:
gsearch7.grid_scores_



[mean: 0.78612, std: 0.00647, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.10000000000000001},
 mean: 0.78644, std: 0.00686, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.21544346900318834},
 mean: 0.78848, std: 0.00609, params: {'reg_alpha': 0.0001, 'reg_lambda': 0.46415888336127786},
 mean: 0.78933, std: 0.00604, params: {'reg_alpha': 0.0001, 'reg_lambda': 1.0},
 mean: 0.78659, std: 0.00572, params: {'reg_alpha': 0.0001, 'reg_lambda': 2.1544346900318834},
 mean: 0.78818, std: 0.00632, params: {'reg_alpha': 0.0001, 'reg_lambda': 4.6415888336127775},
 mean: 0.78813, std: 0.00596, params: {'reg_alpha': 0.0001, 'reg_lambda': 10.0},
 mean: 0.78637, std: 0.00454, params: {'reg_alpha': 0.00021544346900318845, 'reg_lambda': 0.10000000000000001},
 mean: 0.78702, std: 0.00604, params: {'reg_alpha': 0.00021544346900318845, 'reg_lambda': 0.21544346900318834},
 mean: 0.78727, std: 0.00645, params: {'reg_alpha': 0.00021544346900318845, 'reg_lambda': 0.46415888336127786},
 mean: 0.78720, std: 0.00