In [1]:
import numpy as np
import pandas as pd
from xgboost import *
from sklearn.metrics import *
from pandas.api.types import is_string_dtype, is_numeric_dtype
from sklearn.preprocessing import LabelEncoder
from sklearn.grid_search import GridSearchCV
import re




In [2]:
train = pd.read_feather('tmp/train-lat-long')
test = pd.read_feather('tmp/test-lat-long')

In [3]:
X = train.drop(['any_spot', 'Real.Spots'], axis=1)
Y = train['any_spot']

In [4]:
#Handles class imbalance through sampling
classDistribution = train['any_spot'].value_counts()
print('Class imbalance:')
print(classDistribution)
train = train.sample(frac=1).reset_index(drop=True)
zero = train[train['any_spot']==0].tail(classDistribution.min())
one = train[train['any_spot']==1]
train_data = zero.append(one)
del zero, one
train_data = train_data.sample(frac=1).reset_index(drop=True)
#del train
print('Class imbalance evened out:')
print(train_data['any_spot'].value_counts())

Class imbalance:
0    699
1    401
Name: any_spot, dtype: int64
Class imbalance evened out:
1    401
0    401
Name: any_spot, dtype: int64


In [4]:
X['lat_from'], X['lon_from'] = X.lat_long_from.str.split(',').str

_, X['lat_from'] = X.lat_from.str.split(':').str

X['lat_from'] =  X['lat_from'].astype(str).astype(float)

_, X['lon_from'] = X.lon_from.str.split(':').str
X['lon_from'] =  X['lon_from'].astype(str).astype(float)

X = X.drop(['lat_long_from'], axis = 1)

X['lat_to'], X['lon_to'] = X.lat_long_to.str.split(',').str

_, X['lat_to'] = X.lat_to.str.split(':').str
X['lat_to'] =  X['lat_to'].astype(str).astype(float)

_, X['lon_to'] = X.lon_to.str.split(':').str
X['lon_to'] =  X['lon_to'].astype(str).astype(float)

X = X.drop(['lat_long_to'], axis = 1)
X = X.drop(['combined'], axis = 1)

test['lat_from'], test['lon_from'] = test.lat_long_from.str.split(',').str

_, test['lat_from'] = test.lat_from.str.split(':').str

test['lat_from'] =  test['lat_from'].astype(str).astype(float)

_, test['lon_from'] = test.lon_from.str.split(':').str
test['lon_from'] =  test['lon_from'].astype(str).astype(float)

test = test.drop(['lat_long_from'], axis = 1)

test['lat_to'], test['lon_to'] = test.lat_long_to.str.split(',').str

_, test['lat_to'] = test.lat_to.str.split(':').str
test['lat_to'] =  test['lat_to'].astype(str).astype(float)

_, test['lon_to'] = test.lon_to.str.split(':').str
test['lon_to'] =  test['lon_to'].astype(str).astype(float)

test = test.drop(['lat_long_to'], axis = 1)
test = test.drop(['combined'], axis = 1)

In [5]:
X = X.drop(['STREET_NAME_x', 'STREET_NAME_y'], axis = 1)
test = test.drop(['STREET_NAME_x', 'STREET_NAME_y'], axis = 1)

category = ["Street", "From", "To","from_to"]

le = LabelEncoder()
for cat in category:
    le.fit(X[cat])
    X[cat] = le.transform(X[cat])
    test[cat] = le.transform(test[cat])

X = X.fillna(0)
test = test.fillna(0)

In [25]:
param_grid = {'n_estimators' : [200, 500],
'min_child_weight':[2, 4, 6], 
'learning_rate' : [ 0.1, 0.2, 0.3],
'subsample' : [0.6, 0.8],
'colsample_bytree' : [0.5, 0.8],
'max_depth':[2, 4, 6]
}

f_scorer = make_scorer(fbeta_score, beta=0.5)

gd_search = GridSearchCV(estimator = XGBClassifier(objective = 'binary:logistic', nthread = 4, scale_pos_weight = 1, seed = 1), 
param_grid = param_grid, scoring ='roc_auc', iid = False, cv = 5, verbose = 5, n_jobs = -1)
gd_search.fit(X, Y)


Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rat

[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.3s


[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.526071 -   0.5s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.860342 -   0.6s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.594709 -   0.6s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.267232 -   0.6s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.552143 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.558113 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.575838 -   0.4s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.372946 -   0.4s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    5.6s


[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.832914 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.434480 -   1.0s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.540536 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.288929 -   1.0s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.422946 -   1.8s
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.281339 -   1.9s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.551339 -   1.9s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.504938 -   1.8s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.524250 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.820414 -   2.2s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.271518 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.567768 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]

[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   23.6s


[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.565536 -   2.7s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.554821 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.810342 -   2.9s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.816637 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.537679 -   0.4s
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.507319 -   3.0s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.440089 -   3.0s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.841906 -   0.6s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.529464 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.427232 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.814838 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.226071 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.813399 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.270268 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.452321 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.814209 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]

[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:   49.2s


[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.429806 -   2.8s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.434821 -   3.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.797392 -   3.2s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.259107 -   2.9s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.310268 -   1.5s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.534107 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.456875 -   1.5s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.803147 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.300714 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.432768 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.549018 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.821763 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.794694 -   2.8s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.438393 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.792896 -   1.4s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.470370 -   1.5s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.530714 -   1.3s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.453482 -   0.5s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.783993 -   1.3s
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.552411 -   0.5s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, sco

[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.3min


[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.426607 -   2.2s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.428483 -   2.2s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.291786 -   0.8s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.464109 -   0.9s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.537143 -   2.3s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.804227 -   2.4s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.808903 -   1.0s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.273839 -   2.0s
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, sco

[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.462143 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.533304 -   1.1s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.781655 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.508818 -   1.2s
[CV] colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.250804 -   2.4s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.279643 -   2.3s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.514550 -   0.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.5, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.502205 -   2.4s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.276250 -   0.7s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.581250 -   0.7s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.845594 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.859442 -   0.7s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.441875 -   1.1s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.285536 -   1.3s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.551250 -   1.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.844245 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]

[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  2.0min


[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.427589 -   2.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.546429 -   2.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.816996 -   2.6s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.428304 -   2.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.829406 -   2.8s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.281964 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.461696 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.539375 -   1.8s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.555820 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.286071 -   1.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.446518 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.584107 -   1.5s
[CV] colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.1, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.815108 -   3.9s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.433571 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.545536 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.818435 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.538929 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.532500 -   0.6s
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.263304 -   0.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.471875 -   0.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.437054 -   3.3s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.486772 -   1.3s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.278036 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.6, score=0.541696 -   1.2s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=4, n_estimators=200, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.551411 -   3.1s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.421339 -   3.0s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.244107 -   3.3s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=4, min_child_weight=6, n_estimators=500, subsample=0.6, score=0.809173 -   3.0s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.513757 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.278750 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.426964 -   1.6s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 


[Parallel(n_jobs=-1)]: Done 866 tasks      | elapsed:  3.1min


[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.810342 -   4.2s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8, score=0.555625 -   4.4s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.534286 -   1.8s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.8, score=0.819155 -   2.0s
[CV] colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.2, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.440625 -   3.5s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.413125 -   0.7s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, score=0.833903 -   0.7s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.514638 -   0.7s
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=2, n_estimators=200, subsample=0.6, sco

[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.434107 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.546339 -   1.8s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.6, score=0.786421 -   1.7s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.488272 -   1.8s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=2, min_child_weight=6, n_estimators=500, subsample=0.8, score=0.820594 -   1.8s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.263482 -   1.2s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.456518 -   1.3s
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=200, subsample=0.8, score=0.786241 -   1.2s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.259732 -   1.1s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.500353 -   1.3s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.432143 -   1.3s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.6, score=0.807554 -   1.1s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=4, min_child_weight=6, n_estimators=200, subsample=0.8 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.314554 -   3.6s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.8 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.445982 -   3.8s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.561250 -   3.7s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=2, n_estimators=500, subsample=0.6, score=0.806025 -   3.6s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=200, subsample=0.6 
[CV]

[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.271429 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=200, subsample=0.8, score=0.448571 -   1.4s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.540982 -   3.4s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.6 
[CV]  colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=4, n_estimators=500, subsample=0.8, score=0.801619 -   3.4s
[CV] colsample_bytree=0.8, learning_rate=0.3, max_depth=6, min_child_weight=6, n_estimators=500, subsample=0.8 
[CV]

[Parallel(n_jobs=-1)]: Done 1080 out of 1080 | elapsed:  4.0min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=4, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=1, silent=True,
       subsample=1),
       fit_params={}, iid=False, n_jobs=-1,
       param_grid={'n_estimators': [200, 500], 'min_child_weight': [2, 4, 6], 'learning_rate': [0.1, 0.2, 0.3], 'subsample': [0.6, 0.8], 'colsample_bytree': [0.5, 0.8], 'max_depth': [2, 4, 6]},
       pre_dispatch='2*n_jobs', refit=True, scoring='roc_auc', verbose=5)

In [26]:
gd_search.best_params_, gd_search.best_score_

({'colsample_bytree': 0.5,
  'learning_rate': 0.1,
  'max_depth': 2,
  'min_child_weight': 6,
  'n_estimators': 200,
  'subsample': 0.8},
 0.5952513116490934)

In [6]:
import xgboost as xgb
dtrain = xgb.DMatrix(X, label=Y)

In [7]:
from sklearn.cross_validation import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size = .2, random_state=5)

In [8]:
dX_train = xgb.DMatrix(X_train, label=y_train)
dX_val = xgb.DMatrix(X_val, label=y_val)

In [27]:
params = {
    'max_depth':2,
    'min_child_weight': 6,
    'eta':0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.5,
    'objective':'binary:logistic',
    'eval_metric':"auc"
}

params['eval_metric'] = 'error'
params['nthread'] = 4
params['seed'] = 1
params["booster"] = "gbtree"

In [28]:
watchlist = [(dX_val, 'eval')]
watchlist

[(<xgboost.core.DMatrix at 0x1a0dcea438>, 'eval')]

In [29]:
gbm = xgb.train(params, dtrain = dtrain, num_boost_round = 200, evals = watchlist, 
            early_stopping_rounds=100, 
            maximize=False, verbose_eval=10)

#gbm = train(params, dtrain, num_boost_round = 200)

[0]	eval-error:0.363636
Will train until eval-error hasn't improved in 100 rounds.
[10]	eval-error:0.263636
[20]	eval-error:0.272727
[30]	eval-error:0.263636
[40]	eval-error:0.259091
[50]	eval-error:0.25
[60]	eval-error:0.25
[70]	eval-error:0.240909
[80]	eval-error:0.245455
[90]	eval-error:0.236364
[100]	eval-error:0.227273
[110]	eval-error:0.218182
[120]	eval-error:0.204545
[130]	eval-error:0.204545
[140]	eval-error:0.209091
[150]	eval-error:0.209091
[160]	eval-error:0.204545
[170]	eval-error:0.186364
[180]	eval-error:0.177273
[190]	eval-error:0.177273
[199]	eval-error:0.172727


In [30]:
y_val_pred = gbm.predict(dX_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0

In [31]:
fbeta_score(beta=0.5, y_pred=y_val_pred, y_true=y_val)

0.81159420289855089

In [32]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_val, y_val_pred)
cm

array([[126,   9],
       [ 29,  56]])

In [33]:
print('Precision: ' + str(np.round(100*float((cm[1][1]))/float((cm[1][1]+cm[0][1])),2))+'%')
print('Recall: ' + str(np.round(100*float((cm[1][1]))/float((cm[1][0]+cm[1][1])),2))+'%')

Precision: 86.15%
Recall: 65.88%


In [34]:
dtest = xgb.DMatrix(test)
dtest

<xgboost.core.DMatrix at 0x1a0dd04358>

In [35]:
y_pred = gbm.predict(dtest)

In [36]:
test.columns

Index(['Street', 'From', 'To', 'Street.Length', 'from_to', 'occ_per_1',
       'vac_per_1', 'occ_per_2', 'vac_per_2', 'occ_per_3', 'vac_per_3',
       'occ_per_4', 'vac_per_4', 'count', 'Month', 'Week', 'Day', 'Dayofweek',
       'Dayofyear', 'hr', 'min', 'isweekday', 'mean_from_to_any',
       'mean_from_to_real', 'mean_isweekday_any', 'mean_isweekday_real',
       'mean_hr_any', 'mean_hr_real', 'mean_dow_any', 'mean_dow_real',
       'mean_month_any', 'mean_month_real', 'lat_from', 'lon_from', 'lat_to',
       'lon_to'],
      dtype='object')

In [37]:
y = y_pred
y[y >= 0.5] = 1
y[y < 0.5] = 0

In [38]:
sum(y == 0), sum(y == 1)

(624, 102)

In [39]:
prediction = {"id":range(1,727), "any_spot":y}

In [40]:
sub_v3 = pd.DataFrame(prediction)

In [41]:
sub_v3 = sub_v3.astype("int")

In [42]:
sub_v3.to_csv("xgb_0306Tina_best_set_hypa.csv", index =False)