### Load libraries

In [1]:
#General
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
from math import radians, cos, sin, asin, sqrt
import time
import math
import matplotlib.pylab as plt
import datetime as dt
#Scikit
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, KFold
from sklearn import ensemble
from sklearn.metrics import mean_squared_error
from sklearn.cluster import MiniBatchKMeans
#XGBoost
import xgboost as xgb
from xgboost.sklearn import XGBRegressor




### Loading data

In [2]:
dfTest = pd.read_csv("Data/test.csv")
dfTrain = pd.read_csv("Data/train.csv")

#OSRM Data
OSRMtest = pd.read_csv("Data/OSRMtest.csv")
OSRMtrain = pd.read_csv("Data/OSRMtrain.csv")

dfTrain["OSMR_distance"] = OSRMtrain["total_distance"]
dfTrain["OSMR_duration"] = OSRMtrain["total_duration"]

dfTest["OSMR_distance"] = OSRMtest["total_distance"]
dfTest["OSMR_duration"] = OSRMtest["total_duration"]

### Define functions needed

In [3]:
#Adding distance in km
# credit to: https://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    km = 6367 * c
    return km

### Feature engineering

In [4]:
#Concat data for featuring engineering
features = dfTest.columns
label = "trip_duration"
data = pd.concat([dfTrain[features], dfTest], keys=['train','test'])

#Clustering on location in NY
t0 = dt.datetime.now()
coords = np.vstack((data[['pickup_latitude', 'pickup_longitude']].values,
                    data[['dropoff_latitude', 'dropoff_longitude']].values))
sample_ind = np.random.permutation(len(coords))[:500000]
kmeans = MiniBatchKMeans(n_clusters=100, batch_size=10000).fit(coords[sample_ind])

data['pickup_cluster'] = kmeans.predict(data[['pickup_latitude', 'pickup_longitude']])
data['dropoff_cluster'] = kmeans.predict(data[['dropoff_latitude', 'dropoff_longitude']])
t1 = dt.datetime.now()
print('Clustering time : %i seconds' % (t1 - t0).seconds)

#Add datetime infos in diff columns
data['pickup_datetime'] = pd.to_datetime(data['pickup_datetime'])
data['pickup_year'] = data['pickup_datetime'].dt.year
data['pickup_month'] = data['pickup_datetime'].dt.month
data['pickup_weekday'] = data['pickup_datetime'].dt.weekday
data['pickup_day'] = data['pickup_datetime'].dt.day
data['pickup_hour'] = data['pickup_datetime'].dt.hour
data['pickup_minute'] = data['pickup_datetime'].dt.minute

data.drop('pickup_datetime', axis=1, inplace=True)
data.drop('pickup_year', axis=1, inplace=True)

#Change storeflag in continue
data_dict = {'Y':1, 'N':0}
data_tf = data['store_and_fwd_flag'].map(data_dict)
data['store_and_fwd_flag'].update(data_tf)

#Add distance (takes a while)
t0 = dt.datetime.now()
data['distance'] = data.apply(lambda row: haversine(row['pickup_latitude'], row['pickup_longitude'], row['dropoff_latitude'], row['dropoff_longitude']), axis=1)
t1 = dt.datetime.now()
print('Computing distance : %i seconds' % (t1 - t0).seconds)

#Drop tails
train_set = data.loc['train']
test_set = data.loc['test']
data_temp = train_set
data_temp[label] = dfTrain[label]
data_temp = data_temp[data_temp[label] < 1800000]
target = data_temp[label]
target_log = np.log(target+1)
train_set = data_temp
data = train_set

#Drop id and labels
data = train_set.drop(['id', label], axis=1).astype(float)

Clustering time : 3 seconds
Computing distance : 156 seconds


### Train model XGBoost with optimal parameters

In [16]:
#Best params obtained with hypertunning below

model = XGBRegressor(
 learning_rate =0.1,
 n_estimators=10000,
 max_depth=7,
 min_child_weight=3,
 gamma=0.4,
 subsample=0.9,
 colsample_bytree=0.9,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 reg_alpha=100,
 seed=27)

XGBmodelfit(xgb1, data, target_log)

X_train: (1239844, 17)  Y_train: (1239844,)  X_test: (218796, 17)  Y_test: (218796,)
[0]	validation_0-rmse:5.42159
Will train until validation_0-rmse hasn't improved in 50 rounds.
[20]	validation_0-rmse:0.79084
[40]	validation_0-rmse:0.425122
[60]	validation_0-rmse:0.407884
[80]	validation_0-rmse:0.403298
[100]	validation_0-rmse:0.400656
[120]	validation_0-rmse:0.397921
[140]	validation_0-rmse:0.395894
[160]	validation_0-rmse:0.394543
[180]	validation_0-rmse:0.393253
[200]	validation_0-rmse:0.39211
[220]	validation_0-rmse:0.390937
[240]	validation_0-rmse:0.389834
[260]	validation_0-rmse:0.388906
[280]	validation_0-rmse:0.388168
[300]	validation_0-rmse:0.387335
[320]	validation_0-rmse:0.386684
[340]	validation_0-rmse:0.38609
[360]	validation_0-rmse:0.385463
[380]	validation_0-rmse:0.385048
[400]	validation_0-rmse:0.384609
[420]	validation_0-rmse:0.384177
[440]	validation_0-rmse:0.383732
[460]	validation_0-rmse:0.383446
[480]	validation_0-rmse:0.383087
[500]	validation_0-rmse:0.382727
[5

[4800]	validation_0-rmse:0.374544
[4820]	validation_0-rmse:0.374542
[4840]	validation_0-rmse:0.374532
[4860]	validation_0-rmse:0.374526
[4880]	validation_0-rmse:0.374521
[4900]	validation_0-rmse:0.374517
[4920]	validation_0-rmse:0.37451
[4940]	validation_0-rmse:0.374507
[4960]	validation_0-rmse:0.3745
[4980]	validation_0-rmse:0.374493
[5000]	validation_0-rmse:0.374487
[5020]	validation_0-rmse:0.374483
[5040]	validation_0-rmse:0.374482
[5060]	validation_0-rmse:0.37448
[5080]	validation_0-rmse:0.374476
[5100]	validation_0-rmse:0.374459
[5120]	validation_0-rmse:0.374454
[5140]	validation_0-rmse:0.374453
[5160]	validation_0-rmse:0.374456
[5180]	validation_0-rmse:0.374447
[5200]	validation_0-rmse:0.374444
[5220]	validation_0-rmse:0.374442
[5240]	validation_0-rmse:0.374439
[5260]	validation_0-rmse:0.374435
[5280]	validation_0-rmse:0.374431
[5300]	validation_0-rmse:0.374432
[5320]	validation_0-rmse:0.374426
[5340]	validation_0-rmse:0.374418
[5360]	validation_0-rmse:0.374408
[5380]	validation_

### Submission

In [17]:
dataSub = test_set.drop('id', axis=1).astype(float)
Y_eval_log = model.predict(dataSub)
Y_eval = np.exp(Y_eval_log.ravel())-1
submit_file = pd.DataFrame({'id': test_set['id'], 'trip_duration': Y_eval})
submit_file.to_csv('Data/submission.csv',index=False)

### Tunning hyperparameters for XGBoost

Define function that we are going to use.

In [12]:
def XGBmodelfit(alg, Xtrain, Ytrain):
    
    t0 = dt.datetime.now()
    
    #Get Parameters
    xgb_param = alg.get_xgb_params()
    
    #Define matrix
    xgtrain = xgb.DMatrix(Xtrain.values, label=Ytrain.values)
    
    #Define model for CV
    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=2,
            metrics='rmse', early_stopping_rounds=50, verbose_eval=10)
    
    #Set optimal n_estimators to alg
    alg.set_params(n_estimators=cvresult.shape[0])
    
    #Fit the algorithm on the data
    alg.fit(Xtrain, Ytrain,eval_metric='rmse')
        
    #Predict training set:
    dtrain_predictions = alg.predict(Xtrain)
    
    t1 = dt.datetime.now()
        
    #Print model report:
    print("\nModel Report")
    print("\nBest n_estimators : " + str(cvresult.shape[0]))
    print("RMSE : %.4g" % mean_squared_error(Ytrain.values, dtrain_predictions))
    print('Training time: %i seconds' % (t1 - t0).seconds)

#### Step 1: Fix learning rate and number of estimators for tuning tree-based parameters

In [108]:
xgb1 = XGBRegressor(
 learning_rate =0.3,
 n_estimators=10000,
 max_depth=5,
 min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'reg:linear',
 scale_pos_weight=1,
 nthread=-1,
 seed=27)

XGBmodelfit(xgb1, data, target_log)

[0]	train-rmse:4.22737+1.4e-05	test-rmse:4.22736+0.0002195
[10]	train-rmse:0.448065+0.000419	test-rmse:0.449236+0.0009345
[20]	train-rmse:0.414902+0.000226	test-rmse:0.417081+0.0006475
[30]	train-rmse:0.408299+0.000398	test-rmse:0.41152+6.85e-05
[40]	train-rmse:0.404025+0.000993	test-rmse:0.40815+0.0003455
[50]	train-rmse:0.400341+0.000996	test-rmse:0.405595+0.0005045
[60]	train-rmse:0.397861+0.0008125	test-rmse:0.4042+0.0006805
[70]	train-rmse:0.395646+0.001084	test-rmse:0.403033+0.0009105
[80]	train-rmse:0.393298+0.0004945	test-rmse:0.401486+0.0002455
[90]	train-rmse:0.391382+0.0004045	test-rmse:0.400841+0.0002245
[100]	train-rmse:0.389838+0.00046	test-rmse:0.400544+0.0002115
[110]	train-rmse:0.388278+0.0005205	test-rmse:0.399861+0.0003195
[120]	train-rmse:0.386961+0.0007305	test-rmse:0.39954+0.000405
[130]	train-rmse:0.385317+0.0007045	test-rmse:0.39885+0.000351
[140]	train-rmse:0.383711+0.0006935	test-rmse:0.398363+0.0004185
[150]	train-rmse:0.382451+0.0009075	test-rmse:0.398124+0.

#### Step 2: Tune max_depth and min_child_weight

In [112]:
param_test1 = {
 'max_depth':range(3,10,2),
 'min_child_weight':range(1,6,2)
}

xgb1 = XGBRegressor(
 learning_rate =0.3,
 n_estimators=665,
 #max_depth=5,
 #min_child_weight=1,
 gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 seed=27)

gsearch1 = GridSearchCV(xgb1 , param_grid = param_test1, scoring='neg_mean_squared_error',n_jobs=-1,iid=False, cv=2, verbose=20)

#Fit
gsearch1.fit(data, target_log)

#Print scores
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

Fitting 2 folds for each of 12 candidates, totalling 24 fits


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  9.5min
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  9.5min
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed: 10.5min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed: 11.3min
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 12.2min
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed: 12.4min
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed: 16.7min
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed: 21.1min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 25.6min
[Parallel(n_jobs=-1)]: Done  11 out of  24 | elapsed: 28.7min remaining: 33.9min
[Parallel(n_jobs=-1)]: Done  13 out of  24 | elapsed: 41.1min remaining: 34.8min
[Parallel(n_jobs=-1)]: Done  15 out of  24 | elapsed: 44.5min remaining: 26.7min
[Parallel(n_jobs=-1)]: Done  17 out of  24 | elapsed: 52.4min remaining: 21.6min
[Parallel(n_jobs=-1)]: Done  19 out of  24 | elapsed: 63.1min remaining: 16.6min
[Parallel(n_jobs=-1)]: Done  21 out o

([mean: -0.16060, std: 0.00012, params: {'max_depth': 3, 'min_child_weight': 1},
  mean: -0.16080, std: 0.00041, params: {'max_depth': 3, 'min_child_weight': 3},
  mean: -0.16076, std: 0.00007, params: {'max_depth': 3, 'min_child_weight': 5},
  mean: -0.15540, std: 0.00027, params: {'max_depth': 5, 'min_child_weight': 1},
  mean: -0.15551, std: 0.00002, params: {'max_depth': 5, 'min_child_weight': 3},
  mean: -0.15535, std: 0.00020, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: -0.15503, std: 0.00008, params: {'max_depth': 7, 'min_child_weight': 1},
  mean: -0.15453, std: 0.00005, params: {'max_depth': 7, 'min_child_weight': 3},
  mean: -0.15494, std: 0.00041, params: {'max_depth': 7, 'min_child_weight': 5},
  mean: -0.15749, std: 0.00007, params: {'max_depth': 9, 'min_child_weight': 1},
  mean: -0.15803, std: 0.00052, params: {'max_depth': 9, 'min_child_weight': 3},
  mean: -0.15789, std: 0.00002, params: {'max_depth': 9, 'min_child_weight': 5}],
 {'max_depth': 7, 'min_chil

In [113]:
XGBmodelfit(gsearch1.best_estimator_, data, target_log)

[0]	train-rmse:4.22629+4.5e-05	test-rmse:4.22636+0.0002265
[10]	train-rmse:0.430665+0.0006635	test-rmse:0.43444+0.001239
[20]	train-rmse:0.398298+0.000124	test-rmse:0.406546+0.000459
[30]	train-rmse:0.39004+0.000779	test-rmse:0.401718+0.0002185
[40]	train-rmse:0.383791+0.001006	test-rmse:0.398533+0.000118
[50]	train-rmse:0.379731+0.0014515	test-rmse:0.397421+0.0001825
[60]	train-rmse:0.376082+0.0015195	test-rmse:0.396543+0.0004905
[70]	train-rmse:0.372679+0.001158	test-rmse:0.39586+0.000224
[80]	train-rmse:0.369534+0.0012065	test-rmse:0.395027+0.0001165
[90]	train-rmse:0.367053+0.000715	test-rmse:0.394624+0.000178
[100]	train-rmse:0.364228+0.0002015	test-rmse:0.394314+0.000564
[110]	train-rmse:0.361997+0.000426	test-rmse:0.394221+0.000201
[120]	train-rmse:0.359851+0.0004415	test-rmse:0.394006+0.0003505
[130]	train-rmse:0.357794+0.000168	test-rmse:0.393686+0.0005285
[140]	train-rmse:0.355931+0.000256	test-rmse:0.39342+0.0004945
[150]	train-rmse:0.353912+0.00018	test-rmse:0.393379+0.0005

#### Step 3: Tune gamma

In [5]:
param_test1 = {
 'gamma':[i/10.0 for i in range(0,5)]
}

xgb1 = XGBRegressor(
 learning_rate =0.3,
 n_estimators=665,
 max_depth=7,
 min_child_weight=3,
 #gamma=0,
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 seed=27)

gsearch1 = GridSearchCV(xgb1 , param_grid = param_test1, scoring='neg_mean_squared_error',n_jobs=-1,iid=False, cv=2, verbose=20)

#Fit
gsearch1.fit(data, target_log)

#Print scores
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

Fitting 2 folds for each of 5 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 23.6min
[Parallel(n_jobs=-1)]: Done   2 out of  10 | elapsed: 24.9min remaining: 99.6min
[Parallel(n_jobs=-1)]: Done   3 out of  10 | elapsed: 25.7min remaining: 59.9min
[Parallel(n_jobs=-1)]: Done   4 out of  10 | elapsed: 25.8min remaining: 38.6min
[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed: 26.3min remaining: 26.3min
[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed: 26.9min remaining: 17.9min
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed: 27.2min remaining: 11.7min
[Parallel(n_jobs=-1)]: Done   8 out of  10 | elapsed: 27.5min remaining:  6.9min
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 32.9min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 32.9min finished


([mean: -0.15457, std: 0.00001, params: {'gamma': 0.0},
  mean: -0.15500, std: 0.00002, params: {'gamma': 0.1},
  mean: -0.15476, std: 0.00055, params: {'gamma': 0.2},
  mean: -0.15469, std: 0.00016, params: {'gamma': 0.3},
  mean: -0.15452, std: 0.00010, params: {'gamma': 0.4}],
 {'gamma': 0.4},
 -0.15451592362663652)

#### Step 4: Tune subsample and colsample_bytree

In [8]:
param_test1 = {
 'subsample':[i/10.0 for i in range(9,13)],
 'colsample_bytree':[i/10.0 for i in range(9,13)]
}

xgb1 = XGBRegressor(
 learning_rate =0.3,
 n_estimators=665,
 max_depth=7,
 min_child_weight=3,
 gamma=0.4,
 #subsample=0.8,
 #colsample_bytree=0.8,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 seed=27)

gsearch1 = GridSearchCV(xgb1 , param_grid = param_test1, scoring='neg_mean_squared_error',n_jobs=-1,iid=False, cv=2, verbose=20)

#Fit
gsearch1.fit(data, target_log)

#Print scores
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

Fitting 2 folds for each of 16 candidates, totalling 32 fits


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed: 25.3min
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed: 26.2min
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed: 26.6min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed: 27.0min
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 27.5min
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed: 27.9min
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed: 28.0min


JoblibXGBoostError: JoblibXGBoostError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\Clement\Anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel.__main__', loader=<_f...da3\\lib\\site-packages\\ipykernel\\__main__.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\Clement\Anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x0000026C44A04C00, fil...lib\site-packages\ipykernel\__main__.py", line 1>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\__pycache__\__main__.cpython-36.pyc', '__doc__': None, '__file__': r'C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\__main__.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': 'ipykernel', '__spec__': ModuleSpec(name='ipykernel.__main__', loader=<_f...da3\\lib\\site-packages\\ipykernel\\__main__.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\C...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel.__main__', loader=<_f...da3\\lib\\site-packages\\ipykernel\\__main__.py'), pkg_name='ipykernel', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x0000026C44A04C00, fil...lib\site-packages\ipykernel\__main__.py", line 1>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\__pycache__\__main__.cpython-36.pyc', '__doc__': None, '__file__': r'C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\__main__.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': 'ipykernel', '__spec__': ModuleSpec(name='ipykernel.__main__', loader=<_f...da3\\lib\\site-packages\\ipykernel\\__main__.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\C...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    469             return self.subapp.start()
    470         if self.poller is not None:
    471             self.poller.start()
    472         self.kernel.start()
    473         try:
--> 474             ioloop.IOLoop.instance().start()
    475         except KeyboardInterrupt:
    476             pass
    477 
    478 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    882                 self._events.update(event_pairs)
    883                 while self._events:
    884                     fd, events = self._events.popitem()
    885                     try:
    886                         fd_obj, handler_func = self._handlers[fd]
--> 887                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    888                     except (OSError, IOError) as e:
    889                         if errno_from_exception(e) == errno.EPIPE:
    890                             # Happens when the client closes the connection
    891                             pass

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2017-07-27T17:21:25.201603', 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'session': '4C4C247EF88041718B6CCA98C0B0ACC8', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'4C4C247EF88041718B6CCA98C0B0ACC8']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2017-07-27T17:21:25.201603', 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'session': '4C4C247EF88041718B6CCA98C0B0ACC8', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'4C4C247EF88041718B6CCA98C0B0ACC8'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2017-07-27T17:21:25.201603', 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'session': '4C4C247EF88041718B6CCA98C0B0ACC8', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'F6442900ED8E440388C153CEDDF429DE', 'msg_type': 'execute_request', 'parent_header': {}})
    385         if not silent:
    386             self.execution_count += 1
    387             self._publish_execute_input(code, parent, self.execution_count)
    388 
    389         reply_content = self.do_execute(code, silent, store_history,
--> 390                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    391 
    392         # Flush output before sending the reply.
    393         sys.stdout.flush()
    394         sys.stderr.flush()

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_",), **kwargs={'silent': False, 'store_history': True})
    496             )
    497         self.payload_manager.write_payload(payload)
    498 
    499     def run_cell(self, *args, **kwargs):
    500         self._last_traceback = None
--> 501         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_",)
        kwargs = {'silent': False, 'store_history': True}
    502 
    503     def _showtraceback(self, etype, evalue, stb):
    504         # try to preserve ordering of tracebacks and print statements
    505         sys.stdout.flush()

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-8-d95a459dc1ce>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 26c4a985080, executio..._before_exec=None error_in_exec=None result=None>)
   2816 
   2817         try:
   2818             for i, node in enumerate(to_run_exec):
   2819                 mod = ast.Module([node])
   2820                 code = compiler(mod, cell_name, "exec")
-> 2821                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x0000026C3F88F4B0, file "<ipython-input-8-d95a459dc1ce>", line 22>
        result = <ExecutionResult object at 26c4a985080, executio..._before_exec=None error_in_exec=None result=None>
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x0000026C3F88F4B0, file "<ipython-input-8-d95a459dc1ce>", line 22>, result=<ExecutionResult object at 26c4a985080, executio..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x0000026C3F88F4B0, file "<ipython-input-8-d95a459dc1ce>", line 22>
        self.user_global_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', '#General\nimport pandas as pd\npd.options.mode.cha...t as xgb\nfrom xgboost.sklearn import XGBRegressor', 'dfTest = pd.read_csv("Data/test.csv")\ndfTrain = ...est["OSMR_duration"] = OSRMtest["total_duration"]', '#Adding distance in km\n# credit to: https://stac... * asin(sqrt(a)) \n    km = 6367 * c\n    return km', "#Concat data for featuring engineering\nfeatures ...ain_set.drop(['id', label], axis=1).astype(float)", "param_test2 = {\n 'gamma':[i/10.0 for i in range(...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_"], 'KFold': <class 'sklearn.model_selection._split.KFold'>, 'MiniBatchKMeans': <class 'sklearn.cluster.k_means_.MiniBatchKMeans'>, 'OSRMtest':                id  vendor_id      pickup_datetim...1,37.2,73.4,26.9...  

[625134 rows x 17 columns], 'OSRMtrain':                 id  vendor_id      pickup_dateti...            90.7,0  

[1458644 rows x 18 columns], 'Out': {5: ([mean: -0.15457, std: 0.00001, params: {'gamma': 0.0}, mean: -0.15500, std: 0.00002, params: {'gamma': 0.1}, mean: -0.15476, std: 0.00055, params: {'gamma': 0.2}, mean: -0.15469, std: 0.00016, params: {'gamma': 0.3}, mean: -0.15452, std: 0.00010, params: {'gamma': 0.4}], {'gamma': 0.4}, -0.15451592362663652), 6: ([mean: -0.15128, std: 0.00016, params: {'gamma': 0.0}, mean: -0.15108, std: 0.00044, params: {'gamma': 0.1}, mean: -0.15125, std: 0.00031, params: {'gamma': 0.2}, mean: -0.15098, std: 0.00026, params: {'gamma': 0.3}, mean: -0.15106, std: 0.00053, params: {'gamma': 0.4}], {'gamma': 0.3}, -0.15097789445622198), 7: ([mean: -0.15914, std: 0.00036, params: {'colsample_bytree': 0.6, 'subsample': 0.6}, mean: -0.15705, std: 0.00019, params: {'colsample_bytree': 0.6, 'subsample': 0.7}, mean: -0.15487, std: 0.00012, params: {'colsample_bytree': 0.6, 'subsample': 0.8}, mean: -0.15328, std: 0.00011, params: {'colsample_bytree': 0.6, 'subsample': 0.9}, mean: -0.15929, std: 0.00004, params: {'colsample_bytree': 0.7, 'subsample': 0.6}, mean: -0.15673, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.7}, mean: -0.15480, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.8}, mean: -0.15337, std: 0.00008, params: {'colsample_bytree': 0.7, 'subsample': 0.9}, mean: -0.15927, std: 0.00047, params: {'colsample_bytree': 0.8, 'subsample': 0.6}, mean: -0.15660, std: 0.00020, params: {'colsample_bytree': 0.8, 'subsample': 0.7}, mean: -0.15452, std: 0.00010, params: {'colsample_bytree': 0.8, 'subsample': 0.8}, mean: -0.15329, std: 0.00018, params: {'colsample_bytree': 0.8, 'subsample': 0.9}, mean: -0.15898, std: 0.00010, params: {'colsample_bytree': 0.9, 'subsample': 0.6}, mean: -0.15640, std: 0.00065, params: {'colsample_bytree': 0.9, 'subsample': 0.7}, mean: -0.15436, std: 0.00012, params: {'colsample_bytree': 0.9, 'subsample': 0.8}, mean: -0.15276, std: 0.00031, params: {'colsample_bytree': 0.9, 'subsample': 0.9}], {'colsample_bytree': 0.9, 'subsample': 0.9}, -0.1527589964180896)}, 'XGBRegressor': <class 'xgboost.sklearn.XGBRegressor'>, '_': ([mean: -0.15914, std: 0.00036, params: {'colsample_bytree': 0.6, 'subsample': 0.6}, mean: -0.15705, std: 0.00019, params: {'colsample_bytree': 0.6, 'subsample': 0.7}, mean: -0.15487, std: 0.00012, params: {'colsample_bytree': 0.6, 'subsample': 0.8}, mean: -0.15328, std: 0.00011, params: {'colsample_bytree': 0.6, 'subsample': 0.9}, mean: -0.15929, std: 0.00004, params: {'colsample_bytree': 0.7, 'subsample': 0.6}, mean: -0.15673, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.7}, mean: -0.15480, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.8}, mean: -0.15337, std: 0.00008, params: {'colsample_bytree': 0.7, 'subsample': 0.9}, mean: -0.15927, std: 0.00047, params: {'colsample_bytree': 0.8, 'subsample': 0.6}, mean: -0.15660, std: 0.00020, params: {'colsample_bytree': 0.8, 'subsample': 0.7}, mean: -0.15452, std: 0.00010, params: {'colsample_bytree': 0.8, 'subsample': 0.8}, mean: -0.15329, std: 0.00018, params: {'colsample_bytree': 0.8, 'subsample': 0.9}, mean: -0.15898, std: 0.00010, params: {'colsample_bytree': 0.9, 'subsample': 0.6}, mean: -0.15640, std: 0.00065, params: {'colsample_bytree': 0.9, 'subsample': 0.7}, mean: -0.15436, std: 0.00012, params: {'colsample_bytree': 0.9, 'subsample': 0.8}, mean: -0.15276, std: 0.00031, params: {'colsample_bytree': 0.9, 'subsample': 0.9}], {'colsample_bytree': 0.9, 'subsample': 0.9}, -0.1527589964180896), '_5': ([mean: -0.15457, std: 0.00001, params: {'gamma': 0.0}, mean: -0.15500, std: 0.00002, params: {'gamma': 0.1}, mean: -0.15476, std: 0.00055, params: {'gamma': 0.2}, mean: -0.15469, std: 0.00016, params: {'gamma': 0.3}, mean: -0.15452, std: 0.00010, params: {'gamma': 0.4}], {'gamma': 0.4}, -0.15451592362663652), ...}
        self.user_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', '#General\nimport pandas as pd\npd.options.mode.cha...t as xgb\nfrom xgboost.sklearn import XGBRegressor', 'dfTest = pd.read_csv("Data/test.csv")\ndfTrain = ...est["OSMR_duration"] = OSRMtest["total_duration"]', '#Adding distance in km\n# credit to: https://stac... * asin(sqrt(a)) \n    km = 6367 * c\n    return km', "#Concat data for featuring engineering\nfeatures ...ain_set.drop(['id', label], axis=1).astype(float)", "param_test2 = {\n 'gamma':[i/10.0 for i in range(...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_", "param_test1 = {\n 'subsample':[i/10.0 for i in ra...res_, gsearch1.best_params_, gsearch1.best_score_"], 'KFold': <class 'sklearn.model_selection._split.KFold'>, 'MiniBatchKMeans': <class 'sklearn.cluster.k_means_.MiniBatchKMeans'>, 'OSRMtest':                id  vendor_id      pickup_datetim...1,37.2,73.4,26.9...  

[625134 rows x 17 columns], 'OSRMtrain':                 id  vendor_id      pickup_dateti...            90.7,0  

[1458644 rows x 18 columns], 'Out': {5: ([mean: -0.15457, std: 0.00001, params: {'gamma': 0.0}, mean: -0.15500, std: 0.00002, params: {'gamma': 0.1}, mean: -0.15476, std: 0.00055, params: {'gamma': 0.2}, mean: -0.15469, std: 0.00016, params: {'gamma': 0.3}, mean: -0.15452, std: 0.00010, params: {'gamma': 0.4}], {'gamma': 0.4}, -0.15451592362663652), 6: ([mean: -0.15128, std: 0.00016, params: {'gamma': 0.0}, mean: -0.15108, std: 0.00044, params: {'gamma': 0.1}, mean: -0.15125, std: 0.00031, params: {'gamma': 0.2}, mean: -0.15098, std: 0.00026, params: {'gamma': 0.3}, mean: -0.15106, std: 0.00053, params: {'gamma': 0.4}], {'gamma': 0.3}, -0.15097789445622198), 7: ([mean: -0.15914, std: 0.00036, params: {'colsample_bytree': 0.6, 'subsample': 0.6}, mean: -0.15705, std: 0.00019, params: {'colsample_bytree': 0.6, 'subsample': 0.7}, mean: -0.15487, std: 0.00012, params: {'colsample_bytree': 0.6, 'subsample': 0.8}, mean: -0.15328, std: 0.00011, params: {'colsample_bytree': 0.6, 'subsample': 0.9}, mean: -0.15929, std: 0.00004, params: {'colsample_bytree': 0.7, 'subsample': 0.6}, mean: -0.15673, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.7}, mean: -0.15480, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.8}, mean: -0.15337, std: 0.00008, params: {'colsample_bytree': 0.7, 'subsample': 0.9}, mean: -0.15927, std: 0.00047, params: {'colsample_bytree': 0.8, 'subsample': 0.6}, mean: -0.15660, std: 0.00020, params: {'colsample_bytree': 0.8, 'subsample': 0.7}, mean: -0.15452, std: 0.00010, params: {'colsample_bytree': 0.8, 'subsample': 0.8}, mean: -0.15329, std: 0.00018, params: {'colsample_bytree': 0.8, 'subsample': 0.9}, mean: -0.15898, std: 0.00010, params: {'colsample_bytree': 0.9, 'subsample': 0.6}, mean: -0.15640, std: 0.00065, params: {'colsample_bytree': 0.9, 'subsample': 0.7}, mean: -0.15436, std: 0.00012, params: {'colsample_bytree': 0.9, 'subsample': 0.8}, mean: -0.15276, std: 0.00031, params: {'colsample_bytree': 0.9, 'subsample': 0.9}], {'colsample_bytree': 0.9, 'subsample': 0.9}, -0.1527589964180896)}, 'XGBRegressor': <class 'xgboost.sklearn.XGBRegressor'>, '_': ([mean: -0.15914, std: 0.00036, params: {'colsample_bytree': 0.6, 'subsample': 0.6}, mean: -0.15705, std: 0.00019, params: {'colsample_bytree': 0.6, 'subsample': 0.7}, mean: -0.15487, std: 0.00012, params: {'colsample_bytree': 0.6, 'subsample': 0.8}, mean: -0.15328, std: 0.00011, params: {'colsample_bytree': 0.6, 'subsample': 0.9}, mean: -0.15929, std: 0.00004, params: {'colsample_bytree': 0.7, 'subsample': 0.6}, mean: -0.15673, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.7}, mean: -0.15480, std: 0.00001, params: {'colsample_bytree': 0.7, 'subsample': 0.8}, mean: -0.15337, std: 0.00008, params: {'colsample_bytree': 0.7, 'subsample': 0.9}, mean: -0.15927, std: 0.00047, params: {'colsample_bytree': 0.8, 'subsample': 0.6}, mean: -0.15660, std: 0.00020, params: {'colsample_bytree': 0.8, 'subsample': 0.7}, mean: -0.15452, std: 0.00010, params: {'colsample_bytree': 0.8, 'subsample': 0.8}, mean: -0.15329, std: 0.00018, params: {'colsample_bytree': 0.8, 'subsample': 0.9}, mean: -0.15898, std: 0.00010, params: {'colsample_bytree': 0.9, 'subsample': 0.6}, mean: -0.15640, std: 0.00065, params: {'colsample_bytree': 0.9, 'subsample': 0.7}, mean: -0.15436, std: 0.00012, params: {'colsample_bytree': 0.9, 'subsample': 0.8}, mean: -0.15276, std: 0.00031, params: {'colsample_bytree': 0.9, 'subsample': 0.9}], {'colsample_bytree': 0.9, 'subsample': 0.9}, -0.1527589964180896), '_5': ([mean: -0.15457, std: 0.00001, params: {'gamma': 0.0}, mean: -0.15500, std: 0.00002, params: {'gamma': 0.1}, mean: -0.15476, std: 0.00055, params: {'gamma': 0.2}, mean: -0.15469, std: 0.00016, params: {'gamma': 0.3}, mean: -0.15452, std: 0.00010, params: {'gamma': 0.4}], {'gamma': 0.4}, -0.15451592362663652), ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
C:\Users\Clement\iNotebook\Kaggle\TaxisNYC\<ipython-input-8-d95a459dc1ce> in <module>()
     17  seed=27)
     18 
     19 gsearch1 = GridSearchCV(xgb1 , param_grid = param_test1, scoring='neg_mean_squared_error',n_jobs=-1,iid=False, cv=2, verbose=20)
     20 
     21 #Fit
---> 22 gsearch1.fit(data, target_log)
     23 
     24 #Print scores
     25 gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_
     26 

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=2, error_score='raise',
       e...    scoring='neg_mean_squared_error', verbose=20), X=         vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns], y=0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64, groups=None)
    940 
    941         groups : array-like, with shape (n_samples,), optional
    942             Group labels for the samples used while splitting the dataset into
    943             train/test set.
    944         """
--> 945         return self._fit(X, y, groups, ParameterGrid(self.param_grid))
        self._fit = <bound method BaseSearchCV._fit of GridSearchCV(...   scoring='neg_mean_squared_error', verbose=20)>
        X =          vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns]
        y = 0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64
        groups = None
        self.param_grid = {'colsample_bytree': [0.9, 1.0, 1.1, 1.2], 'subsample': [0.9, 1.0, 1.1, 1.2]}
    946 
    947 
    948 class RandomizedSearchCV(BaseSearchCV):
    949     """Randomized search on hyper parameters.

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _fit(self=GridSearchCV(cv=2, error_score='raise',
       e...    scoring='neg_mean_squared_error', verbose=20), X=         vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns], y=0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64, groups=None, parameter_iterable=<sklearn.model_selection._search.ParameterGrid object>)
    559                                   fit_params=self.fit_params,
    560                                   return_train_score=self.return_train_score,
    561                                   return_n_test_samples=True,
    562                                   return_times=True, return_parameters=True,
    563                                   error_score=self.error_score)
--> 564           for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.model_selection._search.ParameterGrid object>
    565           for train, test in cv_iter)
    566 
    567         # if one choose to see train score, "out" will contain train score info
    568         if self.return_train_score:

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV._fit.<locals>.<genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
XGBoostError                                       Thu Jul 27 17:21:37 2017
PID: 2372               Python 3.6.0: C:\Users\Clement\Anaconda3\python.exe
...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (XGBRegressor(base_score=0.5, colsample_bylevel=1...os_weight=1, seed=27, silent=True, subsample=1.1),          vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns], 0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64, make_scorer(mean_squared_error, greater_is_better=False), memmap([ 729320,  729321,  729322, ..., 1458637, 1458638, 1458639]), memmap([     0,      1,      2, ..., 729317, 729318, 729319]), 20, {'colsample_bytree': 0.9, 'subsample': 1.1}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (XGBRegressor(base_score=0.5, colsample_bylevel=1...os_weight=1, seed=27, silent=True, subsample=1.1),          vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns], 0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64, make_scorer(mean_squared_error, greater_is_better=False), memmap([ 729320,  729321,  729322, ..., 1458637, 1458638, 1458639]), memmap([     0,      1,      2, ..., 729317, 729318, 729319]), 20, {'colsample_bytree': 0.9, 'subsample': 1.1})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=XGBRegressor(base_score=0.5, colsample_bylevel=1...os_weight=1, seed=27, silent=True, subsample=1.1), X=         vendor_id  passenger_count  pickup_long...1458643   0.795369  

[1458640 rows x 17 columns], y=0          6.122493
1          6.498282
2       ...3    5.293305
Name: trip_duration, dtype: float64, scorer=make_scorer(mean_squared_error, greater_is_better=False), train=memmap([ 729320,  729321,  729322, ..., 1458637, 1458638, 1458639]), test=memmap([     0,      1,      2, ..., 729317, 729318, 729319]), verbose=20, parameters={'colsample_bytree': 0.9, 'subsample': 1.1}, fit_params={}, return_train_score=True, return_parameters=True, return_n_test_samples=True, return_times=True, error_score='raise')
    233 
    234     try:
    235         if y_train is None:
    236             estimator.fit(X_train, **fit_params)
    237         else:
--> 238             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method XGBModel.fit of XGBRegressor(base_...s_weight=1, seed=27, silent=True, subsample=1.1)>
        X_train =          vendor_id  passenger_count  pickup_long...
1458643   0.795369  

[729320 rows x 17 columns]
        y_train = 729322     6.350886
729323     5.730100
729324  ...3    5.293305
Name: trip_duration, dtype: float64
        fit_params = {}
    239 
    240     except Exception as e:
    241         # Note fit time as time until error
    242         fit_time = time.time() - start_time

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self=XGBRegressor(base_score=0.5, colsample_bylevel=1...os_weight=1, seed=27, silent=True, subsample=1.1), X=         vendor_id  passenger_count  pickup_long...
1458643   0.795369  

[729320 rows x 17 columns], y=729322     6.350886
729323     5.730100
729324  ...3    5.293305
Name: trip_duration, dtype: float64, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True)
    246 
    247         self._Booster = train(params, trainDmatrix,
    248                               self.n_estimators, evals=evals,
    249                               early_stopping_rounds=early_stopping_rounds,
    250                               evals_result=evals_result, obj=obj, feval=feval,
--> 251                               verbose_eval=verbose)
        verbose = True
    252 
    253         if evals_result:
    254             for val in evals_result.items():
    255                 evals_result_key = list(val[1].keys())[0]

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\xgboost\training.py in train(params={'base_score': 0.5, 'colsample_bylevel': 1, 'colsample_bytree': 0.9, 'gamma': 0.4, 'learning_rate': 0.3, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 3, 'missing': nan, 'n_estimators': 665, ...}, dtrain=<xgboost.core.DMatrix object>, num_boost_round=665, evals=(), obj=None, feval=None, maximize=False, early_stopping_rounds=None, evals_result={}, verbose_eval=True, learning_rates=None, xgb_model=None, callbacks=[<function print_evaluation.<locals>.callback>, <function record_evaluation.<locals>.callback>])
    200 
    201     return _train_internal(params, dtrain,
    202                            num_boost_round=num_boost_round,
    203                            evals=evals,
    204                            obj=obj, feval=feval,
--> 205                            xgb_model=xgb_model, callbacks=callbacks)
        xgb_model = None
        callbacks = [<function print_evaluation.<locals>.callback>, <function record_evaluation.<locals>.callback>]
    206 
    207 
    208 class CVPack(object):
    209     """"Auxiliary datastruct to hold one fold of CV."""

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\xgboost\training.py in _train_internal(params={'base_score': 0.5, 'colsample_bylevel': 1, 'colsample_bytree': 0.9, 'gamma': 0.4, 'learning_rate': 0.3, 'max_delta_step': 0, 'max_depth': 7, 'min_child_weight': 3, 'missing': nan, 'n_estimators': 665, ...}, dtrain=<xgboost.core.DMatrix object>, num_boost_round=665, evals=[], obj=None, feval=None, xgb_model=None, callbacks=[<function print_evaluation.<locals>.callback>, <function record_evaluation.<locals>.callback>])
     71                            rank=rank,
     72                            evaluation_result_list=None))
     73         # Distributed code: need to resume to this point.
     74         # Skip the first update if it is a recovery step.
     75         if version % 2 == 0:
---> 76             bst.update(dtrain, i, obj)
        bst.update = <bound method Booster.update of <xgboost.core.Booster object>>
        dtrain = <xgboost.core.DMatrix object>
        i = 0
        obj = None
     77             bst.save_rabit_checkpoint()
     78             version += 1
     79 
     80         assert(rabit.get_world_size() == 1 or version == rabit.version_number())

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\xgboost\core.py in update(self=<xgboost.core.Booster object>, dtrain=<xgboost.core.DMatrix object>, iteration=0, fobj=None)
    801         if not isinstance(dtrain, DMatrix):
    802             raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__))
    803         self._validate_features(dtrain)
    804 
    805         if fobj is None:
--> 806             _check_call(_LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle))
        self.handle = c_void_p(1948505413472)
        iteration = 0
        dtrain.handle = c_void_p(1948174249104)
    807         else:
    808             pred = self.predict(dtrain)
    809             grad, hess = fobj(pred, dtrain)
    810             self.boost(dtrain, grad, hess)

...........................................................................
C:\Users\Clement\Anaconda3\lib\site-packages\xgboost\core.py in _check_call(ret=-1)
    122     ----------
    123     ret : int
    124         return value from API calls
    125     """
    126     if ret != 0:
--> 127         raise XGBoostError(_LIB.XGBGetLastError())
    128 
    129 
    130 def ctypes2numpy(cptr, length, dtype):
    131     """Convert a ctypes pointer array to a numpy array.

XGBoostError: b'value 1.1for Parameter subsample exceed bound [0,1]'
___________________________________________________________________________

#### Step 5: Tuning Regularization Parameters

In [10]:
param_test1 = {
 #'reg_alpha':[1e-5, 1e-2, 0.1, 1, 100,1000,10000]
 'reg_alpha':[100,1000,10000]
}

xgb1 = XGBRegressor(
 learning_rate =0.3,
 n_estimators=665,
 max_depth=7,
 min_child_weight=3,
 gamma=0.4,
 subsample=0.9,
 colsample_bytree=0.9,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 seed=27)

gsearch1 = GridSearchCV(xgb1 , param_grid = param_test1, scoring='neg_mean_squared_error',n_jobs=-1,iid=False, cv=2, verbose=20)

#Fit
gsearch1.fit(data, target_log)

#Print scores
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_

Fitting 2 folds for each of 3 candidates, totalling 6 fits


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done   2 out of   6 | elapsed:  3.7min remaining:  7.5min
[Parallel(n_jobs=-1)]: Done   3 out of   6 | elapsed:  4.7min remaining:  4.7min
[Parallel(n_jobs=-1)]: Done   4 out of   6 | elapsed:  4.9min remaining:  2.4min
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  9.2min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  9.2min finished


([mean: -0.14677, std: 0.00007, params: {'reg_alpha': 100},
  mean: -0.16942, std: 0.00050, params: {'reg_alpha': 1000},
  mean: -0.20881, std: 0.00059, params: {'reg_alpha': 10000}],
 {'reg_alpha': 100},
 -0.14676679858422431)

#### Step 6: Reducing Learning Rate

In [15]:
xgb1 = XGBRegressor(
 learning_rate =0.1,
 n_estimators=50000,
 max_depth=7,
 min_child_weight=3,
 gamma=0.4,
 subsample=0.9,
 colsample_bytree=0.9,
 objective= 'reg:linear',
 nthread=-1,
 scale_pos_weight=1,
 reg_alpha=100,
 seed=27)

XGBmodelfit(xgb1, data, target_log)

[0]	train-rmse:5.42211+0.000123	test-rmse:5.42213+0.000221
[10]	train-rmse:1.94064+3.55e-05	test-rmse:1.94091+0.0003735
[20]	train-rmse:0.793302+5.5e-05	test-rmse:0.794159+0.000618
[30]	train-rmse:0.486876+0.0001745	test-rmse:0.488603+0.000548
[40]	train-rmse:0.426585+0.0004545	test-rmse:0.428839+0.0002505
[50]	train-rmse:0.413461+0.0002085	test-rmse:0.416029+0.000446
[60]	train-rmse:0.408566+0.0001985	test-rmse:0.411394+0.000426
[70]	train-rmse:0.405382+0.000194	test-rmse:0.408431+0.000439
[80]	train-rmse:0.402895+0.0001595	test-rmse:0.406154+0.0004565
[90]	train-rmse:0.401059+0.000102	test-rmse:0.404479+0.000495
[100]	train-rmse:0.399592+3.2e-05	test-rmse:0.403174+0.000557
[110]	train-rmse:0.398076+0.0001225	test-rmse:0.401797+0.0004715
[120]	train-rmse:0.397039+0.0001735	test-rmse:0.400892+0.0004135
[130]	train-rmse:0.395833+0.00028	test-rmse:0.399831+0.000307
[140]	train-rmse:0.394633+0.0004485	test-rmse:0.398764+0.0001705
[150]	train-rmse:0.393508+0.0005385	test-rmse:0.397787+7.2e

[1290]	train-rmse:0.372379+0.000668	test-rmse:0.38229+0.000275
[1300]	train-rmse:0.372328+0.000672	test-rmse:0.382265+0.000276
[1310]	train-rmse:0.372293+0.000671	test-rmse:0.382252+0.000277
[1320]	train-rmse:0.372242+0.000671	test-rmse:0.382229+0.000284
[1330]	train-rmse:0.372188+0.00067	test-rmse:0.382203+0.0002885
[1340]	train-rmse:0.372148+0.0006645	test-rmse:0.382181+0.000291
[1350]	train-rmse:0.372115+0.0006665	test-rmse:0.382167+0.000292
[1360]	train-rmse:0.37208+0.0006705	test-rmse:0.382149+0.0002855
[1370]	train-rmse:0.37203+0.00067	test-rmse:0.382125+0.000291
[1380]	train-rmse:0.371997+0.0006695	test-rmse:0.382108+0.0002965
[1390]	train-rmse:0.371961+0.0006685	test-rmse:0.382092+0.0002995
[1400]	train-rmse:0.371918+0.0006695	test-rmse:0.382068+0.000303
[1410]	train-rmse:0.371885+0.000669	test-rmse:0.382053+0.000305
[1420]	train-rmse:0.371845+0.0006615	test-rmse:0.382033+0.000309
[1430]	train-rmse:0.371801+0.000673	test-rmse:0.382008+0.0003035
[1440]	train-rmse:0.371771+0.0006

[2560]	train-rmse:0.369071+0.000744	test-rmse:0.380825+0.000312
[2570]	train-rmse:0.369049+0.0007485	test-rmse:0.380814+0.000307
[2580]	train-rmse:0.369032+0.0007545	test-rmse:0.38081+0.0003065
[2590]	train-rmse:0.36901+0.000757	test-rmse:0.380802+0.000311
[2600]	train-rmse:0.368992+0.0007545	test-rmse:0.380796+0.0003125
[2610]	train-rmse:0.368977+0.0007535	test-rmse:0.380787+0.000313
[2620]	train-rmse:0.368962+0.000756	test-rmse:0.380784+0.00031
[2630]	train-rmse:0.368942+0.000751	test-rmse:0.380776+0.000312
[2640]	train-rmse:0.368926+0.0007505	test-rmse:0.380769+0.0003135
[2650]	train-rmse:0.368905+0.000757	test-rmse:0.380761+0.0003085
[2660]	train-rmse:0.368885+0.0007535	test-rmse:0.380754+0.0003095
[2670]	train-rmse:0.368877+0.0007565	test-rmse:0.380754+0.0003085
[2680]	train-rmse:0.368865+0.0007575	test-rmse:0.380749+0.0003055
[2690]	train-rmse:0.368849+0.0007525	test-rmse:0.380743+0.000308
[2700]	train-rmse:0.368829+0.000749	test-rmse:0.380733+0.0003105
[2710]	train-rmse:0.368813

[3830]	train-rmse:0.367436+0.00076	test-rmse:0.380224+0.000326
[3840]	train-rmse:0.367427+0.000759	test-rmse:0.380221+0.000327
[3850]	train-rmse:0.367419+0.00076	test-rmse:0.380219+0.000325
[3860]	train-rmse:0.367407+0.000754	test-rmse:0.380216+0.000328
[3870]	train-rmse:0.367395+0.0007545	test-rmse:0.380215+0.000326
[3880]	train-rmse:0.367388+0.000753	test-rmse:0.380212+0.0003275
[3890]	train-rmse:0.367376+0.000753	test-rmse:0.380207+0.0003285
[3900]	train-rmse:0.367371+0.000753	test-rmse:0.380205+0.000328
[3910]	train-rmse:0.367356+0.0007525	test-rmse:0.380199+0.000327
[3920]	train-rmse:0.367343+0.0007515	test-rmse:0.380193+0.0003275
[3930]	train-rmse:0.367335+0.0007535	test-rmse:0.380188+0.0003265
[3940]	train-rmse:0.367325+0.000753	test-rmse:0.380185+0.0003275
[3950]	train-rmse:0.367313+0.000748	test-rmse:0.380179+0.00033
[3960]	train-rmse:0.367306+0.0007515	test-rmse:0.380177+0.0003285
[3970]	train-rmse:0.367293+0.0007555	test-rmse:0.380172+0.000327
[3980]	train-rmse:0.367279+0.00

[5100]	train-rmse:0.366377+0.000771	test-rmse:0.379877+0.0003225
[5110]	train-rmse:0.366369+0.000773	test-rmse:0.379875+0.0003215
[5120]	train-rmse:0.36636+0.000774	test-rmse:0.379872+0.0003205
[5130]	train-rmse:0.366353+0.0007715	test-rmse:0.379871+0.00032
[5140]	train-rmse:0.366347+0.000772	test-rmse:0.379869+0.000322
[5150]	train-rmse:0.366335+0.0007725	test-rmse:0.379866+0.0003205
[5160]	train-rmse:0.366325+0.000774	test-rmse:0.379861+0.0003215
[5170]	train-rmse:0.366317+0.000771	test-rmse:0.379857+0.000322
[5180]	train-rmse:0.366313+0.000771	test-rmse:0.379855+0.000323
[5190]	train-rmse:0.366304+0.000775	test-rmse:0.379853+0.0003235
[5200]	train-rmse:0.366293+0.0007745	test-rmse:0.379848+0.000323
[5210]	train-rmse:0.366289+0.0007715	test-rmse:0.379848+0.0003235
[5220]	train-rmse:0.366277+0.000773	test-rmse:0.379846+0.0003255
[5230]	train-rmse:0.366274+0.0007755	test-rmse:0.379846+0.000325
[5240]	train-rmse:0.366266+0.0007765	test-rmse:0.379845+0.0003265
[5250]	train-rmse:0.36626+0

[6370]	train-rmse:0.365572+0.000767	test-rmse:0.379647+0.00035
[6380]	train-rmse:0.365571+0.0007655	test-rmse:0.379646+0.000351
[6390]	train-rmse:0.365565+0.000768	test-rmse:0.379644+0.00035
[6400]	train-rmse:0.365561+0.000771	test-rmse:0.379644+0.0003485
[6410]	train-rmse:0.365552+0.000771	test-rmse:0.379641+0.000348
[6420]	train-rmse:0.365546+0.000773	test-rmse:0.379638+0.000347
[6430]	train-rmse:0.36554+0.0007725	test-rmse:0.379636+0.0003485
[6440]	train-rmse:0.365534+0.000773	test-rmse:0.379633+0.0003485
[6450]	train-rmse:0.365527+0.00077	test-rmse:0.379632+0.0003505
[6460]	train-rmse:0.365523+0.000768	test-rmse:0.379632+0.0003505
[6470]	train-rmse:0.365521+0.000768	test-rmse:0.379632+0.0003495
[6480]	train-rmse:0.365515+0.000766	test-rmse:0.379633+0.000348
[6490]	train-rmse:0.365511+0.0007645	test-rmse:0.37963+0.000349
[6500]	train-rmse:0.365504+0.00077	test-rmse:0.379629+0.0003485
[6510]	train-rmse:0.365497+0.00077	test-rmse:0.379628+0.0003495
[6520]	train-rmse:0.365493+0.000772	

[7640]	train-rmse:0.364928+0.0007565	test-rmse:0.379464+0.000361
[7650]	train-rmse:0.364925+0.000758	test-rmse:0.379463+0.00036
[7660]	train-rmse:0.364921+0.00076	test-rmse:0.37946+0.0003585
[7670]	train-rmse:0.364919+0.0007595	test-rmse:0.37946+0.0003585
[7680]	train-rmse:0.364916+0.000758	test-rmse:0.37946+0.000358
[7690]	train-rmse:0.364912+0.000757	test-rmse:0.37946+0.0003585
[7700]	train-rmse:0.364907+0.000758	test-rmse:0.379458+0.000358
[7710]	train-rmse:0.364903+0.000755	test-rmse:0.379456+0.0003595
[7720]	train-rmse:0.364899+0.0007545	test-rmse:0.379456+0.0003595
[7730]	train-rmse:0.364893+0.000756	test-rmse:0.379453+0.0003615
[7740]	train-rmse:0.364888+0.0007555	test-rmse:0.379453+0.000361
[7750]	train-rmse:0.364884+0.000754	test-rmse:0.379452+0.000362
[7760]	train-rmse:0.364877+0.0007545	test-rmse:0.379451+0.000365
[7770]	train-rmse:0.364874+0.000753	test-rmse:0.37945+0.000366
[7780]	train-rmse:0.364871+0.000752	test-rmse:0.379448+0.0003675
[7790]	train-rmse:0.364868+0.000753

[8910]	train-rmse:0.364395+0.0007865	test-rmse:0.379335+0.0003635
[8920]	train-rmse:0.364393+0.000787	test-rmse:0.379335+0.0003635
[8930]	train-rmse:0.36439+0.000788	test-rmse:0.379333+0.0003625
[8940]	train-rmse:0.364388+0.000789	test-rmse:0.379333+0.000362
[8950]	train-rmse:0.364384+0.000789	test-rmse:0.379331+0.000363
[8960]	train-rmse:0.36438+0.000786	test-rmse:0.37933+0.000364
[8970]	train-rmse:0.364373+0.000788	test-rmse:0.379329+0.000363
[8980]	train-rmse:0.36437+0.0007875	test-rmse:0.379329+0.000362
[8990]	train-rmse:0.364366+0.0007885	test-rmse:0.379328+0.000361
[9000]	train-rmse:0.364359+0.0007885	test-rmse:0.379326+0.000363
[9010]	train-rmse:0.364353+0.0007875	test-rmse:0.379326+0.000365
[9020]	train-rmse:0.364348+0.0007885	test-rmse:0.379327+0.000364
[9030]	train-rmse:0.364344+0.0007905	test-rmse:0.379325+0.0003625
[9040]	train-rmse:0.364339+0.000792	test-rmse:0.379324+0.000363
[9050]	train-rmse:0.364335+0.0007935	test-rmse:0.379324+0.0003625
[9060]	train-rmse:0.364331+0.00

[10180]	train-rmse:0.363943+0.000808	test-rmse:0.379209+0.0003715
[10190]	train-rmse:0.363939+0.000808	test-rmse:0.379209+0.000372
[10200]	train-rmse:0.363937+0.000809	test-rmse:0.379208+0.000371
[10210]	train-rmse:0.363934+0.000807	test-rmse:0.379208+0.00037
[10220]	train-rmse:0.363931+0.000809	test-rmse:0.379208+0.00037
[10230]	train-rmse:0.36393+0.00081	test-rmse:0.379209+0.0003705
[10240]	train-rmse:0.363925+0.000813	test-rmse:0.379207+0.000369
[10250]	train-rmse:0.363924+0.000812	test-rmse:0.379207+0.000369
[10260]	train-rmse:0.363922+0.0008125	test-rmse:0.379207+0.0003685
[10270]	train-rmse:0.363919+0.000814	test-rmse:0.379207+0.000368
[10280]	train-rmse:0.363916+0.0008165	test-rmse:0.379206+0.000368
[10290]	train-rmse:0.363912+0.000815	test-rmse:0.379204+0.0003685
[10300]	train-rmse:0.363911+0.0008155	test-rmse:0.379204+0.0003685
[10310]	train-rmse:0.363907+0.000813	test-rmse:0.379205+0.000368
[10320]	train-rmse:0.363903+0.0008145	test-rmse:0.379203+0.000366
[10330]	train-rmse:0

[11430]	train-rmse:0.36355+0.000815	test-rmse:0.379121+0.0003745
[11440]	train-rmse:0.363548+0.0008145	test-rmse:0.379121+0.000375
[11450]	train-rmse:0.363544+0.000816	test-rmse:0.37912+0.0003725
[11460]	train-rmse:0.36354+0.000819	test-rmse:0.37912+0.000372


KeyboardInterrupt: 

In [None]:
xgb1.n_estimators