In [1]:
import pandas as pd
import numpy as np

#from xgboost.sklearn import XGBRegressor
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
train_filename_iq = ( './../datasets/dengue_test_iq.pkl' )
dengue_test_iq = pd.read_pickle( train_filename_iq )

test_filename_sj = ( './../datasets/dengue_test_sj.pkl' )
dengue_test_sj = pd.read_pickle( test_filename_sj )

submission_sj = pd.DataFrame( dengue_test_sj, columns = ['city'])
submission_iq = pd.DataFrame( dengue_test_iq, columns = ['city'])
submission_sj.reset_index( inplace = True)
submission_sj['year'] = submission_sj['week_start_date'].dt.year
submission_sj['weekofyear'] = submission_sj['week_start_date'].dt.weekofyear
submission_iq.reset_index( inplace = True)
submission_iq['year'] = submission_iq['week_start_date'].dt.year
submission_iq['weekofyear'] = submission_iq['week_start_date'].dt.weekofyear
submission = submission_sj.append( submission_iq )
submission.drop( columns = ['week_start_date'], inplace = True )

In [3]:
sub_sj = pd.DataFrame()
sub_iq = pd.DataFrame()

In [4]:
for month in range( 1, 13 ):
    train_filename = ( './../datasets/train_sj_month_' + str( month ) + '.pkl' )
    test_filename  = ( './../datasets/test_sj_month_' + str( month ) + '.pkl' )
    dengue_train_sj_month = pd.read_pickle( train_filename )
    dengue_test_sj_month  = pd.read_pickle( test_filename )
    X = pd.DataFrame( dengue_train_sj_month )
    X.drop( columns = ['city','year','total_cases','month'], inplace = True )
    y = dengue_train_sj_month.total_cases
    parameters_for_testing = { 'learning_rate':[0.1,0.01],
                               'max_depth':[2,3,4],
                               'n_estimators':[1000,500],
                               'gamma':[0,0.01],
                               'min_child_weight':[12,15,20],
                               'colsample_bytree':[0.5,0.4,0.6],
                               'reg_alpha':[0.01],
                               'reg_lambda':[1e-5],
                               'subsample':[0.95]
                             }
    xgb_model = xgb.XGBRegressor()
    gs_xg = GridSearchCV( estimator = xgb_model, param_grid = parameters_for_testing,
                             n_jobs=4,iid=False, verbose=1,scoring='neg_mean_squared_error', cv= 5).fit(X,y, eval_metric='rmse')
    X_sj_train, X_sj_test, Y_sj_train, Y_sj_test = train_test_split( X, y, shuffle = False)
    Y_sj_pred = gs_xg.best_estimator_.predict(X_sj_test).astype(int)
    print ("San Juan Test  MAE error :", mean_absolute_error(Y_sj_pred, Y_sj_test))
    pred_train_sj = gs_xg.best_estimator_.predict(X_sj_train).astype(int)
    print ("San Juan Train MAE error :", mean_absolute_error(pred_train_sj, Y_sj_train))
    X_test = pd.DataFrame( dengue_test_sj_month, columns = X.columns )
    submit_pred_sj = pd.DataFrame( dengue_test_sj_month, columns = ['city','week_start_date'])
    submit_pred_sj['total_cases'] = gs_xg.predict(X_test).astype( 'int')
    sub_sj = sub_sj.append( submit_pred_sj )
sub_sj = sub_sj.drop( columns=['week_start_date'])
sub_sj.reset_index( inplace = True )
sub_sj['year'] = sub_sj['week_start_date'].dt.year
sub_sj['weekofyear'] = sub_sj['week_start_date'].dt.weekofyear
sub_sj.drop( columns = ['week_start_date'], inplace = True )

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   42.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.5min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  3.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 3.5217391304347827
San Juan Train MAE error : 5.537313432835821
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.9s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   30.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 2.611111111111111
San Juan Train MAE error : 5.314814814814815
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.6s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   29.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 2.5
San Juan Train MAE error : 4.5
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    8.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   36.5s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  3.4min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.43478260869565216
San Juan Train MAE error : 0.4696969696969697
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   29.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 2.1666666666666665
San Juan Train MAE error : 2.6923076923076925
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.8s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   30.7s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 3.5
San Juan Train MAE error : 6.333333333333333
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    7.9s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   36.2s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  3.4min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 9.521739130434783
San Juan Train MAE error : 12.686567164179104
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   31.8s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 21.11111111111111
San Juan Train MAE error : 21.5
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   28.5s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 10.210526315789474
San Juan Train MAE error : 18.24561403508772
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   12.9s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  4.1min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.4090909090909091
San Juan Train MAE error : 0.5
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    6.4s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   27.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.4444444444444444
San Juan Train MAE error : 0.46296296296296297
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.8s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   26.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   59.5s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 7.5
San Juan Train MAE error : 17.037037037037038


In [5]:
for month in range( 1, 13 ):
    train_filename = ( './../datasets/train_iq_month_' + str( month ) + '.pkl' )
    test_filename  = ( './../datasets/test_iq_month_' + str( month ) + '.pkl' )
    dengue_train_iq_month = pd.read_pickle( train_filename )
    dengue_test_iq_month  = pd.read_pickle( test_filename )
    X = pd.DataFrame( dengue_train_iq_month )
    X.drop( columns = ['city','year','total_cases','month'], inplace = True )
    y = dengue_train_iq_month.total_cases
    parameters_for_testing = { 'learning_rate':[0.1,0.01],#
                               'max_depth':[2,4],
                               'n_estimators':[700,500,250],
                               'gamma':[0,0.01],
                               'min_child_weight':[5,10,15,20],
                               'colsample_bytree':[0.5,0.4,0.6],
                               'reg_alpha':[0.01,0.75],
                               'reg_lambda':[1e-5,1e-4],
                               'subsample':[0.75]#,0.7,0.8
                             }
    xgb_model = xgb.XGBRegressor()
    gs_xg = GridSearchCV( estimator = xgb_model, param_grid = parameters_for_testing,
                             n_jobs=4,iid=False, verbose=1,scoring='neg_mean_squared_error', cv = 5).fit(X,y, eval_metric='rmse')
    X_iq_train, X_iq_test, Y_iq_train, Y_iq_test = train_test_split( X, y, shuffle = False)
    gs_xg.best_estimator_.fit( X_iq_train, Y_iq_train )
    Y_iq_pred = gs_xg.best_estimator_.predict(X_iq_test).astype(int)
    print ("Iquitos Test  MAE error :", mean_absolute_error(Y_iq_pred, Y_iq_test))
    pred_train_iq = gs_xg.best_estimator_.predict(X_iq_train).astype(int)
    print ("Iquitos Train MAE error :", mean_absolute_error(pred_train_iq, Y_iq_train))
    X_test = pd.DataFrame( dengue_test_iq_month, columns = X.columns )
    submit_pred_iq = pd.DataFrame( dengue_test_iq_month, columns = ['city', 'week_start_date'])
    submit_pred_iq['total_cases'] = gs_xg.best_estimator_.predict(X_test).astype( 'int')
    sub_iq = sub_iq.append( submit_pred_iq )
    #submission = pd.merge( submission, submit_pred_iq, on =['city','year','weekofyear'])
sub_iq = sub_iq.drop( columns=['week_start_date'])
sub_iq.reset_index( inplace = True )
sub_iq['year'] = sub_iq['week_start_date'].dt.year
sub_iq['weekofyear'] = sub_iq['week_start_date'].dt.weekofyear
sub_iq.drop( columns = ['week_start_date'], inplace = True )

Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   12.9s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   30.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   55.3s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.7min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.5min
[Parallel(n_jobs=4)]: Done 4042 tasks      | elapsed:  4.4min
[Parallel(n_jobs=4)]: Done 4992 tasks      | elapsed:  5.6min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 11.846153846153847
Iquitos Train MAE error : 7.243243243243243
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   11.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   26.4s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   47.7s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 2777 tasks      | elapsed:  2.6min
[Parallel(n_jobs=4)]: Done 4277 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 5287 tasks      | elapsed:  5.0min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 9.4
Iquitos Train MAE error : 7.966666666666667
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   12.8s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   29.5s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   51.6s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.1min
[Parallel(n_jobs=4)]: Done 4042 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 4992 tasks      | elapsed:  5.0min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 3.2
Iquitos Train MAE error : 2.433333333333333
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.6s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   13.6s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   31.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   57.4s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.6min
[Parallel(n_jobs=4)]: Done 4042 tasks      | elapsed:  4.6min
[Parallel(n_jobs=4)]: Done 4992 tasks      | elapsed:  5.8min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.230769230769231
Iquitos Train MAE error : 4.216216216216216
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   11.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   26.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   48.7s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.5min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 4180 tasks      | elapsed:  4.2min
[Parallel(n_jobs=4)]: Done 5130 tasks      | elapsed:  5.3min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.9min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.6
Iquitos Train MAE error : 3.533333333333333
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   11.3s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   26.1s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   48.1s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 4042 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 4992 tasks      | elapsed:  5.1min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.0min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 1.8
Iquitos Train MAE error : 2.433333333333333
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   13.8s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   31.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   57.1s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.8min
[Parallel(n_jobs=4)]: Done 3192 tasks      | elapsed:  3.6min
[Parallel(n_jobs=4)]: Done 4042 tasks      | elapsed:  4.5min
[Parallel(n_jobs=4)]: Done 4992 tasks      | elapsed:  5.7min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.0833333333333335
Iquitos Train MAE error : 0.3235294117647059
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   10.8s
[Parallel(n_jobs=4)]: Done 643 tasks      | elapsed:   37.0s
[Parallel(n_jobs=4)]: Done 1343 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1861 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 2898 tasks      | elapsed:  2.7min
[Parallel(n_jobs=4)]: Done 4198 tasks      | elapsed:  3.8min
[Parallel(n_jobs=4)]: Done 5147 tasks      | elapsed:  4.8min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.4min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.9
Iquitos Train MAE error : 2.3666666666666667
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   11.7s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   26.4s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   52.4s
[Parallel(n_jobs=4)]: Done 1278 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 2378 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 3661 tasks      | elapsed:  3.6min
[Parallel(n_jobs=4)]: Done 4411 tasks      | elapsed:  4.4min
[Parallel(n_jobs=4)]: Done 5261 tasks      | elapsed:  5.4min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.0min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 9.818181818181818
Iquitos Train MAE error : 4.96875
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.8s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   12.8s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   30.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   54.6s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  2.0min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed:  2.6min
[Parallel(n_jobs=4)]: Done 3262 tasks      | elapsed:  3.4min
[Parallel(n_jobs=4)]: Done 4540 tasks      | elapsed:  4.7min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  6.1min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 16.416666666666668
Iquitos Train MAE error : 6.257142857142857
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   15.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   31.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   53.8s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 2199 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 3499 tasks      | elapsed:  3.3min
[Parallel(n_jobs=4)]: Done 4798 tasks      | elapsed:  4.6min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 7.4
Iquitos Train MAE error : 2.066666666666667
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.4s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   11.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   26.1s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:   47.2s
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 2478 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 3978 tasks      | elapsed:  3.7min
[Parallel(n_jobs=4)]: Done 5138 tasks      | elapsed:  4.9min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  5.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 10.1
Iquitos Train MAE error : 15.766666666666667


In [6]:
submit = sub_sj.append( sub_iq, sort=True )

In [7]:
test = pd.merge( submission, submit, on =['city','year','weekofyear'], how='left')

In [8]:
test.to_csv("data/submission_xgb_monthwise.csv", index = False)