In [5]:
import pandas as pd
import numpy as np

#from xgboost.sklearn import XGBRegressor
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
train_filename_iq = ( './datasets/dengue_test_iq.pkl' )
dengue_test_iq = pd.read_pickle( train_filename_iq )

test_filename_sj = ( './datasets/dengue_test_sj.pkl' )
dengue_test_sj = pd.read_pickle( test_filename_sj )

submission_sj = pd.DataFrame( dengue_test_sj, columns = ['city'])
submission_iq = pd.DataFrame( dengue_test_iq, columns = ['city'])
submission_sj.reset_index( inplace = True)
submission_sj['year'] = submission_sj['week_start_date'].dt.year
submission_sj['weekofyear'] = submission_sj['week_start_date'].dt.weekofyear
submission_iq.reset_index( inplace = True)
submission_iq['year'] = submission_iq['week_start_date'].dt.year
submission_iq['weekofyear'] = submission_iq['week_start_date'].dt.weekofyear
submission = submission_sj.append( submission_iq )
submission.drop( columns = ['week_start_date'], inplace = True )

In [6]:
sub_sj = pd.DataFrame()
sub_iq = pd.DataFrame()

In [8]:
for month in range( 1, 13 ):
    train_filename = ( './datasets/train_sj_month_' + str( month ) + '.pkl' )
    test_filename  = ( './datasets/test_sj_month_' + str( month ) + '.pkl' )
    dengue_train_sj_month = pd.read_pickle( train_filename )
    dengue_test_sj_month  = pd.read_pickle( test_filename )
    X = pd.DataFrame( dengue_train_sj_month )
    X.drop( columns = ['city','year','total_cases','month'], inplace = True )
    y = dengue_train_sj_month.total_cases
    parameters_for_testing = { 'learning_rate':[0.1,0.01],
                               'max_depth':[2,3,4],
                               'n_estimators':[1000,500],
                               'gamma':[0,0.01],
                               'min_child_weight':[12,15,20],
                               'colsample_bytree':[0.5,0.4,0.6],
                               'reg_alpha':[0.01],
                               'reg_lambda':[1e-5],
                               'subsample':[0.95]
                             }
    xgb_model = xgb.XGBRegressor()
    gs_xg = GridSearchCV( estimator = xgb_model, param_grid = parameters_for_testing,
                             n_jobs=4,iid=False, verbose=1,scoring='neg_mean_squared_error', cv= 5).fit(X,y, eval_metric='rmse')
    X_sj_train, X_sj_test, Y_sj_train, Y_sj_test = train_test_split( X, y, shuffle = False)
    Y_sj_pred = gs_xg.best_estimator_.predict(X_sj_test).astype(int)
    print ("San Juan Test  MAE error :", mean_absolute_error(Y_sj_pred, Y_sj_test))
    pred_train_sj = gs_xg.best_estimator_.predict(X_sj_train).astype(int)
    print ("San Juan Train MAE error :", mean_absolute_error(pred_train_sj, Y_sj_train))
    X_test = pd.DataFrame( dengue_test_sj_month, columns = X.columns )
    submit_pred_sj = pd.DataFrame( dengue_test_sj_month, columns = ['city','week_start_date'])
    submit_pred_sj['total_cases'] = gs_xg.predict(X_test).astype( 'int')
    sub_sj = sub_sj.append( submit_pred_sj )
sub_sj = sub_sj.drop( columns=['week_start_date'])
sub_sj.reset_index( inplace = True )
sub_sj['year'] = sub_sj['week_start_date'].dt.year
sub_sj['weekofyear'] = sub_sj['week_start_date'].dt.weekofyear
sub_sj.drop( columns = ['week_start_date'], inplace = True )

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   23.0s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   50.5s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.3min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 1.9565217391304348
San Juan Train MAE error : 3.5970149253731343
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   19.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   44.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 2.4444444444444446
San Juan Train MAE error : 5.314814814814815
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   18.4s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   41.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 2.2777777777777777
San Juan Train MAE error : 4.685185185185185
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   23.6s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   55.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.2min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.43478260869565216
San Juan Train MAE error : 0.4090909090909091
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   18.2s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   41.7s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.7min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.5
San Juan Train MAE error : 0.5
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   18.5s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   41.3s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.7min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


San Juan Test  MAE error : 3.4444444444444446
San Juan Train MAE error : 6.425925925925926
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   22.8s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   51.9s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.2min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 8.695652173913043
San Juan Train MAE error : 12.253731343283581
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   19.2s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   44.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 21.333333333333332
San Juan Train MAE error : 21.87037037037037
Fitting 5 folds for each of 216 candidates, totalling 1080 fits

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   18.9s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   42.6s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \



San Juan Test  MAE error : 10.736842105263158
San Juan Train MAE error : 18.50877192982456
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    5.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   23.7s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   52.1s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  2.2min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.5
San Juan Train MAE error : 0.5625
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.2s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   20.0s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   45.2s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.3min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 0.5555555555555556
San Juan Train MAE error : 0.46296296296296297
Fitting 5 folds for each of 216 candidates, totalling 1080 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    3.9s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   18.0s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   40.8s
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 1080 out of 1080 | elapsed:  1.7min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


San Juan Test  MAE error : 7.388888888888889
San Juan Train MAE error : 16.90740740740741


In [None]:
for month in range( 1, 13 ):
    train_filename = ( './datasets/train_iq_month_' + str( month ) + '.pkl' )
    test_filename  = ( './datasets/test_iq_month_' + str( month ) + '.pkl' )
    dengue_train_iq_month = pd.read_pickle( train_filename )
    dengue_test_iq_month  = pd.read_pickle( test_filename )
    X = pd.DataFrame( dengue_train_iq_month )
    X.drop( columns = ['city','year','total_cases','month'], inplace = True )
    y = dengue_train_iq_month.total_cases
    parameters_for_testing = { 'learning_rate':[0.1,0.01],#
                               'max_depth':[2,4],
                               'n_estimators':[700,500,250],
                               'gamma':[0,0.01],
                               'min_child_weight':[5,10,15,20],
                               'colsample_bytree':[0.5,0.4,0.6],
                               'reg_alpha':[0.01,0.75],
                               'reg_lambda':[1e-5,1e-4],
                               'subsample':[0.75]#,0.7,0.8
                             }
    xgb_model = xgb.XGBRegressor()
    gs_xg = GridSearchCV( estimator = xgb_model, param_grid = parameters_for_testing,
                             n_jobs=4,iid=False, verbose=1,scoring='neg_mean_squared_error', cv = 5).fit(X,y, eval_metric='rmse')
    X_iq_train, X_iq_test, Y_iq_train, Y_iq_test = train_test_split( X, y, shuffle = False)
    gs_xg.best_estimator_.fit( X_iq_train, Y_iq_train )
    Y_iq_pred = gs_xg.best_estimator_.predict(X_iq_test).astype(int)
    print ("Iquitos Test  MAE error :", mean_absolute_error(Y_iq_pred, Y_iq_test))
    pred_train_iq = gs_xg.best_estimator_.predict(X_iq_train).astype(int)
    print ("Iquitos Train MAE error :", mean_absolute_error(pred_train_iq, Y_iq_train))
    X_test = pd.DataFrame( dengue_test_iq_month, columns = X.columns )
    submit_pred_iq = pd.DataFrame( dengue_test_iq_month, columns = ['city', 'week_start_date'])
    submit_pred_iq['total_cases'] = gs_xg.best_estimator_.predict(X_test).astype( 'int')
    sub_iq = sub_iq.append( submit_pred_iq )
    #submission = pd.merge( submission, submit_pred_iq, on =['city','year','weekofyear'])
sub_iq = sub_iq.drop( columns=['week_start_date'])
sub_iq.reset_index( inplace = True )
sub_iq['year'] = sub_iq['week_start_date'].dt.year
sub_iq['weekofyear'] = sub_iq['week_start_date'].dt.weekofyear
sub_iq.drop( columns = ['week_start_date'], inplace = True )

Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.4s
[Parallel(n_jobs=4)]: Done 205 tasks      | elapsed:    9.5s
[Parallel(n_jobs=4)]: Done 705 tasks      | elapsed:   31.2s
[Parallel(n_jobs=4)]: Done 1405 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 2305 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 3405 tasks      | elapsed:  2.5min
[Parallel(n_jobs=4)]: Done 4705 tasks      | elapsed:  3.5min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  4.5min remaining:    0.2s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  4.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 11.846153846153847
Iquitos Train MAE error : 7.243243243243243
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.4s
[Parallel(n_jobs=4)]: Done 210 tasks      | elapsed:    9.2s
[Parallel(n_jobs=4)]: Done 710 tasks      | elapsed:   28.9s
[Parallel(n_jobs=4)]: Done 1410 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 2310 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done 3410 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 4710 tasks      | elapsed:  3.0min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.7min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 9.4
Iquitos Train MAE error : 7.966666666666667
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.1s
[Parallel(n_jobs=4)]: Done 262 tasks      | elapsed:   10.1s
[Parallel(n_jobs=4)]: Done 762 tasks      | elapsed:   29.3s
[Parallel(n_jobs=4)]: Done 1462 tasks      | elapsed:   55.7s
[Parallel(n_jobs=4)]: Done 2362 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 3462 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 4762 tasks      | elapsed:  3.1min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 3.4
Iquitos Train MAE error : 1.5333333333333334
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.5s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    9.0s
[Parallel(n_jobs=4)]: Done 649 tasks      | elapsed:   29.4s
[Parallel(n_jobs=4)]: Done 1349 tasks      | elapsed:  1.0min
[Parallel(n_jobs=4)]: Done 2249 tasks      | elapsed:  1.8min
[Parallel(n_jobs=4)]: Done 3349 tasks      | elapsed:  2.7min
[Parallel(n_jobs=4)]: Done 4649 tasks      | elapsed:  3.8min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  4.8min remaining:    0.3s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  4.8min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.230769230769231
Iquitos Train MAE error : 4.216216216216216
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.8s
[Parallel(n_jobs=4)]: Done 227 tasks      | elapsed:    9.4s
[Parallel(n_jobs=4)]: Done 1011 tasks      | elapsed:   41.4s
[Parallel(n_jobs=4)]: Done 1711 tasks      | elapsed:  1.2min
[Parallel(n_jobs=4)]: Done 2611 tasks      | elapsed:  1.7min
[Parallel(n_jobs=4)]: Done 3711 tasks      | elapsed:  2.4min
[Parallel(n_jobs=4)]: Done 5011 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  3.7min remaining:    0.2s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.7min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \



Iquitos Test  MAE error : 2.6
Iquitos Train MAE error : 3.533333333333333
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done 233 tasks      | elapsed:    8.4s
[Parallel(n_jobs=4)]: Done 1233 tasks      | elapsed:   46.9s
[Parallel(n_jobs=4)]: Done 2633 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done 4433 tasks      | elapsed:  2.7min
[Parallel(n_jobs=4)]: Done 5625 tasks      | elapsed:  3.5min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  3.6min remaining:    0.2s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Iquitos Test  MAE error : 1.8
Iquitos Train MAE error : 2.433333333333333
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.4s
[Parallel(n_jobs=4)]: Done 208 tasks      | elapsed:    8.9s
[Parallel(n_jobs=4)]: Done 708 tasks      | elapsed:   28.6s
[Parallel(n_jobs=4)]: Done 1408 tasks      | elapsed:   58.4s
[Parallel(n_jobs=4)]: Done 2308 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done 3408 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 4708 tasks      | elapsed:  3.2min
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.9min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 2.0833333333333335
Iquitos Train MAE error : 0.4117647058823529
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done 263 tasks      | elapsed:    9.8s
[Parallel(n_jobs=4)]: Done 763 tasks      | elapsed:   28.9s
[Parallel(n_jobs=4)]: Done 1463 tasks      | elapsed:   54.4s
[Parallel(n_jobs=4)]: Done 2363 tasks      | elapsed:  1.4min
[Parallel(n_jobs=4)]: Done 3463 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 4763 tasks      | elapsed:  2.9min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  3.6min remaining:    0.2s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.6min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 1.4
Iquitos Train MAE error : 0.3
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done 205 tasks      | elapsed:    8.7s
[Parallel(n_jobs=4)]: Done 705 tasks      | elapsed:   28.9s
[Parallel(n_jobs=4)]: Done 1405 tasks      | elapsed:   57.1s
[Parallel(n_jobs=4)]: Done 2305 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 3405 tasks      | elapsed:  2.2min
[Parallel(n_jobs=4)]: Done 4705 tasks      | elapsed:  3.1min
[Parallel(n_jobs=4)]: Done 5753 out of 5760 | elapsed:  3.9min remaining:    0.2s
[Parallel(n_jobs=4)]: Done 5760 out of 5760 | elapsed:  3.9min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


Iquitos Test  MAE error : 9.818181818181818
Iquitos Train MAE error : 4.96875
Fitting 5 folds for each of 1152 candidates, totalling 5760 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    2.7s
[Parallel(n_jobs=4)]: Done 207 tasks      | elapsed:    9.3s
[Parallel(n_jobs=4)]: Done 707 tasks      | elapsed:   30.5s
[Parallel(n_jobs=4)]: Done 1407 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 2307 tasks      | elapsed:  1.7min


In [None]:
submit = sub_sj.append( sub_iq, sort=True )

In [None]:
test = pd.merge( submission, submit, on =['city','year','weekofyear'], how='left')

In [None]:
test.to_csv("data/submission_xgb_monthwise.csv", index = False)