In [1]:
import pandas as pd    
pd.set_option('mode.chained_assignment', None)
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import make_scorer

from aux_fun import my_eval, my_grid_search_cv

import json

In [2]:
df = pd.read_csv('./data/train_imputed.csv')

In [3]:
y = df['NumberOfSales']

In [4]:
X = df.drop(df[['NumberOfSales','NumberOfCustomers', 'WindDirDegrees']], axis=1)

# Train-Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
X_train = X_train.reset_index(drop =True)
y_train = y_train.reset_index(drop =True)

# Find the combination with best parameters

In [7]:
#parameters
params_array = [
    {'loss' : 'linear'},
     {'loss' : 'square'},
    {'loss' : 'exponential'},
    {'n_estimators' : 50},  
]

In [8]:
results = my_grid_search_cv(AdaBoostRegressor(), params_array, X_train, y_train, n_folds=5)

In [9]:
results

Unnamed: 0,Method,Folds,Parameters,Eval_test,R2,R2_month
0,"(DecisionTreeRegressor(criterion='mse', max_de...",5,{'loss': 'linear'},0.435118,0.248521,0.084426
1,"(DecisionTreeRegressor(criterion='mse', max_de...",5,{'loss': 'square'},0.665756,-0.466864,-0.943168
2,"(DecisionTreeRegressor(criterion='mse', max_de...",5,{'loss': 'exponential'},0.812189,-0.798533,-1.696832
3,"(DecisionTreeRegressor(criterion='mse', max_de...",5,{'n_estimators': 50},0.485251,0.135233,-0.082367


In [10]:
results.to_csv('AdaBoostRegressor.csv')

# Model Building

In [11]:
best_params = results[results['Eval_test']==min(results['Eval_test'])]['Parameters'].reset_index(drop =True)[0]

In [12]:
best_params

{'loss': 'linear'}

In [13]:
ABR = AdaBoostRegressor(**best_params)
ABR.fit(X_train, y_train)

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
         n_estimators=50, random_state=None)

In [14]:
y_pred = ABR.predict(X_test)

In [15]:
print(y_pred)

[ 7812.48395316     0.          8835.64667105 ...,  9027.61153465
  9027.61153465     0.        ]


In [16]:
print(y_test)

199480     3452
492279        0
500907     4679
63763      5232
74453         0
393492     4237
10444      3390
375668    11558
310103     2457
248408     8679
172477        0
481334     4816
141523     1766
57856      3305
19144      4121
442714     2509
198615     4005
102388     5892
201752     3889
249986     6864
385543        0
126690     2853
201562     5713
494046     5668
234057     4616
83925      7623
152551     4421
421235     4196
270066     5432
153996     3371
          ...  
183905     3828
67033      5458
356838     5935
37337         0
1022       4483
85432      4344
236582        0
12614      3003
323110     5531
317127     3944
514921     4167
310007        0
215660        0
406690     5104
91533      7221
303651        0
353483     4606
256305     4640
24320      3252
306740     3844
240634     6042
149711     3578
309371     4153
3414       4114
298003     5717
440202     5429
71752      6420
282435     5302
263394     5104
39            0
Name: NumberOfSales, Len

In [17]:
evaluation = my_eval(X_test, y_test, y_pred)
evaluation

0.58530257067638647

In [18]:
r2 = r2_score(y_test, y_pred)
r2

-0.16555613118640955

# Creation of the submission 

In [19]:
model_result={'method': 'AdaBoostRegressor',
       'parameters':len(params_array),
       'evaluation': evaluation, 
       'r2':r2,
       'notes': 'None'
    }

In [20]:
with open('results.json', 'a') as fp:
    json.dump(model_result, fp, indent=2)
    fp.write('\n')

# 