In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import AdaBoostRegressor
import random
import pickle as pkl

In [22]:
random.seed(24)

In [23]:
eco_fac = pd.read_csv('eco_switzerland.csv')
eco_fac.head()

Unnamed: 0,Unnamed: 1,GDP_US,INFL_US,GDP,PPP,INFL,FER
0,1981,13976.1,10.33471,17153.4,1.949529,6.49031,1.964242
1,1982,14433.8,6.131427,17478.7,1.971007,5.655101,2.030275
2,1983,15543.9,3.212435,17342.0,1.94161,2.949831,2.099142
3,1984,17121.2,4.300536,16499.1,1.944457,2.931436,2.349683
4,1985,18236.8,3.545644,16655.3,1.92765,3.43539,2.457125


In [24]:
df = pd.read_csv('weights_15.csv')
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.025855,0.125928,0.054118,0.067509,0.037793,0.062103,0.057768,0.093079,0.032104,0.069524,0.053799,0.089232,0.020576,0.195674,0.01494
1,0.02439,0.107464,0.049101,0.059169,0.034891,0.060548,0.056063,0.094063,0.027917,0.074106,0.04595,0.088189,0.022177,0.239667,0.016304
2,0.02545,0.076065,0.035598,0.047638,0.028261,0.041214,0.046687,0.07944,0.030794,0.057839,0.034091,0.071603,0.016388,0.389751,0.019181
3,0.026685,0.087435,0.042064,0.061242,0.031366,0.05122,0.059974,0.104375,0.038531,0.06846,0.035101,0.074283,0.016077,0.283385,0.019803
4,0.028514,0.077057,0.046638,0.060663,0.031233,0.060692,0.060507,0.107761,0.041098,0.076824,0.038589,0.085528,0.015696,0.251406,0.017794


In [25]:
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
dtype: int64

In [26]:
new_fer = eco_fac.FER.shift(-1)
new_fer = pd.DataFrame(new_fer)
new_fer.rename(columns = {'FER':'NEXT_YR_FER'}, inplace=True)

In [27]:
data = pd.concat([eco_fac,new_fer],axis=1)
data.head()

Unnamed: 0,Unnamed: 1,GDP_US,INFL_US,GDP,PPP,INFL,FER,NEXT_YR_FER
0,1981,13976.1,10.33471,17153.4,1.949529,6.49031,1.964242,2.030275
1,1982,14433.8,6.131427,17478.7,1.971007,5.655101,2.030275,2.099142
2,1983,15543.9,3.212435,17342.0,1.94161,2.949831,2.099142,2.349683
3,1984,17121.2,4.300536,16499.1,1.944457,2.931436,2.349683,2.457125
4,1985,18236.8,3.545644,16655.3,1.92765,3.43539,2.457125,1.798917


In [29]:
data = pd.concat([df,data],axis=1)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,GDP,FER,NEXT_YR_FER
0,0.025855,0.125928,0.054118,0.067509,0.037793,0.062103,0.057768,0.093079,0.032104,0.069524,0.053799,0.089232,0.020576,0.195674,0.01494,17153.4,1.964242,2.030275
1,0.02439,0.107464,0.049101,0.059169,0.034891,0.060548,0.056063,0.094063,0.027917,0.074106,0.04595,0.088189,0.022177,0.239667,0.016304,17478.7,2.030275,2.099142
2,0.02545,0.076065,0.035598,0.047638,0.028261,0.041214,0.046687,0.07944,0.030794,0.057839,0.034091,0.071603,0.016388,0.389751,0.019181,17342.0,2.099142,2.349683
3,0.026685,0.087435,0.042064,0.061242,0.031366,0.05122,0.059974,0.104375,0.038531,0.06846,0.035101,0.074283,0.016077,0.283385,0.019803,16499.1,2.349683,2.457125
4,0.028514,0.077057,0.046638,0.060663,0.031233,0.060692,0.060507,0.107761,0.041098,0.076824,0.038589,0.085528,0.015696,0.251406,0.017794,16655.3,2.457125,1.798917


In [30]:
data = data.dropna()

In [31]:
data.shape

(38, 18)

In [32]:
X = data.iloc[:,:-1]
y = data.iloc[:,-1]

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [34]:
minmax = MinMaxScaler()
scaler = StandardScaler()

In [35]:
X_s = scaler.fit_transform(X) # standard scaled data
X_m = minmax.fit_transform(X) # minmax scaled data

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None)
X_train_s, X_test_s, y_train, y_test = train_test_split(X_s, y, test_size=0.2, random_state=None)
X_train_m, X_test_m, y_train, y_test = train_test_split(X_m, y, test_size=0.2, random_state=None)

In [37]:
# Change model here

from sklearn.model_selection import cross_val_score
score = cross_val_score(AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100), X_train, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-0.18403430930558046

In [38]:
# Change model here

score = cross_val_score(AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100), X_train_m, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-0.03801326629243335

In [39]:
# Change model here

score = cross_val_score(AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100), X_train_s, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-0.21848644778076737

In [40]:
# Original 
model_orig = AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100) # change model here
model_orig.fit(X_train, y_train)
pred = model_orig.predict(X_test)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error : ',((y_test-pred)/y_test).mean())

MSE : 0.2664095887989998
R2 : -24.834927201429576
Error :  -0.037820429899052446


In [41]:
(y_test-pred) # check the variations

26   -0.318618
1     0.837996
20    0.159922
36   -0.326804
4     0.671998
33   -0.349742
34   -0.484987
0     0.621314
Name: NEXT_YR_FER, dtype: float64

In [42]:
# MinMax Scaled 
model_minmax = AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100) # Change model here
model_minmax.fit(X_train_m, y_train)
pred = model_minmax.predict(X_test_m)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error : ',((y_test-pred)/y_test).mean())

MSE : 0.07694560815603912
R2 : 0.8124005521471586
Error :  -0.08798322778966514


In [43]:
(y_test-pred) # check the variation

26   -0.117276
1    -0.250541
20   -0.041439
36    0.016695
4    -0.658208
33    0.016298
34    0.038982
0    -0.319408
Name: NEXT_YR_FER, dtype: float64

In [44]:
# Standard Scaled 
model_stand = AdaBoostRegressor(learning_rate=0.001, loss="linear", n_estimators=100) # Change model here
model_stand.fit(X_train_s, y_train)
pred = model_stand.predict(X_test_s)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error : ',((y_test-pred)/y_test).mean())

MSE : 0.25711391615597984
R2 : -32.005863190051755
Error :  -0.020308946024586483


In [45]:
(y_test-pred) # check the variations

26   -0.205701
1     0.709351
20    0.301190
36   -0.239670
4     0.572568
33   -0.495707
34   -0.436592
0     0.774010
Name: NEXT_YR_FER, dtype: float64

In [48]:
# Save the best perform ming model only after instructions

pkl.dump(model_minmax, open('model_switzerland.pkl','wb'))

In [49]:
# dont uncomment until said to
# if we use scaling
pkl.dump(minmax, open('scaler_switzerland.pkl','wb'))