In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.svm import LinearSVR
import random
import pickle as pkl

In [2]:
random.seed(24)

In [3]:
eco_fac = pd.read_csv('eco_india.csv')
eco_fac.head()

Unnamed: 0,Unnamed: 1,GDP_US,INFL_US,GDP,PPP,INFL,FER
0,1981,13976.1,10.33471,270.5,3.752,13.11255,8.658523
1,1982,14433.8,6.131427,274.1,3.84,7.89075,9.455132
2,1983,15543.9,3.212435,291.2,4.093,11.86807,10.098898
3,1984,17121.2,4.300536,276.7,4.266,8.318914,11.362583
4,1985,18236.8,3.545644,296.4,4.431,5.556426,12.36875


In [4]:
df = pd.read_csv('weights_15.csv')
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.025855,0.125928,0.054118,0.067509,0.037793,0.062103,0.057768,0.093079,0.032104,0.069524,0.053799,0.089232,0.020576,0.195674,0.01494
1,0.02439,0.107464,0.049101,0.059169,0.034891,0.060548,0.056063,0.094063,0.027917,0.074106,0.04595,0.088189,0.022177,0.239667,0.016304
2,0.02545,0.076065,0.035598,0.047638,0.028261,0.041214,0.046687,0.07944,0.030794,0.057839,0.034091,0.071603,0.016388,0.389751,0.019181
3,0.026685,0.087435,0.042064,0.061242,0.031366,0.05122,0.059974,0.104375,0.038531,0.06846,0.035101,0.074283,0.016077,0.283385,0.019803
4,0.028514,0.077057,0.046638,0.060663,0.031233,0.060692,0.060507,0.107761,0.041098,0.076824,0.038589,0.085528,0.015696,0.251406,0.017794


In [5]:
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
dtype: int64

In [6]:
new_fer = eco_fac.FER.shift(-1)
new_fer = pd.DataFrame(new_fer)
new_fer.rename(columns = {'FER':'NEXT_YR_FER'}, inplace=True)

In [7]:
data = pd.concat([eco_fac,new_fer],axis=1)
data.head()

Unnamed: 0,Unnamed: 1,GDP_US,INFL_US,GDP,PPP,INFL,FER,NEXT_YR_FER
0,1981,13976.1,10.33471,270.5,3.752,13.11255,8.658523,9.455132
1,1982,14433.8,6.131427,274.1,3.84,7.89075,9.455132,10.098898
2,1983,15543.9,3.212435,291.2,4.093,11.86807,10.098898,11.362583
3,1984,17121.2,4.300536,276.7,4.266,8.318914,11.362583,12.36875
4,1985,18236.8,3.545644,296.4,4.431,5.556426,12.36875,12.610833


In [14]:
data = pd.concat([df,data],axis=1)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,GDP,FER,NEXT_YR_FER
0,0.025855,0.125928,0.054118,0.067509,0.037793,0.062103,0.057768,0.093079,0.032104,0.069524,0.053799,0.089232,0.020576,0.195674,0.01494,270.5,8.658523,9.455132
1,0.02439,0.107464,0.049101,0.059169,0.034891,0.060548,0.056063,0.094063,0.027917,0.074106,0.04595,0.088189,0.022177,0.239667,0.016304,274.1,9.455132,10.098898
2,0.02545,0.076065,0.035598,0.047638,0.028261,0.041214,0.046687,0.07944,0.030794,0.057839,0.034091,0.071603,0.016388,0.389751,0.019181,291.2,10.098898,11.362583
3,0.026685,0.087435,0.042064,0.061242,0.031366,0.05122,0.059974,0.104375,0.038531,0.06846,0.035101,0.074283,0.016077,0.283385,0.019803,276.7,11.362583,12.36875
4,0.028514,0.077057,0.046638,0.060663,0.031233,0.060692,0.060507,0.107761,0.041098,0.076824,0.038589,0.085528,0.015696,0.251406,0.017794,296.4,12.36875,12.610833


In [15]:
data = data.dropna()

In [16]:
data.shape

(38, 18)

In [17]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,GDP,FER,NEXT_YR_FER
0,0.025855,0.125928,0.054118,0.067509,0.037793,0.062103,0.057768,0.093079,0.032104,0.069524,0.053799,0.089232,0.020576,0.195674,0.01494,270.5,8.658523,9.455132
1,0.02439,0.107464,0.049101,0.059169,0.034891,0.060548,0.056063,0.094063,0.027917,0.074106,0.04595,0.088189,0.022177,0.239667,0.016304,274.1,9.455132,10.098898
2,0.02545,0.076065,0.035598,0.047638,0.028261,0.041214,0.046687,0.07944,0.030794,0.057839,0.034091,0.071603,0.016388,0.389751,0.019181,291.2,10.098898,11.362583
3,0.026685,0.087435,0.042064,0.061242,0.031366,0.05122,0.059974,0.104375,0.038531,0.06846,0.035101,0.074283,0.016077,0.283385,0.019803,276.7,11.362583,12.36875
4,0.028514,0.077057,0.046638,0.060663,0.031233,0.060692,0.060507,0.107761,0.041098,0.076824,0.038589,0.085528,0.015696,0.251406,0.017794,296.4,12.36875,12.610833
5,0.026763,0.065335,0.035236,0.05267,0.025731,0.052264,0.047317,0.098449,0.031422,0.054511,0.030486,0.067872,0.012871,0.376813,0.02226,310.5,12.610833,12.9615
6,0.025201,0.064009,0.031598,0.044123,0.023431,0.045257,0.040083,0.088396,0.025496,0.038581,0.025239,0.056042,0.011981,0.458331,0.022232,340.4,12.9615,13.917083
7,0.027945,0.087123,0.040085,0.059586,0.030004,0.056853,0.061352,0.114146,0.035187,0.056659,0.031126,0.066493,0.017816,0.297039,0.018587,354.1,13.917083,16.2255
8,0.023022,0.060581,0.030364,0.047476,0.022982,0.043616,0.043212,0.094296,0.027974,0.039163,0.025089,0.040992,0.009673,0.469564,0.021995,346.1,16.2255,17.5035
9,0.030401,0.089313,0.047727,0.069226,0.035643,0.069991,0.069092,0.109811,0.035336,0.0586,0.037666,0.064204,0.023158,0.238007,0.021825,367.6,17.5035,22.742433


In [23]:
2019-2011

8

In [24]:
data.iloc[2018-1981,:].values

array([2.13351398e-02, 4.36247614e-02, 7.52677468e-02, 1.45327007e-01,
       5.31480134e-02, 2.46785489e-01, 7.39064981e-02, 5.38001246e-02,
       4.98855526e-03, 1.09706994e-01, 5.09929441e-02, 8.01064048e-02,
       2.19309983e-02, 1.84339911e-03, 1.72359251e-02, 2.00590000e+03,
       6.83894670e+01, 7.04199690e+01])

In [12]:
X = data.iloc[:,:-1]
y = data.iloc[:,-1]

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [14]:
minmax = MinMaxScaler()
scaler = StandardScaler()

In [15]:
X_s = scaler.fit_transform(X) # standard scaled data
X_m = minmax.fit_transform(X) # minmax scaled data

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None)
X_train_s, X_test_s, y_train, y_test = train_test_split(X_s, y, test_size=0.2, random_state=None)
X_train_m, X_test_m, y_train, y_test = train_test_split(X_m, y, test_size=0.2, random_state=None)

In [20]:
# Change model here

from sklearn.model_selection import cross_val_score
score = cross_val_score(LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05), X_train, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-377.1490621239258

In [21]:
# Change model here

score = cross_val_score(LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05), X_train_m, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-5.997514118192359

In [22]:
# Change model here

score = cross_val_score(LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05), X_train_s, y_train, cv=5, scoring='neg_mean_squared_error')
score.mean()

-891.5935253046366

In [24]:
# Original 
model_orig = LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05) # change model here
model_orig.fit(X_train, y_train)
pred = model_orig.predict(X_test)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error :',((y_test-pred)/y_test).mean())

MSE : 329.6558574251652
R2 : -23.044914256258544
Error : -0.30056321311489687


In [25]:
(y_test-pred) # check the variations

36    27.242135
25     7.411177
27    14.429089
31    18.793466
11   -11.752471
20     4.512502
3    -28.378528
8    -17.747404
Name: NEXT_YR_FER, dtype: float64

In [28]:
# MinMax Scaled 
model_minmax = LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05) # Change model here
model_minmax.fit(X_train_m, y_train)
pred = model_minmax.predict(X_test_m)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error : ',((y_test-pred)/y_test).mean())

MSE : 13.423766793086806
R2 : 0.9563652999582131
Error :  0.014677947093645482


In [29]:
(y_test-pred) # check the variation

36    3.340412
25   -4.949296
27    6.876460
31   -0.523200
11    4.410520
20    1.665569
3    -1.380054
8    -0.213223
Name: NEXT_YR_FER, dtype: float64

In [30]:
# Standard Scaled 
model_stand = LinearSVR(C=25.0, dual=False, epsilon=0.0001, loss="squared_epsilon_insensitive", tol=1e-05) # Change model here
model_stand.fit(X_train_s, y_train)
pred = model_stand.predict(X_test_s)
print('MSE :',mean_squared_error(pred, y_test))
print('R2 :',r2_score(pred,y_test))
print('Error : ',((y_test-pred)/y_test).mean())

MSE : 451.9503980923193
R2 : -1.7452675888206137
Error :  -0.48877531659482565


In [31]:
(y_test-pred) # check the variations

36    29.516058
25    -2.853654
27    -0.923537
31    11.235929
11    13.514586
20   -17.805047
3    -34.557122
8    -30.254004
Name: NEXT_YR_FER, dtype: float64

In [32]:
# Save the best perform ming model only after instructions

# pkl.dump(model_minmax, open('model_india.pkl','wb'))

In [33]:
# dont uncomment until said to
# if we use scaling
# pkl.dump(minmax, open('scaler_india.pkl','wb'))

In [26]:
np.round(72.69/109.02,2)

0.67