# Linear Regression Sample Code

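- Predict weekly sales for each store/department pair, comparing ordinary least squares, Lasso (L1) and Ridge (L2) regression using 4-fold cross-validation (R² and RMSE per fold).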

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [2]:
# Load the two input tables: the sales records and the additional
# features that a later cell joins onto them by Store and Date.
data = pd.read_csv(filepath_or_buffer="./data/rossman_reduced.csv")
extra_data = pd.read_csv(filepath_or_buffer="./data/rossman-extra.csv")

In [3]:
# Preview the first five rows of the sales table.
data.head()

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday
0,39,59,2012-07-27,202.76,False
1,23,71,2011-02-18,16482.0,False
2,17,40,2010-02-26,48167.29,False
3,6,79,2011-01-07,21581.64,False
4,39,58,2011-05-13,1315.0,False


In [4]:
# The sales table already has an IsHoliday column, so remove the copy in
# extra_data before joining to avoid duplicated (suffixed) columns.
extra_data = extra_data.drop(columns="IsHoliday")
# Default (inner) join: attach the extra features to each sales row
# that shares the same Store and Date.
data = data.merge(right=extra_data, on=["Store", "Date"])


In [5]:
# Dimensions (rows, columns) of the table after merging in the extra features.
data.shape

(30000, 14)

In [6]:
# Replace every missing value in the merged table with 0.
data = data.fillna(value=0)

In [7]:
# One-hot encode Dept into dept_* indicator columns. drop_first leaves out
# the first level so the indicators are not redundant with each other.
data = pd.get_dummies(
    data,
    columns=["Dept"],
    prefix="dept",
    drop_first=True,
)

In [8]:
# Modelling frame: every column of the encoded table except Date.
mdf = data.drop(columns=["Date"])

In [9]:
# Preview the modelling frame (Date removed, Dept one-hot encoded).
mdf.head()

Unnamed: 0,Store,Weekly_Sales,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,...,dept_90,dept_91,dept_92,dept_93,dept_94,dept_95,dept_96,dept_97,dept_98,dept_99
0,39,202.76,False,82.89,3.407,1476.42,77.44,7.24,1008.3,35419.19,...,0,0,0,0,0,0,0,0,0,0
1,39,35825.85,False,82.89,3.407,1476.42,77.44,7.24,1008.3,35419.19,...,0,0,0,0,0,0,0,0,0,0
2,39,2871.0,False,82.89,3.407,1476.42,77.44,7.24,1008.3,35419.19,...,0,0,0,0,0,0,0,0,0,0
3,39,6697.7,False,82.89,3.407,1476.42,77.44,7.24,1008.3,35419.19,...,0,0,0,0,0,0,0,0,0,0
4,39,9209.19,False,82.89,3.407,1476.42,77.44,7.24,1008.3,35419.19,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Dimensions (rows, columns) of the modelling frame.
mdf.shape

(30000, 91)

In [11]:
# Target is Weekly_Sales; every other column of mdf is used as a feature.
y = mdf["Weekly_Sales"]
X = mdf.drop(columns="Weekly_Sales")

In [12]:
# Shuffled 4-fold splitter; the fixed random_state makes the fold
# assignment repeatable between runs.
kf = KFold(
    n_splits=4,
    shuffle=True,
    random_state=1,
)

In [None]:
# Per-fold test-set scores, one list per model:
#   lr = ordinary least squares, l1 = Lasso, l2 = Ridge.
lr_r2, l1_r2, l2_r2 = [], [], []
lr_rmse, l1_rmse, l2_rmse = [], [], []

for tr, te in kf.split(X, y):
    # Fresh estimators for every fold so no fitted state carries over.
    lr = LinearRegression()
    l1 = Lasso()
    l2 = Ridge()

    # tr / te are positional row indices, hence .iloc.
    X_tr, X_te = X.iloc[tr], X.iloc[te]
    y_tr, y_te = y.iloc[tr], y.iloc[te]

    # Fit the scaler on the training fold only and reuse it on the test
    # fold, so no test-fold statistics leak into training.
    scale = StandardScaler()
    X_tr_sc = scale.fit_transform(X_tr)
    X_te_sc = scale.transform(X_te)

    # Plain least squares is fit on the raw features; the penalised models
    # get standardized features because their penalties depend on the
    # scale of each column.
    lr.fit(X_tr, y_tr)
    l1.fit(X_tr_sc, y_tr)
    l2.fit(X_tr_sc, y_tr)

    # Predict once per model and reuse the result for both metrics
    # (previously every model predicted the test fold twice).
    lr_pred = lr.predict(X_te)
    l1_pred = l1.predict(X_te_sc)
    l2_pred = l2.predict(X_te_sc)

    lr_r2.append(r2_score(y_te, lr_pred))
    l1_r2.append(r2_score(y_te, l1_pred))
    l2_r2.append(r2_score(y_te, l2_pred))

    # RMSE = square root of the mean squared error, in Weekly_Sales units.
    lr_rmse.append(np.sqrt(mean_squared_error(y_te, lr_pred)))
    l1_rmse.append(np.sqrt(mean_squared_error(y_te, l1_pred)))
    l2_rmse.append(np.sqrt(mean_squared_error(y_te, l2_pred)))

In [None]:
# Per-fold R^2 scores, one list per line: least squares, Lasso, Ridge.
for fold_r2 in (lr_r2, l1_r2, l2_r2):
    print(fold_r2)

In [None]:
# Per-fold RMSE scores, one list per line: least squares, Lasso, Ridge.
for fold_rmse in (lr_rmse, l1_rmse, l2_rmse):
    print(fold_rmse)