### Practice Exercise (Multiple Linear Regression)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [4]:
#loadin the dataset
df = pd.read_csv('restaurant-sales.csv')
print(f'Shape of the Dataset: {df.shape}\n')
print(f'Missing Values:\n {df.isnull().sum()}\n')
print(f'First look at the dataset:\n {df.head()}')

Shape of the Dataset: (30, 8)

Missing Values:
 Day                0
Temperature        0
IsWeekend          0
NearbyEvents       0
StaffCount         0
PromotionActive    0
OnlineOrders       0
DailySales         0
dtype: int64

First look at the dataset:
    Day  Temperature  IsWeekend  ...  PromotionActive  OnlineOrders  DailySales
0    1           22          0  ...                0            45        4200
1    2           25          0  ...                0            52        4800
2    3           28          0  ...                1            68        5500
3    4           30          0  ...                0            48        4400
4    5           24          1  ...                1            95        7800

[5 rows x 8 columns]


In [5]:
#feature and target variable
X = df.iloc[:,:-1].values
y = df[['DailySales']].values

print(f'Shape of X Var:\n {X.shape}')
print(f'Shape of y Var:\n {y.shape}')

Shape of X Var:
 (30, 7)
Shape of y Var:
 (30, 1)


In [6]:
#spliting the train and test set: 
X_train, X_test , y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [7]:
#training the regression model 
ln = LinearRegression()
ln.fit(X_train,y_train)

In [20]:
#Intercept and coef of the dataset:
print(f'Intercept: {ln.intercept_}\n')

feature_name = ["Day", 'Temperature' , 'IsWeekend', 'NearbyEvents' ,'StaffCount' ,'PromotionActive','OnlineOrders']

for name , coef in zip(feature_name,(ln.coef_.ravel())):
    print(f'Coefficient for the feature {name} is {coef:.2f}\n')

Intercept: [1670.65539237]

Coefficient for the feature Day is -1.64

Coefficient for the feature Temperature is -4.17

Coefficient for the feature IsWeekend is 419.69

Coefficient for the feature NearbyEvents is 107.62

Coefficient for the feature StaffCount is -2.25

Coefficient for the feature PromotionActive is -71.39

Coefficient for the feature OnlineOrders is 59.71



In [22]:
# making the prediction 
pred_y = ln.predict(X_test)
print(f'shape of the predicted var: \n{pred_y.shape}\n')
print(f'Pred_y variable: \n{pred_y}')


shape of the predicted var: 
(6, 1)

Pred_y variable: 
[[4588.39961735]
 [4435.36168031]
 [6686.1632481 ]
 [7287.37443232]
 [4931.3082139 ]
 [4329.94879983]]


In [24]:
#comparing the results
np.set_printoptions(precision=2)
print(np.concatenate((y_test.reshape(len(y_test),1),pred_y.reshape(len(pred_y),1)),1))

[[4750.   4588.4 ]
 [4500.   4435.36]
 [6700.   6686.16]
 [7200.   7287.37]
 [5100.   4931.31]
 [4350.   4329.95]]


In [27]:
#visualization of the comparsion 

df_comparsion = pd.DataFrame({
    "Actual Value" : y_test.ravel(),
    "Predicted Value" : pred_y.ravel(),
    "Error in prediction" : y_test.ravel() - pred_y.ravel()
})

df_comparsion

Unnamed: 0,Actual Value,Predicted Value,Error in prediction
0,4750,4588.399617,161.600383
1,4500,4435.36168,64.63832
2,6700,6686.163248,13.836752
3,7200,7287.374432,-87.374432
4,5100,4931.308214,168.691786
5,4350,4329.9488,20.0512


In [31]:
#Evaluate Model:  
R2 = r2_score(y_test,pred_y)
mae = mean_absolute_error(y_test,pred_y)
mse = np.sqrt(mean_squared_error(y_test,pred_y))

print(f'Mean Absolute Error: {mae}\n')
print(f'Mean Squared Error: {mse}\n')

if R2 >= 0.9:
    print(f'Model is Working good as R2_score is {R2:.4f}')
elif R2 >= 0.7:
    print(f'Model is okish as R2_score is {R2:.4f}')
else:
    print(f'Needs improvement as R2 is {R2:.4f}')

Mean Absolute Error: 86.0321454707746

Mean Squared Error: 105.6547147528155

Model is Working good as R2_score is 0.9909
