In [140]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [141]:
df = pd.read_csv("datasets/supershops.csv")
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [142]:
df.dropna(inplace=True)

In [143]:
df.isnull().sum()

Marketing Spend    0
Administration     0
Transport          0
Area               0
Profit             0
dtype: int64

In [146]:
x = df.drop(['Profit', 'Area'], axis=1)

In [148]:
x

Unnamed: 0,Marketing Spend,Administration,Transport
0,114523.61,136897.8,471784.1
1,162597.7,151377.59,443898.53
2,153441.51,101145.55,407934.54
3,144372.41,118671.85,383199.62
4,142107.34,91391.77,366168.42
5,131876.9,99814.71,362861.36
6,134615.46,147198.87,127716.82
7,130298.13,145530.06,323876.68
8,120542.52,148718.95,311613.29
9,123334.88,108679.17,304981.62


In [150]:
y = df[['Profit']]
y

Unnamed: 0,Profit
0,192261.83
1,191792.06
2,191050.39
3,182901.99
4,166187.94
5,156991.12
6,156122.51
7,155752.6
8,152211.77
9,149759.96


In [None]:
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.25, random_state=2)

In [187]:
xTrain

Unnamed: 0,Marketing Spend,Administration,Transport
28,66051.52,182645.56,118148.2
49,0.0,116983.8,45173.06
2,153441.51,101145.55,407934.54
47,0.0,135426.92,0.0
18,91749.16,114175.79,294919.57
15,165349.2,122616.84,261776.23
29,65605.48,153032.06,107138.38
23,67532.53,105751.03,304768.73
16,78013.11,121597.55,264346.06
42,23640.93,96189.63,148001.11


In [154]:
yTrain

Unnamed: 0,Profit
31,97483.56
26,105733.54
47,42559.73
30,99937.59
28,103282.38
1,191792.06
25,107404.34
34,96712.8
39,81005.76
20,118474.03


In [166]:
print(f"xtrain shape {xTrain.shape}")
print(f"xtest shape {xTest.shape}")

xtrain shape (36, 3)
xtest shape (13, 3)


In [167]:
lasso_model = Lasso()
lasso_model

In [169]:
training = lasso_model.fit(xTrain, yTrain)
training

In [172]:
score_training = lasso_model.score(xTrain, yTrain)
score_training

0.9070358408360997

In [173]:
score_testing = lasso_model.score(xTest, yTest)
score_testing

0.8726434699700415

In [180]:
xTrain

Unnamed: 0,Marketing Spend,Administration,Transport
31,61136.38,152701.92,88218.23
26,75328.87,144135.98,134050.07
47,0.0,135426.92,0.0
30,61994.48,115641.28,91131.24
28,66051.52,182645.56,118148.2
1,162597.7,151377.59,443898.53
25,64664.71,139553.16,137962.62
34,46426.07,157693.92,210797.67
39,38558.51,82982.09,174999.3
20,76253.86,113867.3,298664.47


In [188]:
single_pred = lasso_model.predict([[162597.70, 151377.59, 443898.53]])
single_pred

array([184968.54357471])

In [189]:
yPredicted = lasso_model.predict(xTrain)
pd.DataFrame(yPredicted)

Unnamed: 0,0
0,98092.605391
1,47894.010609
2,177623.383022
3,45277.566667
4,126935.318124
5,178636.27008
6,97863.561292
7,109993.603151
8,115286.377299
9,70582.797905


In [190]:
ridge_model = Ridge()
ridge_model

In [192]:
training_for_ridge = ridge_model.fit(xTrain, yTrain)
training_for_ridge

In [194]:
testing_for_ridge = ridge_model.fit(xTest, yTest)
testing_for_ridge

In [195]:
score_train_for_ridge = ridge_model.score(xTrain, yTrain)
score_train_for_ridge

0.8663419782011735

In [196]:
score_testing_for_ridge = ridge_model.score(xTest, yTest)
score_testing_for_ridge

0.963690575832819

In [197]:
print(f"Train Score - Lasso Model : {score_training}")
print(f"Test Score - Lasso Model : {score_testing}")
print(f"Train Score - Ridge Model : {score_train_for_ridge}")
print(f"Test Score - Ridge Model : {score_testing_for_ridge}")

Train Score - Lasso Model : 0.9070358408360997
Test Score - Lasso Model : 0.8726434699700415
Train Score - Ridge Model : 0.8663419782011735
Test Score - Ridge Model : 0.963690575832819
