In [1]:
#Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.linear_model import  Lasso, Ridge, ElasticNet
from sklearn.preprocessing import OrdinalEncoder
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/aishwaryamate/Datasets/refs/heads/main/Regularization.csv', index_col=0)
df.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,body-style,drive-wheels,engine-location,width,height,engine-type,engine-size,horsepower,city-mpg,highway-mpg,price
0,3,115,alfa-romero,gas,convertible,rwd,front,64.1,48.8,dohc,130,111,21,27,13495
1,3,115,alfa-romero,gas,convertible,rwd,front,64.1,48.8,dohc,130,111,21,27,16500
2,1,115,alfa-romero,gas,hatchback,rwd,front,65.5,52.4,ohcv,152,154,19,26,16500
3,2,164,audi,gas,sedan,fwd,front,66.2,54.3,ohc,109,102,24,30,13950
4,2,164,audi,gas,sedan,4wd,front,66.4,54.3,ohc,136,115,18,22,17450


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 205 entries, 0 to 204
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  205 non-null    int64  
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   body-style         205 non-null    object 
 5   drive-wheels       205 non-null    object 
 6   engine-location    205 non-null    object 
 7   width              205 non-null    float64
 8   height             205 non-null    float64
 9   engine-type        205 non-null    object 
 10  engine-size        205 non-null    int64  
 11  horsepower         205 non-null    int64  
 12  city-mpg           205 non-null    int64  
 13  highway-mpg        205 non-null    int64  
 14  price              205 non-null    int64  
dtypes: float64(2), int64(7), object(6)
memory usage: 25.6+ KB


In [7]:
df.isna().sum()

symboling            0
normalized-losses    0
make                 0
fuel-type            0
body-style           0
drive-wheels         0
engine-location      0
width                0
height               0
engine-type          0
engine-size          0
horsepower           0
city-mpg             0
highway-mpg          0
price                0
dtype: int64

In [17]:
df.select_dtypes(object).columns

Index(['make', 'fuel-type', 'body-style', 'drive-wheels', 'engine-location',
       'engine-type'],
      dtype='object')

In [19]:
cat_cols = df.select_dtypes(object).columns
cat_cols

Index(['make', 'fuel-type', 'body-style', 'drive-wheels', 'engine-location',
       'engine-type'],
      dtype='object')

In [21]:
df[cat_cols]

Unnamed: 0,make,fuel-type,body-style,drive-wheels,engine-location,engine-type
0,alfa-romero,gas,convertible,rwd,front,dohc
1,alfa-romero,gas,convertible,rwd,front,dohc
2,alfa-romero,gas,hatchback,rwd,front,ohcv
3,audi,gas,sedan,fwd,front,ohc
4,audi,gas,sedan,4wd,front,ohc
...,...,...,...,...,...,...
200,volvo,gas,sedan,rwd,front,ohc
201,volvo,gas,sedan,rwd,front,ohc
202,volvo,gas,sedan,rwd,front,ohcv
203,volvo,diesel,sedan,rwd,front,ohc


In [23]:
oe = OrdinalEncoder()

df[cat_cols] = oe.fit_transform(df[cat_cols])

In [25]:
df

Unnamed: 0,symboling,normalized-losses,make,fuel-type,body-style,drive-wheels,engine-location,width,height,engine-type,engine-size,horsepower,city-mpg,highway-mpg,price
0,3,115,0.0,1.0,0.0,2.0,0.0,64.1,48.8,0.0,130,111,21,27,13495
1,3,115,0.0,1.0,0.0,2.0,0.0,64.1,48.8,0.0,130,111,21,27,16500
2,1,115,0.0,1.0,2.0,2.0,0.0,65.5,52.4,5.0,152,154,19,26,16500
3,2,164,1.0,1.0,3.0,1.0,0.0,66.2,54.3,3.0,109,102,24,30,13950
4,2,164,1.0,1.0,3.0,0.0,0.0,66.4,54.3,3.0,136,115,18,22,17450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95,21.0,1.0,3.0,2.0,0.0,68.9,55.5,3.0,141,114,23,28,16845
201,-1,95,21.0,1.0,3.0,2.0,0.0,68.8,55.5,3.0,141,160,19,25,19045
202,-1,95,21.0,1.0,3.0,2.0,0.0,68.9,55.5,5.0,173,134,18,23,21485
203,-1,95,21.0,0.0,3.0,2.0,0.0,68.9,55.5,3.0,145,106,26,27,22470


In [27]:
df.dtypes

symboling              int64
normalized-losses      int64
make                 float64
fuel-type            float64
body-style           float64
drive-wheels         float64
engine-location      float64
width                float64
height               float64
engine-type          float64
engine-size            int64
horsepower             int64
city-mpg               int64
highway-mpg            int64
price                  int64
dtype: object

# Model building

In [29]:
x = df.iloc[:,:-1]
y = df['price']

In [31]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.3, random_state=1)

# Lasso

In [33]:
l1 = Lasso()
l1.fit(xtrain,ytrain)
l1.score(xtrain,ytrain), l1.score(xtest,ytest)

(0.8504215478243032, 0.7966615211575688)

# Hyperparameter Tuning

In [37]:
for i in range(100,200):
    l1 = Lasso(alpha=i)
    l1.fit(xtrain,ytrain)
    print(f"Alpha: {i} Train : {l1.score(xtrain,ytrain)} Test: {l1.score(xtest,ytest)}")

Alpha: 100 Train : 0.8372483974026499 Test: 0.8092040910955614
Alpha: 101 Train : 0.836989922954141 Test: 0.809297961073826
Alpha: 102 Train : 0.8367288785613756 Test: 0.8093910017461713
Alpha: 103 Train : 0.8364651676681872 Test: 0.8094832684251524
Alpha: 104 Train : 0.8361989818307349 Test: 0.8095746503624813
Alpha: 105 Train : 0.8359302253870347 Test: 0.8096652035299015
Alpha: 106 Train : 0.8356588978359674 Test: 0.80975492823091
Alpha: 107 Train : 0.8353849991129143 Test: 0.8098438245155561
Alpha: 108 Train : 0.8351084257059384 Test: 0.809931945484127
Alpha: 109 Train : 0.8348293842179739 Test: 0.8100191840285438
Alpha: 110 Train : 0.8345477714936768 Test: 0.8101055942385993
Alpha: 111 Train : 0.8342635877040371 Test: 0.8101911760417995
Alpha: 112 Train : 0.8339767222137543 Test: 0.8102759813628972
Alpha: 113 Train : 0.833687395461321 Test: 0.810359905626256
Alpha: 114 Train : 0.8333954973355284 Test: 0.8104430016665038
Alpha: 115 Train : 0.8331010276993129 Test: 0.8105252695475365

In [39]:
l1 = Lasso(alpha=175)
l1.fit(xtrain,ytrain)
l1.score(xtrain,ytrain), l1.score(xtest,ytest)

(0.8107234361496891, 0.8139459609474125)

# Ridge 

In [41]:
l2 = Ridge()

l2.fit(xtrain,ytrain)
l2.score(xtrain,ytrain), l2.score(xtest,ytest)

(0.8435840853399227, 0.8075632224690538)

In [43]:
for i in range(1,100):
    l2 = Ridge(alpha=i)
    l2.fit(xtrain,ytrain)
    print(f"Alpha: {i} Train: {l2.score(xtrain,ytrain)} Test: {l2.score(xtest,ytest)}")

Alpha: 1 Train: 0.8435840853399227 Test: 0.8075632224690538
Alpha: 2 Train: 0.8356695734845092 Test: 0.8112192014374254
Alpha: 3 Train: 0.8296379623431075 Test: 0.8129299663310143
Alpha: 4 Train: 0.8250699092246865 Test: 0.8138839096972439
Alpha: 5 Train: 0.8215093087765016 Test: 0.8144682684596017
Alpha: 6 Train: 0.8186486103834849 Test: 0.8148435627265231
Alpha: 7 Train: 0.8162882573020809 Test: 0.8150880725612082
Alpha: 8 Train: 0.8142964263180523 Test: 0.8152447305071581
Alpha: 9 Train: 0.812583522729097 Test: 0.8153392574436444
Alpha: 10 Train: 0.8110868722186453 Test: 0.8153881483263246
Alpha: 11 Train: 0.8097614513602519 Test: 0.8154025610279507
Alpha: 12 Train: 0.8085741366835051 Test: 0.8153903693701571
Alpha: 13 Train: 0.8075000372738095 Test: 0.8153573212892012
Alpha: 14 Train: 0.8065200924084595 Test: 0.8153077294072232
Alpha: 15 Train: 0.8056194580416448 Test: 0.8152449025615436
Alpha: 16 Train: 0.8047863980733029 Test: 0.815171426484327
Alpha: 17 Train: 0.8040115065187471

In [45]:
l2 = Ridge(alpha=7)
l2.fit(xtrain,ytrain)
l2.score(xtrain,ytrain),l2.score(xtest,ytest)

(0.8162882573020809, 0.8150880725612082)

# Elastic Net

In [47]:
en = ElasticNet()
en.fit(xtrain,ytrain)
en.score(xtrain,ytrain), en.score(xtest,ytest)

(0.7866253599240464, 0.8098264817256129)