In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings 
warnings.filterwarnings('ignore')

In [17]:
df=sns.load_dataset('mpg')
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [18]:
# we are dropping the name column cause it has no significane for our model ( no patterns that our ml model can learn
df.drop(columns='name',inplace=True)

In [19]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,usa
1,15.0,8,350.0,165.0,3693,11.5,70,usa
2,18.0,8,318.0,150.0,3436,11.0,70,usa
3,16.0,8,304.0,150.0,3433,12.0,70,usa
4,17.0,8,302.0,140.0,3449,10.5,70,usa
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa
394,44.0,4,97.0,52.0,2130,24.6,82,europe
395,32.0,4,135.0,84.0,2295,11.6,82,usa
396,28.0,4,120.0,79.0,2625,18.6,82,usa


In [20]:
#lets perform some EDA 
df.isna().sum() # we are calculating the sum of null values from each col

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model_year      0
origin          0
dtype: int64

In [21]:
df.shape

(398, 8)

In [22]:
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower      float64
weight            int64
acceleration    float64
model_year        int64
origin           object
dtype: object

In [23]:
# since we haven't done the outlier treatment its better to fill up with median
df['horsepower'].median()

np.float64(93.5)

In [24]:
df['horsepower']=df['horsepower'].fillna(df['horsepower'].median())

In [25]:
df.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      0
weight          0
acceleration    0
model_year      0
origin          0
dtype: int64

In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 25.0+ KB


In [27]:
#origin has object datatype that means string so lets see how many unique values are there
df['origin'].value_counts()

origin
usa       249
japan      79
europe     70
Name: count, dtype: int64

In [28]:
#since its a categorical columns its compulsory to do data encoding that means we assign usa to 1 japan to 2 and europe to 3
df['origin']=df['origin'].map({'usa':1,'japan':2,'europe':3})


In [30]:
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower      float64
weight            int64
acceleration    float64
model_year        int64
origin            int64
dtype: object

In [31]:
#now separate into X and y
X=df.drop('mpg',axis=1) # we are just dropping mpg col and adding all remaining columns to x
y=df['mpg']

In [32]:
X

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,8,307.0,130.0,3504,12.0,70,1
1,8,350.0,165.0,3693,11.5,70,1
2,8,318.0,150.0,3436,11.0,70,1
3,8,304.0,150.0,3433,12.0,70,1
4,8,302.0,140.0,3449,10.5,70,1
...,...,...,...,...,...,...,...
393,4,140.0,86.0,2790,15.6,82,1
394,4,97.0,52.0,2130,24.6,82,3
395,4,135.0,84.0,2295,11.6,82,1
396,4,120.0,79.0,2625,18.6,82,1


In [33]:
#train test split
from sklearn.model_selection import train_test_split

In [34]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30, random_state=1)

In [35]:
#simple Linear Regression Model
from sklearn.linear_model import LinearRegression
model=LinearRegression()

In [36]:
model

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [37]:
model.fit(X_train,y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [38]:
model.coef_

array([-0.31761423,  0.02623748, -0.01827076, -0.00748775,  0.05040673,
        0.84709514,  1.51909584])

In [None]:
# here the coef are relatively smaller so even if one independent var changes there wont be much difference in prediction
# This is sometimes called a smoother model

#these features might not be contributing much to the prediciton

In [39]:
from sklearn.metrics import r2_score
ypred=model.predict(X_test)
r2linear=r2_score(y_test,ypred)
print(f'the rsquare is {r2linear}')

the rsquare is 0.8348001123742286


In [40]:
# regularized model
#ridge regression

In [41]:
from sklearn.linear_model import Ridge
ridge_regression_model=Ridge(alpha=0.1)# random val for alpha
ridge_regression_model
#in practical implementation lambda is alpha


0,1,2
,alpha,0.1
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [None]:
#alpha is the constant that multiplies the penalty term

In [42]:
ridge_regression_model.fit(X_train,y_train)

0,1,2
,alpha,0.1
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [43]:
for i, col_name in enumerate(X_train.columns):
    print(f'the coefficient for {col_name} is {ridge_regression_model.coef_[i]}')

the coefficient for cylinders is -0.3170032101006609
the coefficient for displacement is 0.026213249757982955
the coefficient for horsepower is -0.01826325248144886
the coefficient for weight is -0.0074873260502131105
the coefficient for acceleration is 0.05036896947442607
the coefficient for model_year is 0.8470062938903142
the coefficient for origin is 1.517452828565376


In [44]:
#for ridge regression evaluation
ypred_ridge = ridge_regression_model.predict(X_test)
r2_score(y_test,ypred_ridge)

0.8348084889168357

In [45]:
# since the model is somewhat smooth there is not much difference in r2_score

In [46]:
#Lasso Regression
from sklearn.linear_model import Lasso

In [47]:
lasso_regression_model = Lasso(alpha=0.5)
lasso_regression_model

0,1,2
,alpha,0.5
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [48]:
lasso_regression_model.fit(X_train,y_train)

0,1,2
,alpha,0.5
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [49]:
for i, col_name in enumerate(X_train.columns):
    print(f'the coefficient for {col_name} is {lasso_regression_model.coef_[i]}')

the coefficient for cylinders is -0.0
the coefficient for displacement is 0.006208198888300381
the coefficient for horsepower is -0.011058382987169605
the coefficient for weight is -0.00698267316802309
the coefficient for acceleration is 0.0
the coefficient for model_year is 0.7446549520038191
the coefficient for origin is 0.0


In [None]:
#observation >>> 3 feature coefficients become 0 ... Lasso helps in feature selection

In [50]:
#evaluation for lasso
ypred_lasso = lasso_regression_model.predict(X_test)
r2_lasso=r2_score(y_test,ypred_lasso)
print(f'the r sq of lasso regression is {r2_lasso}')

the r sq of lasso regression is 0.8277934716635554


In [51]:
#elastic net 
from sklearn.linear_model import ElasticNet

In [None]:
#l1_ration parameter in elastic net determines how much percentage is lasso nad rigge
#for example if l1_ratio is 1 then its entirely lasso
#if l1_ratio is 0.8 that means 80 percent is lasso and 20 % is ridge

In [52]:
elastic_net_model = ElasticNet(alpha=1 , l1_ratio=0.5) # we can have l1 ratio of our choice
elastic_net_model.fit(X_train,y_train)

0,1,2
,alpha,1
,l1_ratio,0.5
,fit_intercept,True
,precompute,False
,max_iter,1000
,copy_X,True
,tol,0.0001
,warm_start,False
,positive,False
,random_state,


In [53]:
for i, col_name in enumerate(X_train.columns):
    print(f'the coefficient for {col_name} is {elastic_net_model.coef_[i]}')

the coefficient for cylinders is -0.0
the coefficient for displacement is 0.005888869953667564
the coefficient for horsepower is -0.012403874933570128
the coefficient for weight is -0.006934550516257631
the coefficient for acceleration is 0.0
the coefficient for model_year is 0.7133150744603873
the coefficient for origin is 0.0


In [54]:
#evaluation for elastic net
ypred_elastic = elastic_net_model.predict(X_test)
r2_en=r2_score(y_test,ypred_elastic)
print(f'the r sq of elastic net is {r2_en}')

the r sq of elastic net is 0.8284840073256803


In [57]:
#regularization with crossvalidation
from sklearn.linear_model import LassoCV # internally it takes care of cross validation and gives us the avg value for lasso regression
lasso_cv = LassoCV(cv=5 , verbose=2)
lasso_cv

0,1,2
,eps,0.001
,n_alphas,'deprecated'
,alphas,'warn'
,fit_intercept,True
,precompute,'auto'
,max_iter,1000
,tol,0.0001
,copy_X,True
,cv,5
,verbose,2


In [58]:
lasso_cv.fit(X_train,y_train)

Path: 000 out of 100
Path: 001 out of 100
Path: 002 out of 100
Path: 003 out of 100
Path: 004 out of 100
Path: 005 out of 100
Path: 006 out of 100
Path: 007 out of 100
Path: 008 out of 100
Path: 009 out of 100
Path: 010 out of 100
Path: 011 out of 100
Path: 012 out of 100
Path: 013 out of 100
Path: 014 out of 100
Path: 015 out of 100
Path: 016 out of 100
Path: 017 out of 100
Path: 018 out of 100
Path: 019 out of 100
Path: 020 out of 100
Path: 021 out of 100
Path: 022 out of 100
Path: 023 out of 100
Path: 024 out of 100
Path: 025 out of 100
Path: 026 out of 100
Path: 027 out of 100
Path: 028 out of 100
Path: 029 out of 100
Path: 030 out of 100
Path: 031 out of 100
Path: 032 out of 100
Path: 033 out of 100
Path: 034 out of 100
Path: 035 out of 100
Path: 036 out of 100
Path: 037 out of 100
Path: 038 out of 100
Path: 039 out of 100
Path: 040 out of 100
Path: 041 out of 100
Path: 042 out of 100
Path: 043 out of 100
Path: 044 out of 100
Path: 045 out of 100
Path: 046 out of 100
Path: 047 out

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s finished


0,1,2
,eps,0.001
,n_alphas,'deprecated'
,alphas,'warn'
,fit_intercept,True
,precompute,'auto'
,max_iter,1000
,tol,0.0001
,copy_X,True
,cv,5
,verbose,2


In [59]:
y_pred=lasso_cv.predict(X_test)
r2_score(y_test,y_pred)

0.8082805983844751

In [60]:
#cross validation with hyper parameter tuning
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
lasso=Lasso()
lasso

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [61]:
#Grid search cv>>>> Dictionary with parameters names (str) as keys and lists of parametr settings to try as values
param_grid={'alpha':[0.001,0.01,0.1,1,100,100]}

In [62]:
grid_search=GridSearchCV(estimator=lasso, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
grid_search

0,1,2
,estimator,Lasso()
,param_grid,"{'alpha': [0.001, 0.01, ...]}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [63]:
grid_search.fit(X_train,y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.1s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

0,1,2
,estimator,Lasso()
,param_grid,"{'alpha': [0.001, 0.01, ...]}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [65]:
grid_search.best_params_ # that means model performed very well at this particular alpha

{'alpha': 0.1}

In [67]:
grid_search.best_params_['alpha']

0.1

In [72]:
grid_search.best_score_#Mean cross-validated score of the best_estimator

np.float64(0.7964209726696482)

In [69]:
grid_search.best_estimator_

0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [70]:
y_pred = grid_search.best_estimator_.predict(X_test)

In [71]:
r2_score(y_test,y_pred)

0.8345318641232304

In [82]:
#Randomized search cv
param_distributions={'alpha':[0.001,0.01,0.1,1,100,100]}
random_search=RandomizedSearchCV(estimator=lasso, param_distributions=param_distributions,n_iter=3, cv=5, scoring='r2', verbose=2)
random_search

0,1,2
,estimator,Lasso()
,param_distributions,"{'alpha': [0.001, 0.01, ...]}"
,n_iter,3
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [83]:
random_search.fit(X_train,y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ........................................

0,1,2
,estimator,Lasso()
,param_distributions,"{'alpha': [0.001, 0.01, ...]}"
,n_iter,3
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [86]:
random_search.best_estimator_

0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [87]:
random_search.best_score_

np.float64(0.7964209726696482)

In [88]:
y_pred=random_search.best_estimator_.predict(X_test)
r2_score(y_test,y_pred)

0.8345318641232304

In [89]:
#Ridge Grid Search CV
ridge=Ridge()
param_grid={'alpha':[0.001,0.01,0.1,1,100,100]}
ridge_grid_search=GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
ridge_grid_search.fit(X_train,y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

0,1,2
,estimator,Ridge()
,param_grid,"{'alpha': [0.001, 0.01, ...]}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,1
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [90]:
ypred=ridge_grid_search.best_estimator_.predict(X_test)
r2_score(y_test,y_pred)

0.8345318641232304

In [94]:
#Ridge randomized search cv
param_distributions={'alpha':[0.001,0.01,0.1,1,100,100]}
ridge_random_search=RandomizedSearchCV(estimator=ridge, param_distributions=param_distributions,n_iter=3, cv=5, scoring='r2', verbose=2)
ridge_random_search

0,1,2
,estimator,Ridge()
,param_distributions,"{'alpha': [0.001, 0.01, ...]}"
,n_iter,3
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [95]:
ridge_random_search.fit(X_train,y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................

0,1,2
,estimator,Ridge()
,param_distributions,"{'alpha': [0.001, 0.01, ...]}"
,n_iter,3
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,alpha,1
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [96]:
y_pred=ridge_random_search.best_estimator_.predict(X_test)
r2_score(y_test,y_pred)

0.8348814925912456

In [100]:
#Elastic net grid CV
model=ElasticNet()
param_grid={'alpha':[0.001,0.01,0.1,1,100,100],'l1_ratio':[0.1,0.4,0.9]}
model=GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='r2', verbose=2)
model.fit(X_train,y_train)
print(model.best_params_)
print(model.best_estimator_)
y_pred=model.best_estimator_.predict(X_test)
print(r2_score(y_test,y_pred))

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.9; total time=   0.0s
[CV] END ..........................alpha=0.001, 