# Ridge - Lasso Regularization

In [2]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### EDA

In [4]:
# Load the advertising dataset from the project's relative data directory.
# A forward-slash path is used because it resolves on Windows, macOS and Linux,
# whereas a backslash-separated path only resolves on Windows.
data = pd.read_csv("data/Advertising.csv")
data.head()

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [5]:
# Remove the 'Unnamed: 0' column, which is not used as a feature.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it after the column is gone no longer raises KeyError.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

In [6]:
# Preview the first rows again to confirm the 'Unnamed: 0' column was removed.
data.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [7]:
# Count the missing values in each column.
data.isna().sum()

TV           0
radio        0
newspaper    0
sales        0
dtype: int64

In [8]:
# Count the rows that are exact duplicates of an earlier row.
data.duplicated().sum()

0

### Modeling

### Multiple linear regression - least squares fitting 

In [11]:
# Separate the target ('sales') from the predictors (every other column).
y = data['sales']
X = data.drop(columns=['sales'])

In [12]:
# Split the data into training and testing subsets; the fixed random_state
# makes the split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,  # the remaining 0.67 of the data is used for training
    random_state=42,
)

In [13]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares baseline: fit on the training split, then predict
# the held-out test rows.
lin_reg = LinearRegression().fit(X_train, y_train)
y_predlr = lin_reg.predict(X_test)

In [14]:
# IPython help lookup: displays the type, source file and docstring of lin_reg.
# Interactive aid only; it assigns nothing.
?lin_reg

[1;31mType:[0m        LinearRegression
[1;31mString form:[0m LinearRegression()
[1;31mFile:[0m        c:\users\drnim\anaconda3\lib\site-packages\sklearn\linear_model\_base.py
[1;31mDocstring:[0m  
Ordinary least squares Linear Regression.

LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
to minimize the residual sum of squares between the observed targets in
the dataset, and the targets predicted by the linear approximation.

Parameters
----------
fit_intercept : bool, default=True
    Whether to calculate the intercept for this model. If set
    to False, no intercept will be used in calculations
    (i.e. data is expected to be centered).

copy_X : bool, default=True
    If True, X will be copied; else, it may be overwritten.

n_jobs : int, default=None
    The number of jobs to use for the computation. This will only provide
    speedup in case of sufficiently large problems, that is if firstly
    `n_targets > 1` and secondly `X` is sparse or if `pos

In [15]:
# Testing error: mean squared error of the linear model on the held-out split.
from sklearn.metrics import mean_squared_error

# scikit-learn documents the signature as mean_squared_error(y_true, y_pred).
# The ground truth is passed first (by keyword) so the call follows that
# contract; for plain MSE the numeric result is the same either way.
LR_Error = mean_squared_error(y_true=y_test, y_pred=y_predlr)
LR_Error

3.7279283306815114

In [16]:
# Training error: MSE of the linear model on the data it was fitted to, for
# comparison with the testing error.
y_pred_train = lin_reg.predict(X_train)
# Ground truth first, prediction second, matching mean_squared_error(y_true, y_pred).
LR_Training_Error = mean_squared_error(y_true=y_train, y_pred=y_pred_train)
LR_Training_Error

2.4108586545578548

### Ridge Regression

In [18]:
?Ridge

Object `Ridge` not found.


In [19]:
from sklearn.model_selection import GridSearchCV  # exhaustive hyperparameter search
from sklearn.linear_model import Ridge

# Hyperparameters are settings chosen before fitting; searching for the best
# combination of them is called hyperparameter tuning. Two are tuned here:
#   - alpha  : regularization strength, a non-negative float in [0, inf).
#              Smaller values mean weaker regularization, larger values stronger.
#   - solver : computational routine used to fit the model, one of
#              {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'},
#              default 'auto'.
alphavalues = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20, 30, 100]
solvervalues = ['auto', 'svd']

# Every (alpha, solver) combination in this grid is evaluated.
parameters = dict(alpha=alphavalues, solver=solvervalues)

ridge = Ridge()

# GridSearchCV cross-validates each combination in the grid and keeps the best.
# Neither `cv` nor `scoring` is passed, so scikit-learn's defaults apply:
# 5-fold cross-validation, scored with the estimator's own `score` method
# (R^2 for Ridge) rather than negative mean squared error.
ridge_regressor = GridSearchCV(estimator=ridge, param_grid=parameters)

ridge_regressor.fit(X_train, y_train)

In [20]:
# Show the Ridge estimator that the search selected as best.
ridge_regressor.best_estimator_

In [21]:
# Parameter combination (alpha, solver) that achieved the best cross-validation score.
ridge_regressor.best_params_

{'alpha': 100, 'solver': 'auto'}

In [22]:
# Score of the selected Ridge model on the training data. No custom `scoring`
# was given to the search, so this is the estimator's default score (R^2).
ridge_regressor.score(X_train, y_train)

0.909517572518106

In [23]:
# Rebuild Ridge from the parameters selected by the grid search rather than
# hard-coding alpha/solver, so this cell stays correct if the grid or the data
# changes (the same approach the Lasso section uses).
ridge = Ridge(**ridge_regressor.best_params_)
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

In [24]:
# Test-set MSE of the tuned Ridge model. Arguments follow the documented
# mean_squared_error(y_true, y_pred) order: ground truth first, prediction second.
Ridge_Testing_Error = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)
Ridge_Testing_Error

3.720601493106786

### Lasso

In [26]:
from sklearn.linear_model import Lasso

# Candidate regularization strengths for the Lasso model.
parameters = {
    'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20],
}

lasso = Lasso()

# Cross-validated search over alpha, using GridSearchCV's default cv and scoring.
lasso_regressor = GridSearchCV(estimator=lasso, param_grid=parameters)
lasso_regressor.fit(X_train, y_train)

In [27]:
# Alpha value that achieved the best cross-validation score for Lasso.
lasso_regressor.best_params_

{'alpha': 1}

In [28]:
# Rebuild Lasso with the alpha chosen by the search, fit it on the training
# split and predict the held-out rows.
BP = lasso_regressor.best_params_
lasso = Lasso(alpha=BP['alpha']).fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)

In [29]:
# Test-set MSE of the tuned Lasso model. Arguments follow the documented
# mean_squared_error(y_true, y_pred) order: ground truth first, prediction second.
Lasso_Testing_Error = mean_squared_error(y_true=y_test, y_pred=y_pred_lasso)
Lasso_Testing_Error

3.641439660278575

In [30]:
# Default score (R^2) of the refitted Lasso model on the training data.
lasso.score(X_train, y_train)

0.9093183106685329

### Elastic Net

In [32]:
from sklearn.linear_model import ElasticNet

# Candidate regularization strengths for the Elastic Net model.
parameters = {
    'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20],
}

elastic = ElasticNet()

# Cross-validated search over alpha, using GridSearchCV's default cv and scoring.
elastic_regressor = GridSearchCV(estimator=elastic, param_grid=parameters)
elastic_regressor.fit(X_train, y_train)

In [33]:
# Alpha value that achieved the best cross-validation score for Elastic Net.
elastic_regressor.best_params_

{'alpha': 1}

In [34]:
# Rebuild Elastic Net from the parameters selected by the grid search rather
# than hard-coding alpha, so this cell stays correct if the grid or the data
# changes (the same approach the Lasso section uses).
elastic = ElasticNet(**elastic_regressor.best_params_)
elastic.fit(X_train, y_train)
y_pred_elastic = elastic.predict(X_test)

In [35]:
# Test-set MSE of the tuned Elastic Net model. Arguments follow the documented
# mean_squared_error(y_true, y_pred) order: ground truth first, prediction second.
elastic_Testing_Error = mean_squared_error(y_true=y_test, y_pred=y_pred_elastic)
elastic_Testing_Error

3.678636493022797

In [36]:
# Default score (R^2) of the refitted Elastic Net model on the training data.
elastic.score(X_train, y_train)

0.9094544540835117

In [37]:
# Lasso has the lowest testing error of the four models, so it is chosen as the final model.

### How to save a model?

In [73]:
import joblib

# Persist the final model. Lasso had the lowest test MSE of the models compared
# in this notebook (3.641 vs. 3.679 for Elastic Net), so it is the estimator
# written to disk; previously the Elastic Net model was saved by mistake.
joblib.dump(lasso, 'Advertising.pkl')

['Advertising.pkl']

### Load the saved model

In [80]:
# Restore the persisted estimator from disk and check that it can still
# produce predictions for the held-out features.
model_from_dir=joblib.load('Advertising.pkl')
ypred=model_from_dir.predict(X_test)

In [38]:
# END