### In this notebook we study **Ridge, Lasso & ElasticNet** regression

In [43]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import accuracy_score, r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
# Load scikit-learn's diabetes dataset; later cells read its .data, .target and .feature_names.
data = load_diabetes()

In [21]:
# Unpack the feature matrix and the regression target from the loaded dataset.
X, y = data.data, data.target

In [4]:
# Optional exploratory pair plot of every feature against every other; left commented out.
# sns.pairplot(pd.DataFrame(X))

#### Checking whether there is multicollinearity in our data

In [None]:
# Put the features in a labelled DataFrame so the correlation matrix is readable.
df = pd.DataFrame(data=X, columns=data.feature_names)

# Pairwise feature correlations, shown as the cell output.
df.corr()

In [22]:

# From here on X is the labelled DataFrame, so downstream cells keep the column names.
X = df

# Variance inflation factor of each column, computed against all the other columns.
# Built in a single constructor call: one row per feature, default 0..n-1 index.
vif_data = pd.DataFrame(
    {
        "Feature": X.columns,
        "VIF": [
            variance_inflation_factor(X.values, col_idx)
            for col_idx, _ in enumerate(X.columns)
        ],
    }
)

print(vif_data)


  Feature        VIF
0     age   1.217307
1     sex   1.278071
2     bmi   1.509437
3      bp   1.459428
4      s1  59.202510
5      s2  39.193370
6      s3  15.402156
7      s4   8.890986
8      s5  10.075967
9      s6   1.484623


#### Data splitting for Training and Testing

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Linear Regression 

In [24]:
# Ordinary least-squares baseline; fit() returns the estimator, so build and fit in one step.
lr = LinearRegression().fit(X_train, y_train)

# R^2 on the held-out test split, shown as the cell output.
lr.score(X_test, y_test)

0.4526027629719196

#### Ridge Regression

In [61]:
# Baseline Ridge model with a fixed penalty strength.
ridge = Ridge(alpha=0.1)
ridge.fit(X_train, y_train)

# Hyperparameter grid; the Lasso cell below reuses this same `param_grid`, so it is left unchanged.
# NOTE(review): per the Ridge docs, `random_state` is only used by the sag/saga solvers,
# so with the default solver those entries probably just repeat identical fits --
# confirm before trimming, since Lasso shares this grid.
param_grid = {
    "alpha":[0.001, 0.01, 0.1, 1, 10],
    "max_iter":[500, 1000, 2000],
    "random_state":[0,21,42,84]
}

# 5-fold cross-validated search over the grid, starting from the `ridge` estimator.
ridge_gs = GridSearchCV(ridge, param_grid, cv=5)
ridge_gs.fit(X_train, y_train)

# Report the tuned model's test-set score, as the other model sections do.
ridge_gs.score(X_test, y_test)

#### Lasso Regression

In [63]:
# Baseline Lasso with a fixed penalty; fit() returns the estimator itself.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# 5-fold grid search using the `param_grid` defined in the Ridge cell.
lasso_gs = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    cv=5,
    verbose=0,
).fit(X_train, y_train)

# Test-set score of the best model found, shown as the cell output.
lasso_gs.score(X_test, y_test)

0.4718547867276227

#### ElasticNet Regression

In [53]:
# Base ElasticNet with a genuine L1/L2 mix. l1_ratio=1 would make the penalty pure L1
# (equivalent to Lasso with the same alpha), so a value below 1 is used here.
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)
elastic.fit(X_train, y_train)

# Dedicated grid under its own name, so the `param_grid` shared by the Ridge and Lasso
# cells is not overwritten. l1_ratio=1 stays in as the Lasso limit so the search can
# still select it if it cross-validates best.
elastic_param_grid = {
    "alpha": [0.01, 0.1, 1, 10],
    "l1_ratio": [0.1, 0.5, 0.7, 1],
}

# 5-fold cross-validated search over penalty strength and L1/L2 mix.
elastic_gs = GridSearchCV(elastic, elastic_param_grid, cv=5)
elastic_gs.fit(X_train, y_train)

# Test-set score of the tuned ElasticNet, shown as the cell output.
elastic_gs.score(X_test, y_test)

0.4718547867276227