## Regulairzed Regression Models

In [1]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_absolute_error as mae


computers = pd.read_csv("/Users/AnirbanGuha/Library/CloudStorage/OneDrive-Personal/Maven Analytics Courses/Data Science in Python - Regression/Course Materials/Data/Computers.csv")

computers.tail()

Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
6254,1690,100,528,8,15,no,no,yes,39,35
6255,2223,66,850,16,15,yes,yes,yes,39,35
6256,2654,100,1200,24,15,yes,no,yes,39,35
6257,2195,100,850,16,15,yes,no,yes,39,35
6258,2490,100,850,16,17,yes,no,yes,39,35


In [2]:
# Use the dataframe below for model fitting, afterwards, feel free to try more feature engineering!

computers_eng = (
    pd.get_dummies(
        computers.assign(
            hd2 = computers["hd"] ** 2,
            hd3 = computers["hd"] ** 3,
        ),
    drop_first=True, dtype="int"
    )
)

In [3]:
from sklearn.model_selection import train_test_split

X = sm.add_constant(computers_eng.drop("price", axis=1))
y = np.log(computers["price"])

# Test Split

X, X_test, y, y_test = train_test_split(X, y, test_size=.2, random_state=12345)

In [4]:
model = sm.OLS(y, X).fit()

#model.summary()

print(f"Training R2: {r2(y, model.predict(X))}")
print(f"Training MAE: {mae(y, model.predict(X))}")

Training R2: 0.8049349026307837
Training MAE: 0.08737502536630727


In [5]:
print(f"Test R2: {r2(y_test, model.predict(X_test))}")
print(f"Test MAE: {mae(y_test, model.predict(X_test))}")

Test R2: 0.8106348481658194
Test MAE: 0.08744222740403389


In [31]:
model.params[X.columns]

const          6.818326e+00
speed          3.989325e-03
hd             1.205916e-03
ram            2.016894e-02
screen         4.931753e-02
ads            1.627974e-04
trend         -2.441433e-02
hd2           -1.073990e-06
hd3            3.408844e-10
cd_yes         4.020758e-02
multi_yes      4.819221e-02
premium_yes   -2.437600e-01
dtype: float64

### 1. Ridge Regression Model

In [7]:
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler

In [11]:
std = StandardScaler()
X_m = std.fit_transform(X)
X_te = std.fit_transform(X_test)


In [15]:
# Test the model of n_alphas
n_alphas = 200
alphas = 10 ** np.linspace(-3, 3, num=n_alphas)

#Fit the ridge model with 5 fold cross-validation and find the best alpha
ridge_model = RidgeCV(alphas=alphas,cv=5)

ridge_model.fit(X_m, y)
print(f'Ridge Model (Train) R2: {ridge_model.score(X_m, y)}')
print(f'Ridge Model (Train) MAE: {mae(y, ridge_model.predict(X_m))}')
print(f'Ridge Model Alpha: {ridge_model.alpha_}')


Ridge Model (Train) R2: 0.8048079003150022
Ridge Model (Train) MAE: 0.08735780764699269
Ridge Model Alpha: 2.2219468609395236


In [19]:
print("------Ridge Model Test Scores------------")
print(f'Ridge Model (Test) R2: {ridge_model.score(X_te, y_test)}')
print(f'Ridge Model (Test) MAE: {mae(y_test, ridge_model.predict(X_te))}')

------Ridge Model Test Scores------------
Ridge Model (Test) R2: 0.8116110236700145
Ridge Model (Test) MAE: 0.08726350617129108


In [36]:
pd.DataFrame({"features":X.columns,"coef_ridge":ridge_model.coef_,
             "coef_linear":model.params})

Unnamed: 0,features,coef_ridge,coef_linear
const,const,0.0,6.818326
speed,speed,0.084746,0.003989325
hd,hd,0.288951,0.001205916
ram,ram,0.113356,0.02016894
screen,screen,0.045078,0.04931753
ads,ads,0.012539,0.0001627974
trend,trend,-0.190195,-0.02441433
hd2,hd2,-0.302365,-1.07399e-06
hd3,hd3,0.113938,3.408844e-10
cd_yes,cd_yes,0.020061,0.04020758


### 2. Lasso Regression Model

In [37]:
from sklearn.linear_model import LassoCV

In [38]:
# Test the model of n_alphas
n_alphas = 200
alphas = 10 ** np.linspace(-3, 3, num=n_alphas)

#Fit the ridge model with 5 fold cross-validation and find the best alpha
lasso_model = LassoCV(alphas=alphas,cv=5)

lasso_model.fit(X_m, y)
print("---------------Lasso Model Training Scores---------------")
print(f'Lasso Model Alpha: {lasso_model.alpha_}')
print(f'Lasso Model (Train) R2: {lasso_model.score(X_m, y)}')
print(f'Lasso Model (Train) MAE: {mae(y, lasso_model.predict(X_m))}')


print("------Lasso Model Test Scores------------")
print(f'Lasso Model (Test) R2: {lasso_model.score(X_te, y_test)}')
print(f'Lasso Model (Test) MAE: {mae(y_test, lasso_model.predict(X_te))}')


---------------Lasso Model Training Scores---------------
Lasso Model Alpha: 0.001
Lasso Model (Train) R2: 0.7990178822046732
Lasso Model (Train) MAE: 0.08856024399732959
------Lasso Model Test Scores------------
Lasso Model (Test) R2: 0.8026523296375802
Lasso Model (Test) MAE: 0.08905885254506005


In [40]:
pd.DataFrame({"features":X.columns,
              "coef_ridge":ridge_model.coef_,
              "coef_lasso":lasso_model.coef_,
              "coef_linear":model.params})

Unnamed: 0,features,coef_ridge,coef_lasso,coef_linear
const,const,0.0,0.0,6.818326
speed,speed,0.084746,0.085213,0.003989325
hd,hd,0.288951,0.180404,0.001205916
ram,ram,0.113356,0.112819,0.02016894
screen,screen,0.045078,0.045433,0.04931753
ads,ads,0.012539,0.014418,0.0001627974
trend,trend,-0.190195,-0.186978,-0.02441433
hd2,hd2,-0.302365,-0.090132,-1.07399e-06
hd3,hd3,0.113938,0.0,3.408844e-10
cd_yes,cd_yes,0.020061,0.019338,0.04020758


### 3. Elastic Net Regression

In [42]:
from sklearn.linear_model import ElasticNetCV

In [46]:
# Test the model of n_alphas
n_alphas = 200
alphas = 10 ** np.linspace(-3, 3, num=n_alphas)
l1_ratio = np.linspace(0.01,1,100)

#Fit the ridge model with 5 fold cross-validation and find the best alpha
ElasticNet_model = ElasticNetCV(alphas=alphas,l1_ratio=l1_ratio,cv=5)

ElasticNet_model.fit(X_m, y)
print("---------------Elastic Net Model Training Scores---------------")
print(f'Elastic Model Alpha: {ElasticNet_model.alpha_}')
print(f'Elastic Net Lambda: {ElasticNet_model.l1_ratio_}')
print(f'Elastic Net (Train) R2: {ElasticNet_model.score(X_m, y)}')
print(f'Elastic Net Model (Train) MAE: {mae(y, ElasticNet_model.predict(X_m))}')


print("------Elastic Net Model Test Scores------------")
print(f'Elastic Net Model (Test) R2: {ElasticNet_model.score(X_te, y_test)}')
print(f'Elastic Net Model (Test) MAE: {mae(y_test, ElasticNet_model.predict(X_te))}')



---------------Elastic Net Model Training Scores---------------
Elastic Model Alpha: 0.001
Elastic Net Lambda: 0.01
Elastic Net (Train) R2: 0.8043966703829291
Elastic Net Model (Train) MAE: 0.08741998821560497
------Elastic Net Model Test Scores------------
Elastic Net Model (Test) R2: 0.8108572232845515
Elastic Net Model (Test) MAE: 0.0874119163471673


In [45]:
pd.DataFrame({"features":X.columns,
              "coef_ridge":ridge_model.coef_,
              "coef_lasso":lasso_model.coef_,
              "coeff_ElasticNet":ElasticNet_model.coef_,
              "coef_linear":model.params})

Unnamed: 0,features,coef_ridge,coef_lasso,coeff_ElasticNet,coef_linear
const,const,0.0,0.0,0.0,6.818326
speed,speed,0.084746,0.085213,0.085026,0.003989325
hd,hd,0.288951,0.180404,0.259425,0.001205916
ram,ram,0.113356,0.112819,0.113278,0.02016894
screen,screen,0.045078,0.045433,0.045304,0.04931753
ads,ads,0.012539,0.014418,0.013177,0.0001627974
trend,trend,-0.190195,-0.186978,-0.189573,-0.02441433
hd2,hd2,-0.302365,-0.090132,-0.243797,-1.07399e-06
hd3,hd3,0.113938,0.0,0.082062,3.408844e-10
cd_yes,cd_yes,0.020061,0.019338,0.019962,0.04020758
