# Regularized linear models

- Ridge - l2 regularization
- LASSO - l1 regularization
- ElasticNet - combination of l1 and l2

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the advertising data; the first CSV column becomes the row index.
df = pd.read_csv(
    "../Data/Advertising.csv",
    index_col=0,
)
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


## Data preparation

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Separate the predictor columns from the "sales" target.
X = df.drop(columns="sales")
y = df["sales"]

# The polynomial degree is a knob worth experimenting with.
model_polynomial = PolynomialFeatures(degree=3, include_bias=False)
# The feature expansion is applied to the full data set before splitting;
# scaling, in contrast, has to wait until after the split.
poly_features = model_polynomial.fit_transform(X)

print(poly_features.shape)

# The order of the returned splits matters; see help(train_test_split).
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.33,
    random_state=42,
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(200, 19)


((134, 19), (66, 19), (134,), (66,))

## Feature standardization

In [18]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only ...
scaler = StandardScaler()
scaler.fit(X_train)

# ... then apply that same fitted scaler to both splits.
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# Overall mean and standard deviation of each scaled matrix, as a sanity check.
print(f"scaled_X_train mean: {scaled_X_train.mean()} std: {scaled_X_train.std()}")
print(f"scaled_X_test mean: {scaled_X_test.mean()} std: {scaled_X_test.std()}")

scaled_X_train mean: -3.34898382919136e-17 std: 1.0
scaled_X_test mean: -0.11982457640326809 std: 1.1245966534380971


## Regularization - Ridge l2

In [34]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error

def ridge_regression(X_train, X_test, y, penalty = 0):
    """Fit a Ridge model and return its predictions for ``X_test``.

    Args:
        X_train: Feature matrix the model is fitted on.
        X_test: Feature matrix to predict for.
        y: Targets paired with ``X_train``.
        penalty: Passed to ``Ridge`` as ``alpha``. Defaults to 0.

    Returns:
        The result of calling ``predict`` on ``X_test``.
    """
    return Ridge(alpha=penalty).fit(X_train, y).predict(X_test)

# Ridge run with penalty=0, i.e. a zero penalty term.
y_pred = ridge_regression(
    X_train=scaled_X_train,
    X_test=scaled_X_test,
    y=y_train,
    penalty=0,
)

# Error metrics on the held-out split; RMSE is derived from MSE.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

(0.5148267621786567, 0.3748516441217886)

In [35]:
from sklearn.linear_model import LinearRegression

# Linear regression on the same scaled features, for comparison with the
# Ridge result above.
model_linear = LinearRegression().fit(scaled_X_train, y_train)
y_pred = model_linear.predict(scaled_X_test)

# Error metrics on the held-out split; RMSE is derived from MSE.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

(0.5148267621786622, 0.37485164412178396)

## Regularization - LASSO l1

In [43]:
from sklearn.linear_model import Lasso

# Lasso with a fixed penalty strength of 0.1.
model_lasso = Lasso(alpha=0.1)
model_lasso.fit(scaled_X_train, y_train)

y_pred = model_lasso.predict(scaled_X_test)

# Error metrics on the held-out split; RMSE is derived from MSE.
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

(0.7853962108799017, 0.5735346450114956)

## k-fold cross-validation

### RidgeCV l2

In [53]:
# CV = cross-validation. When no cv is chosen, leave-one-out validation is
# used, which is good but expensive.
# NOTE(review): scikit-learn documents RidgeCV's default as an *efficient*
# leave-one-out scheme - confirm whether the cost remark applies here.
from sklearn.linear_model import RidgeCV

# alpha corresponds to lambda in the theory: the penalty term.
model_ridgeCV = RidgeCV(
    alphas=(0.00001, 0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1, 5, 10)
).fit(scaled_X_train, y_train)

print(model_ridgeCV.alpha_)  # the alpha selected by cross-validation
print(model_ridgeCV.coef_)

y_pred = model_ridgeCV.predict(scaled_X_test)

# Error metrics on the held-out split; RMSE is derived from MSE.
MAE = mean_absolute_error(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE

0.1
[ 5.84681185  0.52142086  0.71689997 -6.17948738  3.75034058 -1.36283352
 -0.08571128  0.08322815 -0.34893776  2.16952446 -0.47840838  0.68527348
  0.63080799 -0.5950065   0.61661989 -0.31335495  0.36499629  0.03328145
 -0.13652471]


(0.5635899169609213, 0.434307576654291)

### LassoCV l1

In [60]:
from sklearn.linear_model import LassoCV

# Lasso with the penalty strength chosen by 5-fold cross-validation over an
# automatically generated grid of 100 alphas.
# max_iter must be an integer: scikit-learn releases that validate estimator
# parameters reject a float such as 1e4 with an InvalidParameterError.
model_lassoCV = LassoCV(eps = 0.001, n_alphas = 100, max_iter = 10_000, cv = 5)
model_lassoCV.fit(scaled_X_train, y_train)

print(f"Chosen alpha (penalty term): {model_lassoCV.alpha_}")
# Coefficients of the refitted model; entries printed as 0 were eliminated.
print(model_lassoCV.coef_)

y_pred = model_lassoCV.predict(scaled_X_test)

# Error metrics on the held-out split; RMSE is derived from MSE.
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred)

RMSE, MAE

Chosen alpha (penalty term): 0.004968802520343366
[ 5.19612354  0.43037087  0.29876351 -4.80417579  3.46665205 -0.40507212
  0.          0.          0.          1.35260206 -0.          0.
  0.14879719 -0.          0.          0.          0.09649665  0.
  0.04353956]


(0.5785146895301977, 0.46291883026932984)

### ElasticNetCV

In [68]:
from sklearn.linear_model import ElasticNetCV

# Cross-validate over several candidate L1/L2 mixing ratios as well as alpha.
model_elastic = ElasticNetCV(
    l1_ratio=(0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.99, 1),
    max_iter=10000,
).fit(scaled_X_train, y_train)

# Report the combination selected by cross-validation.
print(f"L1 ratio: {model_elastic.l1_ratio_}")
print(f"alpha (penalty): {model_elastic.alpha_}")

L1 ratio: 1.0
alpha (penalty): 0.004968802520343366


In [70]:
# Score the cross-validated elastic-net model on the held-out split.
y_pred = model_elastic.predict(scaled_X_test)

# RMSE is derived from MSE.
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
RMSE = np.sqrt(MSE)

RMSE, MAE


(0.5785146895301977, 0.46291883026932984)