# [Regularization](https://github.com/kokchun/Maskininlarning-AI21/blob/main/Lectures/L4-Regularization.ipynb)

## Data preparation

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Load the advertising data; the first CSV column is used as the row index.
df = pd.read_csv('../Data/Advertising.csv', index_col=0)
X = df.drop(columns='sales')
y = df['sales']

# Expand the features with polynomial terms up to degree 3 (no bias column).
model_polynomial = PolynomialFeatures(degree=3, include_bias=False)
poly_features = model_polynomial.fit_transform(X)

# Hold out a third of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.33,
    random_state=42,
)

tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((134, 19), (66, 19), (134,), (66,))

## Feature standardization

In [15]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

# The test split is scaled with training statistics, so its mean/std are
# only approximately 0/1.
(
    scaled_X_train.mean(),
    scaled_X_train.std(),
    scaled_X_test.mean(),
    scaled_X_test.std(),
)

(-3.34898382919136e-17, 1.0, -0.11982457640326809, 1.1245966534380971)

## Regularization techniques

### Ridge

In [25]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error

def ridge_regression(X_train, X_test, y, penalty=0):
    """Fit a ridge model on the training data and predict the test data.

    Args:
        X_train: Feature matrix used to fit the model.
        X_test: Feature matrix to generate predictions for.
        y: Targets corresponding to the rows of ``X_train``.
        penalty: Value forwarded to ``Ridge`` as ``alpha`` (defaults to 0).

    Returns:
        The predictions of the fitted model for ``X_test``.
    """
    fitted = Ridge(alpha=penalty).fit(X_train, y)
    return fitted.predict(X_test)

# No penalty is passed, so the function's default (penalty=0) is used.
y_pred = ridge_regression(
    X_train=scaled_X_train,
    X_test=scaled_X_test,
    y=y_train,
)

# Score the predictions against the held-out targets.
MSE = mean_squared_error(y_test, y_pred)
MAE = mean_absolute_error(y_test, y_pred)
RMSE = np.sqrt(MSE)

(RMSE, MAE)

(0.5148267621786567, 0.3748516441217886)

In [21]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the standardized features, for comparison.
model_linear = LinearRegression().fit(scaled_X_train, y_train)
y_pred_linear = model_linear.predict(scaled_X_test)

# Score the predictions against the held-out targets.
MSE = mean_squared_error(y_test, y_pred_linear)
MAE = mean_absolute_error(y_test, y_pred_linear)
RMSE = np.sqrt(MSE)

(RMSE, MAE)

(0.5148267621786622, 0.37485164412178396)

### Lasso

In [27]:
from sklearn.linear_model import Lasso

# Lasso with a fixed, hand-picked penalty strength.
model_lasso = Lasso(alpha=0.1).fit(scaled_X_train, y_train)
y_pred_lasso = model_lasso.predict(scaled_X_test)

# Score the predictions against the held-out targets.
MSE = mean_squared_error(y_test, y_pred_lasso)
MAE = mean_absolute_error(y_test, y_pred_lasso)
RMSE = np.sqrt(MSE)

(RMSE, MAE)

(0.7853962108799017, 0.5735346450114956)

## K-fold cross-validation

In [34]:
from sklearn.linear_model import RidgeCV

# `alphas` lists the candidate penalty strengths to choose between
# (alpha here plays the role of lambda, the penalty term, in the theory).
model_ridgeCV = RidgeCV(
    alphas=(
        0.00001, 0.0001, 0.001, 0.01,
        0.09, 0.1, 0.11,
        0.3, 0.5, 0.6, 0.7, 0.9,
        1, 5, 10,
    )
)
model_ridgeCV.fit(scaled_X_train, y_train)

# Report the selected penalty and the resulting coefficients.
print(model_ridgeCV.alpha_)
print(model_ridgeCV.coef_)

y_pred_ridgeCV = model_ridgeCV.predict(scaled_X_test)

# Score the predictions against the held-out targets.
MSE = mean_squared_error(y_test, y_pred_ridgeCV)
MAE = mean_absolute_error(y_test, y_pred_ridgeCV)
RMSE = np.sqrt(MSE)

(RMSE, MAE)

0.09
[ 5.96148356  0.50352189  0.71735081 -6.47536665  3.81038215 -1.38909347
 -0.07894157  0.0878956  -0.32824635  2.3457539  -0.49831663  0.73076724
  0.59562279 -0.59678576  0.59362944 -0.29955704  0.35328991  0.03140165
 -0.14314102]


(0.5572774552748411, 0.4277823528843387)

In [38]:
from sklearn.linear_model import LassoCV

# Cross-validated lasso: 100 candidate alphas are generated automatically
# (eps is the ratio alpha_min / alpha_max of that grid) and scored with 5 folds.
# max_iter must be an integer: the float literal 1e4 is rejected by the
# parameter validation in recent scikit-learn releases.
model_lassoCV = LassoCV(eps=0.001, n_alphas=100, max_iter=10_000, cv=5)
model_lassoCV.fit(scaled_X_train, y_train)

# Report the selected penalty and the resulting coefficients.
print(model_lassoCV.alpha_)
print(model_lassoCV.coef_)
y_pred_lassoCV = model_lassoCV.predict(scaled_X_test)

# Score the predictions against the held-out targets.
MAE = mean_absolute_error(y_test, y_pred_lassoCV)
MSE = mean_squared_error(y_test, y_pred_lassoCV)
RMSE = np.sqrt(MSE)

RMSE, MAE

0.004968802520343366
[ 5.19612354  0.43037087  0.29876351 -4.80417579  3.46665205 -0.40507212
  0.          0.          0.          1.35260206 -0.          0.
  0.14879719 -0.          0.          0.          0.09649665  0.
  0.04353956]


(0.5785146895301977, 0.46291883026932984)

In [40]:
from sklearn.linear_model import ElasticNetCV

# Cross-validate over several L1/L2 mixing ratios as well as the penalty strength.
model_elasticCV = ElasticNetCV(
    l1_ratio=[0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.95, 1],
    max_iter=10000,
)
model_elasticCV.fit(scaled_X_train, y_train)

# Report the selected mixing ratio, the coefficients and the selected penalty.
print(model_elasticCV.l1_ratio_)
print(model_elasticCV.coef_)
print(model_elasticCV.alpha_)

1.0
[ 5.19612354  0.43037087  0.29876351 -4.80417579  3.46665205 -0.40507212
  0.          0.          0.          1.35260206 -0.          0.
  0.14879719 -0.          0.          0.          0.09649665  0.
  0.04353956]
0.004968802520343366


In [42]:
# Predict with the cross-validated elastic net and score on the held-out targets.
y_pred_elasticCV = model_elasticCV.predict(scaled_X_test)

MSE = mean_squared_error(y_test, y_pred_elasticCV)
MAE = mean_absolute_error(y_test, y_pred_elasticCV)
RMSE = np.sqrt(MSE)

(RMSE, MAE)

(0.5785146895301977, 0.46291883026932984)