# Regularization with Lasso: Demonstration on Noisy Features
This notebook shows how Lasso can outperform ordinary least squares (OLS) on held-out data when many pure-noise predictors are appended to the original features.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score

In [2]:
# Load the diabetes regression data: feature matrix, target, and column names.
diabetes = datasets.load_diabetes()
X_original = diabetes.data
y = diabetes.target
feature_names_original = diabetes.feature_names

n, p = X_original.shape
n_noise_features = 100

# Seed the legacy global NumPy RNG so the noise columns are reproducible,
# then draw one standard-normal column per synthetic feature.
np.random.seed(0)
X_noise = np.random.normal(loc=0.0, scale=1.0, size=(n, n_noise_features))

# Augmented design matrix: original columns first, noise columns after.
X_aug = np.concatenate([X_original, X_noise], axis=1)

# Column labels in the same order as the columns of X_aug.
feature_names_aug = feature_names_original + [
    f"noise_{idx}" for idx in range(n_noise_features)
]

In [3]:
# Hold out 20% of the augmented data as a test set; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_aug,
    y,
    test_size=0.2,
    random_state=1111,
)

# Show the (rows, columns) shape of each partition.
(X_train.shape, X_test.shape)

((353, 110), (89, 110))

In [4]:
# Baseline: unregularized least squares on all columns (original + noise).
ols_full = linear_model.LinearRegression().fit(X_train, y_train)

# R^2 on the training split and on the held-out split, in that order.
r2_train_ols_full, r2_test_ols_full = (
    r2_score(y_part, ols_full.predict(X_part))
    for X_part, y_part in ((X_train, y_train), (X_test, y_test))
)

(r2_train_ols_full, r2_test_ols_full)

(0.6500238674793164, 0.22763977192137097)

In [5]:
# Candidate regularization strengths: 20 values log-spaced from 1e-3 to 1e1.
alphas = np.logspace(-3, 1, 20)
lasso = linear_model.Lasso(max_iter=10000, random_state=0)
param_grid = {"alpha": alphas}

# NOTE(review): the appended noise columns are drawn with unit variance, while
# the load_diabetes features are documented as pre-scaled to a much smaller
# range. The L1 penalty is scale-sensitive, so consider standardizing the
# columns (e.g. in a Pipeline) before drawing conclusions -- confirm.

# 5-fold cross-validation on the training split only, scored by R^2.
lasso_cv = GridSearchCV(lasso, param_grid, cv=5, scoring="r2")
lasso_cv.fit(X_train, y_train)

best_alpha = lasso_cv.best_params_["alpha"]

# GridSearchCV is used with its default refit behaviour, so best_estimator_ is
# already a Lasso with alpha=best_alpha (same max_iter / random_state) fitted on
# the full training split. Reuse it instead of building and fitting an
# identical model a second time.
lasso_best = lasso_cv.best_estimator_

r2_train_lasso = r2_score(y_train, lasso_best.predict(X_train))
r2_test_lasso = r2_score(y_test, lasso_best.predict(X_test))

best_alpha, r2_train_lasso, r2_test_lasso

(np.float64(0.20691380811147903), 0.6193767419217198, 0.2692937624491213)

In [6]:
# Side-by-side held-out R^2 for the two fitted models.
for model_label, test_r2 in (
    ("OLS(all):", r2_test_ols_full),
    ("Lasso:", r2_test_lasso),
):
    print(model_label, test_r2)

OLS(all): 0.22763977192137097
Lasso: 0.2692937624491213
