# Ridge regression

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.base import BaseEstimator
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

random_seed = 42  # passed as random_state to the make_regression calls in this notebook

Multicollinearity.

In [None]:
#@title
# Draw a single-feature regression problem, then duplicate that feature so the
# design matrix has two identical (perfectly collinear) columns.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    noise=3,
    random_state=random_seed,
)
X = np.repeat(X, 2, axis=1)

In [None]:
x_noise = 0.001 # @param {type: "slider", min: 0.001, max: 0.25, step: 0.001}

# Jitter the two identical columns so they are nearly (not exactly) collinear.
# The noise comes from a generator seeded with random_seed so the figure is the
# same on every run, and its shape follows X instead of a hard-coded (100, 2).
X_copy = X + x_noise * np.random.default_rng(random_seed).standard_normal(X.shape)

lr = LinearRegression()
lr.fit(X_copy, y)

# Evaluate the fitted plane on a 100 x 100 grid spanning the range of each feature.
xrange = np.linspace(X_copy.min(axis=0), X_copy.max(axis=0), 100)
xx1, xx2 = np.meshgrid(xrange[:, 0], xrange[:, 1])
reg_plane = lr.intercept_ + lr.coef_[0] * xx1 + lr.coef_[1] * xx2

fig = px.scatter_3d(x=X_copy[:, 0], y=X_copy[:, 1], z=y, width=800, height=800)
fig.update_traces(marker=dict(size=5))
fig.add_surface(x=xx1, y=xx2, z=reg_plane, opacity=0.5)

# The title reports the fitted intercept and the two slopes.
fig.update_layout(
    title="b0: %.2f,  b1: %.2f,  b2: %.2f" % (lr.intercept_, lr.coef_[0], lr.coef_[1]),
    scene_camera=dict(eye=dict(x=-1.3, y=-1.3, z=0.1)),
    title_x=0.5)

fig.show()

Ridge regression: with the thin SVD $X = U \Sigma V^T$ of the centered design matrix, $\hat \beta_R = V(\Sigma^2 + \lambda I)^{-1}\Sigma U^T y$

In [None]:
class Ridge(BaseEstimator):
  """Ridge regression solved through the SVD of the design matrix.

  With the thin SVD X = U diag(s) V^T, the coefficients are
  V diag(s / (s^2 + lamb)) U^T y.

  Parameters
  ----------
  lamb : float, default=0.0
      Strength of the L2 penalty added to the squared singular values.
  fit_intercept : bool, default=True
      If True, X and y are centered before solving and the intercept is
      recovered from the column means. If False, the data are used as given
      and the intercept is fixed at 0.
  sv_tol : float, default=1e-15
      Singular values that are not greater than this threshold are dropped
      before the solution is formed.

  Attributes
  ----------
  coef_ : ndarray
      Flattened coefficient vector, set by fit.
  intercept_ : float or ndarray
      Intercept term, set by fit.
  """

  def __init__(self, lamb=0.0, fit_intercept=True, sv_tol=1e-15):
    self.lamb = lamb
    self.fit_intercept = fit_intercept
    self.sv_tol = sv_tol

  def fit(self, X, y):
    """Estimate coef_ and intercept_ from X and y.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray whose first axis has length n_samples

    Returns
    -------
    self : Ridge
        The fitted estimator, so calls can be chained.
    """
    # Center only when an intercept is requested; centering and then forcing
    # the intercept to zero would fit a shifted model.
    if self.fit_intercept:
      X_mean = X.mean(axis=0)
      y_mean = y.mean(axis=0)
      X = X - X_mean
      y = y - y_mean

    U, s, Vt = np.linalg.svd(X, full_matrices=False)

    # Discard directions whose singular value is at or below the tolerance.
    keep = s > self.sv_tol
    U, s, Vt = U[:, keep], s[keep], Vt[keep]

    # Use the penalty stored on this instance, not a global variable.
    shrink = s / (s ** 2 + self.lamb)

    # Scaling the columns of V by `shrink` equals V @ diag(shrink) without
    # materializing the diagonal matrix.
    coef = (Vt.T * shrink) @ (U.T @ y)
    self.coef_ = coef.ravel()
    if self.fit_intercept:
      self.intercept_ = y_mean - np.dot(X_mean, self.coef_)
    else:
      self.intercept_ = 0.0
    return self

  def predict(self, X):
    """Return intercept_ + X @ coef_ for the rows of X."""
    y_pred = self.intercept_ + X @ self.coef_
    return y_pred


In [None]:
x_noise = 0.001 # @param {type: "slider", min: 0.001, max: 0.25, step: 0.001}
lamb = 10 # @param {type: "slider", min: 0.0, max: 10, step: 0.001}

# Jitter the two identical columns so they are nearly (not exactly) collinear.
# The noise comes from a generator seeded with random_seed so the figure is the
# same on every run, and its shape follows X instead of a hard-coded (100, 2).
X_copy = X + x_noise * np.random.default_rng(random_seed).standard_normal(X.shape)

lr = Ridge(lamb=lamb)
lr.fit(X_copy, y)

# Evaluate the fitted plane on a 100 x 100 grid spanning the range of each feature.
xrange = np.linspace(X_copy.min(axis=0), X_copy.max(axis=0), 100)
xx1, xx2 = np.meshgrid(xrange[:, 0], xrange[:, 1])
reg_plane = lr.intercept_ + lr.coef_[0] * xx1 + lr.coef_[1] * xx2

fig = px.scatter_3d(x=X_copy[:, 0], y=X_copy[:, 1], z=y, width=800, height=800)
fig.update_traces(marker=dict(size=5))
fig.add_surface(x=xx1, y=xx2, z=reg_plane, opacity=0.5)

# The title reports the fitted intercept and the two slopes.
fig.update_layout(
    title="b0: %.2f,  b1: %.2f,  b2: %.2f" % (lr.intercept_, lr.coef_[0], lr.coef_[1]),
    scene_camera=dict(eye=dict(x=-1.3, y=-1.3, z=0.1)),
    title_x=0.5)

fig.show()

Overfitting.

In [None]:
#@title
# Small noisy one-feature sample. y_outlier is a copy of the targets in which
# the observation with the largest x value is raised by 200.
X, y = make_regression(
    n_samples=20,
    n_features=1,
    noise=20,
    random_state=random_seed,
)
y_outlier = np.copy(y)
y_outlier[np.argmax(X)] += 200

In [None]:
# Define the penalty in this cell so it does not depend on a value left behind
# by an earlier cell; 10 is the value a top-to-bottom run previously inherited.
lamb = 10 #@param {type: "slider", min: 0, max: 10, step: 0.1}

plt.figure(figsize=(8, 6))

# OLS on the clean targets, OLS on the targets with the outlier, and ridge on
# the targets with the outlier.
model = LinearRegression()
model.fit(X, y)

model_outlier = LinearRegression()
model_outlier.fit(X, y_outlier)

model_ridge  = Ridge(lamb=lamb)
model_ridge.fit(X, y_outlier)

x_line = np.linspace(X.min(), X.max(), 100)

# Each model has a single slope, so the fitted line is intercept + slope * x.
model_line = model.intercept_ + model.coef_ * x_line
model_of_line = model_outlier.intercept_ + model_outlier.coef_ * x_line
model_ridge_line = model_ridge.intercept_ + model_ridge.coef_ * x_line


sns.scatterplot(x=X.flatten(), y=y_outlier, s=200, edgecolor='k')
sns.lineplot(x=x_line, y=model_line, linewidth=3, color='r', label='OLS')
sns.lineplot(x=x_line, y=model_of_line, linewidth=3, color='y', linestyle='--', label='OLS outlier')
sns.lineplot(x=x_line, y=model_ridge_line, linewidth=3, color='teal', linestyle='-.', label='Ridge')

plt.title("Effect of outliers on OLS regression", size=16)
plt.xlabel("x", size=14)
plt.ylabel("y", size=14)

plt.show()

Reduce overfitting in polynomial regression.

In [None]:
#@title
X, _ = make_regression(n_samples=20, n_features=1, random_state=random_seed)
# Cubic signal plus uniform noise in [0, 5). The noise is drawn from a generator
# seeded with random_seed so the targets are identical on every run.
y = -1 * X ** 3 + 5 * X ** 2 + X + 5 * np.random.default_rng(random_seed).random(X.shape)

In [None]:
lamb = 0 #@param {type: "slider", min: 0, max: 3, step: 0.1}

# Expand x into powers 1..10 (no bias column) and fit ridge on the expanded features.
pf = PolynomialFeatures(degree=10, include_bias=False)
X_poly = pf.fit_transform(X)

rr = Ridge(lamb=lamb)
rr.fit(X_poly, y)

# Dense grid over the observed x range, pushed through the same expansion.
x_line = np.linspace(X.min(), X.max(), 100)
x_line_poly = pf.transform(x_line[:, np.newaxis])

model_ridge_line = rr.intercept_ + x_line_poly @ rr.coef_

plt.figure(figsize=(8, 6))

# Raw string: "\l" is not a valid escape sequence, so the non-raw literal
# triggers a warning on current Python. The resulting text is unchanged.
sns.lineplot(x=x_line, y=model_ridge_line, linewidth=3,
             color='r', label=r'$\lambda=%.2f$' % (lamb))

plt.scatter(X.flatten(), y, s=100, linewidth=2, edgecolor='k')
plt.title("x vs y plot", size=16)
plt.xlabel("x", size=14)
plt.ylabel("y", size=14)
plt.show()

Why is it called Ridge regression?

In [None]:
#@title
# Two nearly identical predictors. The jitter is drawn from a generator seeded
# with random_seed so the surfaces are the same on every run.
X, _ = make_regression(n_samples=1000, n_features=1, noise=3, random_state=random_seed)
X = X.repeat(2, axis=1)
X = X + 0.01 * np.random.default_rng(random_seed).standard_normal(X.shape)

# Noise-free targets generated with coefficients (3, 3), as a column vector.
y = 3 * X[:, 0] + 3 * X[:, 1]
y = y[:, np.newaxis]

penalty_weight = 1  # weight of the L2 penalty added to the MSE in the right panel


def loss_surface(X, y, b_low, b_high, penalty=0.0, n_grid=100):
  """Evaluate MSE + penalty * (b1^2 + b2^2) on a grid of coefficient pairs.

  b_low and b_high are (b1, b2) pairs giving the grid corners. Returns the two
  meshgrid arrays and the loss array, each of shape (n_grid, n_grid).
  """
  b = np.linspace(b_low, b_high, n_grid)
  bb1, bb2 = np.meshgrid(b[:, 0], b[:, 1])
  # One column per candidate (b1, b2), in the same row-major order as the grid.
  bb = np.stack((bb1, bb2)).reshape(2, -1)
  mse = np.mean((y - X @ bb) ** 2, axis=0).reshape(bb1.shape)
  # The ridge penalty is the elementwise squared norm of the coefficients,
  # not a matrix product of the two grids.
  return bb1, bb2, mse + penalty * (bb1 ** 2 + bb2 ** 2)


fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'surface'}, {'type': 'surface'}]])

# Left: plain MSE around the generating coefficients.
bb1, bb2, mse = loss_surface(X, y, (2, 2), (4, 4))
fig.add_trace(go.Surface(x=bb1, y=bb2, z=mse), row=1, col=1)

# Right: MSE plus the ridge penalty over a wider coefficient range.
bb1, bb2, mse = loss_surface(X, y, (-5, -5), (10, 5), penalty=penalty_weight)
fig.add_trace(go.Surface(x=bb1, y=bb2, z=mse), row=1, col=2)

fig.update_traces(showscale=False)
fig.update_layout(width=1200, title="OLS vs Ridge", title_x=0.5)

# Each 3-D subplot has its own scene, so the axis titles are set on both.
fig.update_layout(
    scene=dict(xaxis_title='b1', yaxis_title='b2', zaxis_title='MSE'),
    scene2=dict(xaxis_title='b1', yaxis_title='b2', zaxis_title='MSE + penalty'))

fig.show()