In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import cond

In [3]:
# Seed the legacy global NumPy RNG so every draw below is reproducible.
np.random.seed(0)

# Synthetic linear-regression problem: a 200 x 20 Gaussian design matrix,
# a Gaussian coefficient vector of length 20, and additive Gaussian noise
# scaled by 0.1. The draw order (X, then beta_true, then noise) fixes the
# sampled values, so it must stay exactly as written.
X = np.random.standard_normal((200, 20))
beta_true = np.random.standard_normal(20)
noise = np.random.standard_normal(200) * 0.1
y = X @ beta_true + noise

# Sanity checks: conditioning of the design matrix and a peek at the response.
print("Condition number of X:", cond(X))
print("First 5 values of y:", y[:5])


Condition number of X: 1.703051197787162
First 5 values of y: [-2.4307426  -5.49401837  0.93889084 -2.7802066  -8.99570186]


In [4]:
# Ordinary least squares via the normal equations: (X^T X) beta = X^T y.
XtX = X.T @ X
Xty = X.T @ y
# Solve the linear system directly instead of forming inv(XtX) and
# multiplying: one factorization, less work, and less round-off error.
beta_ols = np.linalg.solve(XtX, Xty)

# Compare the leading estimated coefficients with the generating ones.
print("Estimated coefficients (first 5):", beta_ols[:5])
print("True coefficients (first 5):     ", beta_true[:5])


Estimated coefficients (first 5): [ 0.61845122  1.84002526  0.27128582  1.14240681 -1.74462582]
True coefficients (first 5):      [ 0.61334917  1.84369998  0.27109098  1.13644763 -1.73833187]


In [5]:
# Euclidean (L2) distance between the OLS estimate and the true coefficients.
error = np.linalg.norm(np.subtract(beta_ols, beta_true))
print("Parameter estimation error:", error)


Parameter estimation error: 0.03209770223469925


In [6]:
# Build an ill-conditioned variant of X: take its thin SVD, overwrite the last
# singular value with 1e-6, and reassemble the matrix from the modified factors.
U, s, Vt = np.linalg.svd(X, full_matrices=False)
s[-1] = 1e-6   # svd returns singular values in descending order, so s[-1] is the smallest; s is modified in place
X_bad = U @ np.diag(s) @ Vt

# Normal-equations OLS on X_bad using an explicit inverse. y is still the
# response that was generated from the original X.
# NOTE(review): the Gram matrix X_bad.T @ X_bad has roughly the square of
# X_bad's condition number, so this inverse is numerically fragile. That is
# presumably the point of the demonstration -- confirm before replacing it
# with np.linalg.solve / np.linalg.lstsq.
beta_ols_bad = np.linalg.inv(X_bad.T @ X_bad) @ (X_bad.T @ y)

# L2 distance between the estimate on the ill-conditioned design and beta_true.
error_bad = np.linalg.norm(beta_ols_bad - beta_true)

print("Condition number of X_bad:", cond(X_bad))
print("Parameter estimation error:", error_bad)


Condition number of X_bad: 17148258.60028229
Parameter estimation error: 9160524.241848685


In [7]:
# Ridge (Tikhonov-regularized) regression on the ill-conditioned design:
# solve (X_bad^T X_bad + lam * I) beta = X_bad^T y.
lam = 1.0  # regularization strength

# Adding lam to the diagonal raises every eigenvalue of the Gram matrix by lam,
# which keeps the system well-conditioned despite the tiny singular value.
ridge_matrix = X_bad.T @ X_bad + lam * np.eye(X_bad.shape[1])
# Solve the regularized system directly rather than forming an explicit
# inverse: less work and less round-off error.
beta_ridge = np.linalg.solve(ridge_matrix, X_bad.T @ y)

# L2 distance between the ridge estimate and the true coefficients.
error_ridge = np.linalg.norm(beta_ridge - beta_true)

print("Ridge parameter estimation error:", error_ridge)


Ridge parameter estimation error: 0.9148071866279041
