In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [None]:
# Seed NumPy's global generator so every run draws the same synthetic data.
np.random.seed(42)

# Number of observations in the toy dataset.
n_samples = 50

# X1 is the genuine predictor: a column vector of uniform draws from [0, 1).
X1 = np.random.random_sample(size=(n_samples, 1))

In [None]:
# X2 is a near-copy of X1: the same values plus Gaussian jitter with a
# standard deviation of 0.01, so the two columns are almost perfectly correlated.
# Real-world analogy: X1 = "Square Footage", X2 = "Number of Rooms".
X2 = X1 + np.random.normal(loc=0, scale=0.01, size=(n_samples, 1))


In [None]:
# X3 is drawn independently of X1 and X2 (uniform on [0, 1)); it serves as the
# pure-noise feature that gets no weight when the target is built.
X3 = np.random.random_sample(size=(n_samples, 1))


In [None]:
# Join the three column vectors side by side into one design matrix
# with columns ordered [X1, X2, X3].
X = np.concatenate((X1, X2, X3), axis=1)

In [None]:
# Generating process: y = 3*X1 + 3*X2 + 0*X3 + standard-normal noise.
# X1 and X2 carry equal weight; X3 does not enter the target at all.
y = (3 * X1 + 3 * X2) + np.random.standard_normal(size=(n_samples, 1))

# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# All three regularized models are fitted on the same training split with the
# same penalty strength (alpha=0.1), so their coefficients can be compared.

# A. Lasso (L1)
# The demo expects Lasso to favour one of the two correlated features.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# B. Ridge (L2)
# The demo expects Ridge to shrink both correlated features but keep both.
ridge = Ridge(alpha=0.1).fit(X_train, y_train)

# C. Elastic Net (L1 + L2 mix)
# l1_ratio=0.5 weights the Lasso and Ridge penalties equally, aiming for a
# balance between feature selection and grouping.
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train, y_train)

In [None]:
# One row per feature: the weight used to generate y, followed by the
# coefficient each fitted model assigned to that feature.
coefficient_columns = {
    'Feature': ['X1 (Important)', 'X2 (Correlated)', 'X3 (Noise)'],
    'True Weights': [3, 3, 0],
    'Lasso': lasso.coef_,
    # Flattened to 1-D so it lines up with the other coefficient columns.
    'Ridge': ridge.coef_.flatten(),
    'ElasticNet': elastic.coef_,
}
results = pd.DataFrame(coefficient_columns)

# Header line followed by the table rounded to three decimals.
print("--- How they handle correlated features ---", results.round(3), sep="\n")


In [None]:
# Grouped bar chart: one group per feature, one bar per model.
x = np.arange(3)  # group positions for X1, X2, X3
width = 0.2       # width of each bar; also the offset between bars in a group

plt.figure(figsize=(10, 6))

# (bar positions, bar heights, legend label, colour) for each model,
# drawn left to right within every group.
bar_series = (
    (x - width, lasso.coef_, 'Lasso (Picks one)', 'red'),
    (x, ridge.coef_.flatten(), 'Ridge (Keeps both)', 'blue'),
    (x + width, elastic.coef_, 'ElasticNet (Balance)', 'green'),
)
for bar_positions, bar_heights, bar_label, bar_color in bar_series:
    plt.bar(bar_positions, bar_heights, width, label=bar_label, color=bar_color)

plt.xticks(x, ['X1', 'X2', 'X3 (Noise)'])
plt.ylabel('Coefficient Value')
plt.title('Handling Correlated Features')
plt.legend()
plt.show()