In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_diabetes

In [2]:
# Load the diabetes dataset from scikit-learn and put the feature matrix
# and the target vector side by side in a single DataFrame.
data = load_diabetes()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(
    target=data.target
)

# Quick overview: dimensions, feature names, and the first rows.
print("Dataset shape:", df.shape)
print("\nFeatures:", data.feature_names)
print("\nFirst few rows:")
print(df.head())

Dataset shape: (442, 11)

Features: ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

First few rows:
        age       sex       bmi        bp        s1        s2        s3  \
0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401   
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   
2  0.085299  0.050680  0.044451 -0.005670 -0.045599 -0.034194 -0.032356   
3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   
4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   

         s4        s5        s6  target  
0 -0.002592  0.019907 -0.017646   151.0  
1 -0.039493 -0.068332 -0.092204    75.0  
2 -0.002592  0.002861 -0.025930   141.0  
3  0.034309  0.022688 -0.009362   206.0  
4 -0.002592 -0.031988 -0.046641   135.0  


In [3]:
# Separate the response column from the predictors.
y = df['target']
X = df.drop(columns='target')

# Hold out a test_size=0.2 share of the data; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Fit Ridge regression (alpha=1.0) on the training split; fit() returns the
# estimator itself, so construction and training are chained.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

# Held-out metrics: RMSE is the square root of the MSE.
r2_ridge = r2_score(y_test, y_pred_ridge)
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

print(
    "--- Ridge Regression (alpha=1.0) ---",
    f"RMSE: {rmse_ridge:.4f}",
    f"R² Score: {r2_ridge:.4f}",
    sep="\n",
)

--- Ridge Regression (alpha=1.0) ---
RMSE: 55.4745
R² Score: 0.4192


In [5]:
# Fit Lasso regression (alpha=1.0) on the training split; fit() returns the
# estimator itself, so construction and training are chained.
lasso = Lasso(alpha=1.0).fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)

# Held-out metrics: RMSE is the square root of the MSE.
r2_lasso = r2_score(y_test, y_pred_lasso)
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))

print(
    "--- Lasso Regression (alpha=1.0) ---",
    f"RMSE: {rmse_lasso:.4f}",
    f"R² Score: {r2_lasso:.4f}",
    sep="\n",
)

--- Lasso Regression (alpha=1.0) ---
RMSE: 58.3402
R² Score: 0.3576


In [6]:
# List each fitted model's coefficient next to its feature name,
# Ridge first, then Lasso.
print("\n--- Feature Coefficients ---")

for _label, _model in (
    ("\nRidge coefficients:", ridge),
    ("\nLasso coefficients:", lasso),
):
    print(_label)
    for feat, coef in zip(data.feature_names, _model.coef_):
        print(f"  {feat}: {coef:.4f}")


--- Feature Coefficients ---

Ridge coefficients:
  age: 45.3674
  sex: -76.6661
  bmi: 291.3388
  bp: 198.9958
  s1: -0.5303
  s2: -28.5770
  s3: -144.5119
  s4: 119.2601
  s5: 230.2216
  s6: 112.1498

Lasso coefficients:
  age: 0.0000
  sex: -0.0000
  bmi: 413.4318
  bp: 34.8305
  s1: 0.0000
  s2: 0.0000
  s3: -0.0000
  s4: 0.0000
  s5: 258.1529
  s6: 0.0000


In [7]:
# Side-by-side table of the Ridge and Lasso coefficients, one row per feature.
print("\n--- Coefficient Comparison ---")

coef_df = pd.DataFrame({'Feature': data.feature_names}).assign(
    Ridge=ridge.coef_,
    Lasso=lasso.coef_,
)
print(coef_df)


--- Coefficient Comparison ---
  Feature       Ridge       Lasso
0     age   45.367377    0.000000
1     sex  -76.666086   -0.000000
2     bmi  291.338832  413.431848
3      bp  198.995817   34.830515
4      s1   -0.530310    0.000000
5      s2  -28.577050    0.000000
6      s3 -144.511905   -0.000000
7      s4  119.260066    0.000000
8      s5  230.221608  258.152894
9      s6  112.149830    0.000000
