In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing 

# Fetch the California housing dataset through scikit-learn and hold it in a
# DataFrame: one column per feature plus the regression target in 'Target'.
california_housing = fetch_california_housing()

df = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
df['Target'] = california_housing.target

X = df.drop(columns='Target')
y = df['Target']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to standardise the training
# features, i.e. leak test-set information into training and evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics learned from training rows only
X_test = scaler.transform(X_test_raw)        # test rows reuse the training statistics

# Kept so any later cell that still reads X_scaled keeps working; it is now
# produced by the train-fitted scaler rather than one fit on every row.
X_scaled = scaler.transform(X)
# Tune Ridge's alpha with 5-fold cross-validation on the training split,
# ranking candidates by negated mean squared error (higher is better).
ridge = Ridge()
param_grid = {'alpha': [0.01, 0.1, 1, 10]}
grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
best_alpha = grid_search.best_params_['alpha']
print(f"Optimal Alpha: {best_alpha}")

# GridSearchCV is used with its default refit=True, so best_estimator_ has
# already been refit on all of X_train with the winning alpha. Calling .fit()
# on it again would only repeat that work, so we predict directly.
best_ridge = grid_search.best_estimator_
y_pred = best_ridge.predict(X_test)

# Held-out performance of the tuned model.
r2_best = r2_score(y_test, y_pred)
mse_best = mean_squared_error(y_test, y_pred)
print(f"R² (Best Model): {r2_best}")
print(f"MSE (Best Model): {mse_best}")

# Baseline for comparison: Ridge with alpha fixed at 1.0 and no search,
# trained and scored on the same train/test split as the tuned model.
untuned_ridge = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_untuned = untuned_ridge.predict(X_test)

# Held-out performance of the baseline.
mse_untuned = mean_squared_error(y_test, y_pred_untuned)
r2_untuned = r2_score(y_test, y_pred_untuned)
print(f"R² (Untuned Model): {r2_untuned}")
print(f"MSE (Untuned Model): {mse_untuned}")

# Side-by-side summary of both models: one row per model, test-set R² and MSE.
comparison_rows = [
    ('Tuned Model', r2_best, mse_best),
    ('Untuned Model', r2_untuned, mse_untuned),
]
comparison_df = pd.DataFrame(comparison_rows, columns=['Model', 'R²', 'MSE'])
print(comparison_df)


Optimal Alpha: 1
R² (Best Model): 0.5758185345441323
MSE (Best Model): 0.555851200736751
R² (Untuned Model): 0.5758185345441323
MSE (Untuned Model): 0.555851200736751
           Model        R²       MSE
0    Tuned Model  0.575819  0.555851
1  Untuned Model  0.575819  0.555851
