In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the diabetes dataset and unpack the feature matrix and target vector
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Show the dimensions of both arrays as the cell result
(X.shape, y.shape)

((442, 10), (442,))

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Ridge regression with regularization strength alpha = 1.0
ridge_model = Ridge(alpha=1.0)

# Train on the training split
ridge_model.fit(X=X_train, y=y_train)

In [5]:
# Score the held-out samples with the fitted Ridge model
y_pred = ridge_model.predict(X_test)

# Test-set error and goodness of fit
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the two metrics followed by the learned coefficients, one per line
for label, value in (
    ("Mean Squared Error (MSE):", mse),
    ("R-squared (R2 Score):", r2),
    ("Ridge Coefficients:", ridge_model.coef_),
):
    print(label, value)

Mean Squared Error (MSE): 3077.41593882723
R-squared (R2 Score): 0.41915292635986545
Ridge Coefficients: [  45.36737726  -76.66608563  291.33883165  198.99581745   -0.53030959
  -28.57704987 -144.51190505  119.26006559  230.22160832  112.14983004]


In [6]:
# Ordinary Least Squares baseline: same train/test split as the Ridge model,
# so the two sets of metrics are directly comparable.
# (LinearRegression is imported in the notebook's top import cell.)
ols_model = LinearRegression().fit(X_train, y_train)
ols_pred = ols_model.predict(X_test)

# Evaluate the OLS model on the held-out test set
ols_mse = mean_squared_error(y_test, ols_pred)
ols_r2 = r2_score(y_test, ols_pred)

print("OLS Mean Squared Error (MSE):", ols_mse)
print("OLS R-squared (R2 Score):", ols_r2)

OLS Mean Squared Error (MSE): 2900.1936284934827
OLS R-squared (R2 Score): 0.45260276297191915


In [8]:
# OLS test RMSE expressed as a fraction of the mean test target
np.sqrt(ols_mse) / y_test.mean()

0.369427831006025

In [9]:
# Candidate regularization strengths: 100 log-spaced values from 1e-3 to 1e3
alpha_values = np.logspace(-3, 3, 100)

# 5-fold cross-validated grid search over alpha, scored by negated MSE.
# (GridSearchCV is imported in the notebook's top import cell.)
ridge_cv = GridSearchCV(
    Ridge(),
    param_grid={'alpha': alpha_values},
    cv=5,
    scoring='neg_mean_squared_error',
)

# Only the training split is used for tuning; the test set stays untouched
ridge_cv.fit(X_train, y_train)

# The score is a negated MSE, so flip its sign before reporting it
print("Best alpha:", ridge_cv.best_params_['alpha'])
print("Best cross-validated MSE:", -ridge_cv.best_score_)

Best alpha: 0.06579332246575682
Best cross-validated MSE: 3121.8854525321613


In [12]:
# Best cross-validated RMSE expressed as a fraction of the mean training target
np.sqrt(-ridge_cv.best_score_) / y_train.mean()

0.36343886569029127

In [13]:
# Rebuild the Ridge model with the alpha chosen by the grid search
best_alpha = ridge_cv.best_params_['alpha']
ridge_model = Ridge(alpha=best_alpha)

# Train it on the training split
ridge_model.fit(X_train, y_train)

In [14]:
# Predict on the test data with the tuned Ridge model
y_pred = ridge_model.predict(X_test)

# Evaluate the model on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# The value printed is sqrt(mse), i.e. the RMSE, so the label says RMSE
# (it previously carried the abbreviation "MSE").
print("Root Mean Squared Error (RMSE):", np.sqrt(mse))
print("R-squared (R2 Score):", r2)

# Display Ridge coefficients as a plain Python list
print("Ridge Coefficients:", ridge_model.coef_.tolist())

Root Mean Squared Error (MSE): 53.500886444859155
R-squared (R2 Score): 0.4597465330835182
Ridge Coefficients: [42.461201768761626, -216.4579609217407, 521.1954406924689, 325.4427784974619, -140.81738216636188, -73.20637980934943, -181.70648106575646, 157.95961835369545, 413.88423995940747, 72.53829889759365]


In [15]:
# Tuned-Ridge test RMSE expressed as a fraction of the mean test target
rmse = np.sqrt(mse)
rmse / np.mean(y_test)

0.36700931814339943

In [16]:
# List the 100 log-spaced values between 1e-3 and 1e3 as plain Python floats
np.logspace(start=-3, stop=3, num=100).tolist()

[0.001,
 0.0011497569953977356,
 0.0013219411484660286,
 0.0015199110829529332,
 0.001747528400007683,
 0.002009233002565048,
 0.0023101297000831605,
 0.0026560877829466868,
 0.0030538555088334154,
 0.003511191734215131,
 0.004037017258596553,
 0.004641588833612782,
 0.005336699231206312,
 0.006135907273413176,
 0.007054802310718645,
 0.008111308307896872,
 0.0093260334688322,
 0.010722672220103232,
 0.012328467394420659,
 0.014174741629268055,
 0.016297508346206444,
 0.01873817422860384,
 0.021544346900318846,
 0.024770763559917114,
 0.02848035868435802,
 0.03274549162877728,
 0.037649358067924674,
 0.04328761281083059,
 0.049770235643321115,
 0.05722367659350217,
 0.06579332246575682,
 0.07564633275546291,
 0.08697490026177834,
 0.1,
 0.11497569953977356,
 0.13219411484660287,
 0.1519911082952933,
 0.1747528400007685,
 0.2009233002565048,
 0.23101297000831603,
 0.26560877829466867,
 0.30538555088334157,
 0.3511191734215131,
 0.40370172585965536,
 0.4641588833612782,
 0.53366992312063