In [56]:
from sklearn.datasets import fetch_california_housing

In [57]:
df = fetch_california_housing(as_frame=True)

In [58]:
X = df.data
y = df.target

In [59]:
from sklearn.model_selection import train_test_split

In [70]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 10
)

In [71]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV

In [72]:
ols = Pipeline([
    ("scaler", StandardScaler()),
    ("ols", LinearRegression())
])

In [73]:
ridge = Pipeline([
    ("scaler", StandardScaler()),
    ("ridge", RidgeCV(
            alphas = np.logspace(-3, 3, 50),
            cv = 5
    ))
])

In [74]:
lasso = Pipeline([
    ("scaler", StandardScaler()),
    ("lasso", LassoCV(
            alphas = np.logspace(-3, 3, 50),
            cv = 5,
    ))
])

In [84]:
en = Pipeline([
    ("scaler", StandardScaler()),
    ("en",ElasticNetCV(
            alphas = np.logspace(-3,3, 50),
            l1_ratio = np.logspace(-3,3,50),
            cv = 5,
            max_iter = 200_000
    ))
])

In [95]:
LassoCV()

0,1,2
,eps,0.001
,n_alphas,'deprecated'
,alphas,'warn'
,fit_intercept,True
,precompute,'auto'
,max_iter,1000
,tol,0.0001
,copy_X,True
,cv,
,verbose,False


In [85]:
def evaluate_mode(name, model, X_train, y_train, X_test, y_test):

    model.fit(X_train, y_train)

    #Training
    y_pred_train = model.predict(X_train)
    mse_train = mean_squared_error(y_train, y_pred_train)
    rmse_train = np.sqrt(mse_train)
    r2_train = r2_score(y_train, y_pred_train)
    sse_train = np.sum((y_train - y_pred_train) ** 2)

    #Testing
    y_pred_test = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_pred_test)
    rmse_test = np.sqrt(mse_test)
    r2_test = r2_score(y_test, y_pred_test)
    sse_test = np.sum((y_test - y_pred_test) ** 2)

    results = pd.DataFrame([
        {"Model": name, "Split": "Train", "R^2": r2_train, "RMSE": rmse_train, "MSE": mse_train, "SSE": sse_train},
        {"Model": name, "Split": "Test", "R^2": r2_test, "RMSE": rmse_test, "MSE": mse_test, "SSE": sse_test},
    ])
    
    return results;

In [86]:
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

In [87]:
results = pd.concat([
    evaluate_mode("OLS", ols, X_train, y_train, X_test, y_test),
    evaluate_mode("Ridge", ridge, X_train, y_train, X_test, y_test),
    evaluate_mode("Lasso", lasso, X_train, y_train, X_test, y_test),
    evaluate_mode("EN", en, X_train, y_train, X_test, y_test),
]).set_index(["Model", "Split"])

In [88]:
results

Unnamed: 0_level_0,Unnamed: 1_level_0,R^2,RMSE,MSE,SSE
Model,Split,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
OLS,Train,0.607405,0.720756,0.51949,8577.815583
OLS,Test,0.600979,0.737892,0.544484,2247.630828
Ridge,Train,0.602753,0.725014,0.525646,8679.460912
Ridge,Test,0.597551,0.741055,0.549162,2266.939944
Lasso,Train,0.595514,0.73159,0.535224,8837.62487
Lasso,Test,0.590292,0.747708,0.559067,2307.828509
EN,Train,0.598974,0.728454,0.530646,8762.01997
EN,Test,0.589071,0.748821,0.560734,2314.708248


In [92]:
print("Best alpha chosen for Ridge:", ridge.named_steps["ridge"].alpha_)
print("Best alpha chosen for Lasso:", lasso.named_steps["lasso"].alpha_)
print("Best alpha chosen for EN:", en.named_steps["en"].alpha_)
print("Best l1_ratio chosen for EN:", en.named_steps["en"].l1_ratio_)

Best alpha chosen for Ridge: 323.745754281764
Best alpha chosen for Lasso: 0.016768329368110083
Best alpha chosen for EN: 0.001
Best l1_ratio chosen for EN: 33.9322177189533
