In [169]:
from sklearn.datasets import make_regression
import pandas as pd
# Show floats in pandas output with exactly four decimal places.
pd.options.display.float_format = "{:.4f}".format

In [204]:
# Synthetic regression problem: as many features as samples (100 x 100),
# of which only 10 are informative; the seed makes the draw reproducible.
X, y = make_regression(
    n_samples=100,
    n_features=100,
    n_informative=10,
    noise=0.5,
    random_state=42,
)

In [205]:
from sklearn.model_selection import train_test_split

In [206]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [207]:
def evaluate_model(name, model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training data and tabulate train/test metrics.

    Parameters
    ----------
    name : label written into the "Model" index level of the result.
    model : object with ``fit`` and ``predict``. It is fitted in place. If it
        exposes ``best_estimator_`` after fitting, that estimator is the one
        used for prediction.
    X_train, y_train : data the model is fitted on and scored against.
    X_test, y_test : held-out data the fitted model is scored against.

    Returns
    -------
    pandas.DataFrame
        Two rows ("Train" and "Test") indexed by ("Model", "Split"), with the
        columns "R^2", "RMSE", "MSE" and "SSE".
    """
    model.fit(X_train, y_train)

    # Use the selected estimator when the fitted object exposes one;
    # otherwise predict with the fitted model itself.
    est = getattr(model, "best_estimator_", model)

    rows = []
    for split, features, target in (
        ("Train", X_train, y_train),
        ("Test", X_test, y_test),
    ):
        predicted = est.predict(features)
        mse = mean_squared_error(target, predicted)
        rows.append(
            {
                "Model": name,
                "Split": split,
                "R^2": r2_score(target, predicted),
                "RMSE": np.sqrt(mse),
                "MSE": mse,
                "SSE": np.sum((target - predicted) ** 2),
            }
        )

    return pd.DataFrame(rows).set_index(["Model", "Split"])

In [208]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [209]:
# Baseline: standardize the features, then fit an unpenalized linear regression.
ols = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("ols", LinearRegression()),
    ]
)

In [210]:
# Ridge pipeline: the scaler sits inside the pipeline, so it is fitted
# together with the regressor on whatever data the pipeline is fitted on.
ridge_pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("ridge", Ridge()),
    ]
)

In [211]:
# Candidate penalties for the "ridge" step: 50 log-spaced values from 1e-3 to 1e3.
ridge_grid = {"ridge__alpha": np.logspace(start=-3, stop=3, num=50)}

In [212]:
# Grid search over the ridge penalty with 5-fold CV, run with n_jobs=-1.
# No `scoring` is passed, so candidates are ranked by the pipeline's own score.
ridge = GridSearchCV(
    estimator=ridge_pipe,
    param_grid=ridge_grid,
    cv=5,
    n_jobs=-1,
)

In [213]:
# Lasso pipeline: standardize, then L1-penalized regression.
# max_iter is set to 200_000 -- presumably to avoid convergence warnings
# at the small-alpha end of the grid; confirm before lowering it.
lasso_pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("lasso", Lasso(max_iter=200_000)),
    ]
)

In [214]:
# Candidate penalties for the "lasso" step: 50 log-spaced values from 1e-3 to 1e3.
# NOTE(review): the saved best_params_ output reports lasso__alpha = 0.001,
# the smallest value in this grid. That output carries an earlier execution
# count, so re-run to confirm, then consider extending the lower bound.
lasso_grid = {"lasso__alpha": np.logspace(start=-3, stop=3, num=50)}

In [215]:
# Grid search over the lasso penalty with 5-fold CV, run with n_jobs=-1.
lasso = GridSearchCV(
    estimator=lasso_pipe,
    param_grid=lasso_grid,
    cv=5,
    n_jobs=-1,
)

In [216]:
# Elastic-net pipeline: standardize, then ElasticNet with max_iter set to
# 200_000, matching the lasso pipeline.
elastic_pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("elastic", ElasticNet(max_iter=200_000)),
    ]
)

In [217]:
# 50 log-spaced alphas (1e-3 .. 1e3) crossed with 5 evenly spaced l1_ratio
# values (0.1 .. 0.9): 250 candidate settings for the "elastic" step.
# NOTE(review): the saved best_params_ output reports alpha = 0.001 and
# l1_ratio = 0.9, both on the edge of this grid. Re-run to confirm, then
# consider widening the ranges.
elastic_grid = {
    "elastic__alpha": np.logspace(start=-3, stop=3, num=50),
    "elastic__l1_ratio": np.linspace(start=0.1, stop=0.9, num=5),
}

In [218]:
# Joint grid search over alpha and l1_ratio with 5-fold CV, run with n_jobs=-1.
elastic = GridSearchCV(
    estimator=elastic_pipe,
    param_grid=elastic_grid,
    cv=5,
    n_jobs=-1,
)

In [219]:
# Fit and score every model on the same split, then stack the per-model
# tables in the order listed here.
pd.concat(
    [
        evaluate_model(label, estimator, X_train, y_train, X_test, y_test)
        for label, estimator in (
            ("OLS", ols),
            ("Ridge", ridge),
            ("Lasso", lasso),
            ("Elastic", elastic),
        )
    ]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,R^2,RMSE,MSE,SSE
Model,Split,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
OLS,Train,1.0,0.0,0.0,0.0
OLS,Test,0.6407,75.1957,5654.3979,113087.9584
Ridge,Train,0.9987,5.0918,25.9263,2074.1026
Ridge,Test,0.6569,73.4819,5399.5848,107991.6951
Lasso,Train,1.0,0.4785,0.2289,18.3137
Lasso,Test,1.0,0.5733,0.3286,6.5729
Elastic,Train,0.9997,2.3272,5.4159,433.2735
Elastic,Test,0.9725,20.8036,432.7892,8655.7845


In [203]:
# Report the hyperparameters each fitted grid search selected, one per line.
print(
    *(
        f"{label} {search.best_params_}"
        for label, search in (
            ("best alpha for ridge:", ridge),
            ("best alpha for lasso:", lasso),
            ("best alpha for elastic:", elastic),
        )
    ),
    sep="\n",
)

best alpha for ridge: {'ridge__alpha': 0.005428675439323859}
best alpha for lasso: {'lasso__alpha': 0.001}
best alpha for elastic: {'elastic__alpha': 0.001, 'elastic__l1_ratio': 0.9}
