# Compare GLM variants
Tune and evaluate Ridge, Lasso, and ElasticNet on the saved train/test split.

In [1]:
from pathlib import Path
import sys

# Make the repository's `src` directory importable: starting from the current
# working directory, walk upwards and stop at the first directory that
# contains a `src` sub-directory. If none is found, `repo_root` ends up at the
# filesystem root (the last candidate visited).
search_start = Path.cwd().resolve()
for candidate in (search_start, *search_start.parents):
    repo_root = candidate
    if (candidate / 'src').is_dir():
        break
# Prepend so the repo's packages take precedence over any installed copies.
sys.path.insert(0, str(repo_root / 'src'))

import joblib
import pandas as pd
from package.models.compare_glm_models import compare_glm_models
from package.models.model_training import load_df_clean, split_random_numeric, TARGET_COLUMN
from package.version_check import ensure_sklearn_version

# Project helper; presumably validates the installed scikit-learn version
# before any modelling runs — confirm against package.version_check.
ensure_sklearn_version()

# Load data and reproduce the split
df = load_df_clean()
X_train, X_test, y_train, y_test = split_random_numeric(df, target=TARGET_COLUMN)

# Run the GLM comparison on the split above. The rendered output below shows
# one row per model (ridge, lasso, elasticnet) with rmse, mae, r2 and
# best_params columns. seed/n_iter are passed explicitly here;
# NOTE(review): n_iter is presumably the hyper-parameter search budget — confirm.
results = compare_glm_models(X_train, X_test, y_train, y_test, seed=42, n_iter=20)
# Bare expression so the notebook renders the table as the cell output.
results

model,rmse,mae,r2,best_params
ridge,0.142589,0.100233,0.252937,{'model__alpha': 2.465832945854912}
lasso,0.142313,0.09994,0.255824,{'model__alpha': 0.0005415244119402539}
elasticnet,0.142266,0.099876,0.256317,"{'model__alpha': 0.0014742753159914669, 'model..."


In [3]:
import joblib

def show_alpha_from_results(results_df):
    """Return the ElasticNet ``alpha`` recorded in a comparison table.

    ``results_df`` is the DataFrame returned by ``compare_glm_models``. It
    must have an ``"elasticnet"`` row label and a ``"best_params"`` column
    whose cell is a mapping containing the key ``"model__alpha"``.
    """
    elasticnet_params = results_df.loc["elasticnet", "best_params"]
    return elasticnet_params["model__alpha"]

def show_alpha_from_artifact(path="artifacts/glm_model.joblib"):
    """Return ``alpha`` of the ``"model"`` step in a saved pipeline.

    The object stored at ``path`` is loaded with ``joblib.load`` and is
    expected to expose ``named_steps["model"]`` (the saved tuned ElasticNet
    pipeline). The default path is relative to the current working directory.
    """
    fitted_pipeline = joblib.load(path)
    estimator = fitted_pipeline.named_steps["model"]
    return estimator.alpha


# Reuse the `results` table produced by the first cell rather than calling
# compare_glm_models again: a second call repeats the whole tuning run and,
# without the explicit seed=42 / n_iter=20 used there, would silently depend
# on the function's defaults instead of the settings behind the table above.
show_alpha_from_results(results)


0.0014742753159914669

In [2]:
import joblib

def show_l1_ratio_from_results(results_df):
    """Return the ElasticNet ``l1_ratio`` recorded in a comparison table.

    ``results_df`` is the DataFrame returned by ``compare_glm_models``. It
    must have an ``"elasticnet"`` row label and a ``"best_params"`` column
    whose cell is a mapping containing the key ``"model__l1_ratio"``.
    """
    elasticnet_params = results_df.loc["elasticnet", "best_params"]
    return elasticnet_params["model__l1_ratio"]

def show_l1_ratio_from_artifact(path="artifacts/glm_model.joblib"):
    """Return ``l1_ratio`` of the ``"model"`` step in a saved pipeline.

    The object stored at ``path`` is loaded with ``joblib.load`` and is
    expected to expose ``named_steps["model"]`` (the saved tuned ElasticNet
    pipeline). The default path is relative to the current working directory.
    """
    fitted_pipeline = joblib.load(path)
    estimator = fitted_pipeline.named_steps["model"]
    return estimator.l1_ratio

# Reuse the `results` table produced by the first cell rather than calling
# compare_glm_models again: a second call repeats the whole tuning run and,
# without the explicit seed=42 / n_iter=20 used there, would silently depend
# on the function's defaults instead of the settings behind the table above.
show_l1_ratio_from_results(results)


0.3663618432936917