# Compare GLM variants with LGBM
Tune and evaluate Ridge, Lasso, and ElasticNet on the saved train/test split. Finally, compare them with the tuned LGBM model.

In [1]:
from pathlib import Path
import sys

# Make repo src importable
# Walk from the working directory up through its ancestors and keep the first
# one that contains a `src` folder; if none does, the filesystem root is kept.
repo_root = Path.cwd().resolve()
for candidate in (repo_root, *repo_root.parents):
    repo_root = candidate
    if (candidate / 'src').is_dir():
        break
sys.path.insert(0, str(repo_root / 'src'))

import joblib
import pandas as pd
from package.models.compare_glm_models import compare_glm_models
from package.models.model_training import load_df_clean, split_random_numeric, TARGET_COLUMN
from package.version_check import ensure_sklearn_version

# NOTE(review): presumably validates the installed scikit-learn version;
# confirm against package.version_check.
ensure_sklearn_version()

# Load data and reproduce the split
df = load_df_clean()
X_train, X_test, y_train, y_test = split_random_numeric(df, target=TARGET_COLUMN)

# Values forwarded to compare_glm_models as `seed` and `n_iter`.
SEARCH_SEED = 42
SEARCH_N_ITER = 20

results = compare_glm_models(
    X_train,
    X_test,
    y_train,
    y_test,
    seed=SEARCH_SEED,
    n_iter=SEARCH_N_ITER,
)
results

Unnamed: 0_level_0,rmse,mae,r2,best_params
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ridge,0.142589,0.100233,0.252937,{'model__alpha': 2.465832945854912}
lasso,0.142313,0.09994,0.255824,{'model__alpha': 0.0005415244119402539}
elasticnet,0.142266,0.099876,0.256317,"{'model__alpha': 0.0014742753159914669, 'model..."


In [2]:
import joblib

def show_alpha_from_results(results_df):
    """Return the ElasticNet ``alpha`` recorded in a results table.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The DataFrame returned by compare_glm_models. It must have an
        ``"elasticnet"`` row and a ``"best_params"`` column whose cell is a
        mapping containing the key ``"model__alpha"``.

    Returns
    -------
    The value stored under ``"model__alpha"``.
    """
    elasticnet_params = results_df.loc["elasticnet", "best_params"]
    return elasticnet_params["model__alpha"]

def show_alpha_from_artifact(path="artifacts/glm_model.joblib"):
    """Load the saved tuned ElasticNet pipeline and return its alpha.

    Parameters
    ----------
    path : str or path-like
        Location of the joblib artifact. The default is a relative path, so
        it is resolved against the current working directory.

    Returns
    -------
    The ``alpha`` attribute of the pipeline step named ``"model"``.

    Notes
    -----
    ``joblib.load`` unpickles the file, so only load artifacts you trust.
    """
    fitted_pipeline = joblib.load(path)
    estimator = fitted_pipeline.named_steps["model"]
    return estimator.alpha


# Reuse the table already computed by the first cell (seed=42, n_iter=20)
# rather than calling compare_glm_models a second time: the search is
# expensive, and reading from `results` guarantees the value printed here is
# the one shown in the table above.
results_alpha = results
alpha = show_alpha_from_results(results_alpha)
print(alpha)


0.0014742753159914669


In [3]:
import joblib

def show_l1_ratio_from_results(results_df):
    """Return the ElasticNet ``l1_ratio`` recorded in a results table.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The DataFrame returned by compare_glm_models. It must have an
        ``"elasticnet"`` row and a ``"best_params"`` column whose cell is a
        mapping containing the key ``"model__l1_ratio"``.

    Returns
    -------
    The value stored under ``"model__l1_ratio"``.
    """
    best_for_elasticnet = results_df.loc["elasticnet", "best_params"]
    l1_ratio_value = best_for_elasticnet["model__l1_ratio"]
    return l1_ratio_value

def show_l1_ratio_from_artifact(path="artifacts/glm_model.joblib"):
    """Load the saved tuned ElasticNet pipeline and return its l1_ratio.

    Parameters
    ----------
    path : str or path-like
        Location of the joblib artifact. The default is a relative path, so
        it is resolved against the current working directory.

    Returns
    -------
    The ``l1_ratio`` attribute of the pipeline step named ``"model"``.

    Notes
    -----
    ``joblib.load`` unpickles the file, so only load artifacts you trust.
    """
    model_step = joblib.load(path).named_steps["model"]
    return model_step.l1_ratio

# Reuse the table already computed by the first cell (seed=42, n_iter=20)
# rather than running compare_glm_models yet again: the search is expensive,
# and reading from `results` guarantees the value printed here is the one
# shown in the table above.
results_l1 = results
l1ratio = show_l1_ratio_from_results(results_l1)
print(l1ratio)


0.3663618432936917


In [4]:
# One-sentence summary of the ElasticNet hyperparameters held in `alpha` and
# `l1ratio` (both set by earlier cells).
summary_template = "Based on RandomizedSearchCV tuning ElasticNet parameters based on 5-fold cross-validation splits, the best alpha is {} and the best l1_ratio is {}."
print(summary_template.format(alpha, l1ratio))

Based on RandomizedSearchCV tuning ElasticNet parameters based on 5-fold cross-validation splits, the best alpha is 0.0014742753159914669 and the best l1_ratio is 0.3663618432936917.


In [8]:
import pandas as pd
from package.feature_importance import load_artifacts
from package.models.model_training import evaluate_regression

# 1) Round the existing GLM results (on a copy, so `results` itself is untouched)
metric_cols = ["rmse", "mae", "r2"]
rounded = results.copy()
rounded[metric_cols] = rounded[metric_cols].round(5)

# 2) Load tuned LGBM and its test data
artifacts = load_artifacts()  # loads glm_model, lgbm_model, test_data
lgbm_model = artifacts["lgbm_model"]
# Keep the artifact's test set under dedicated names. Rebinding the
# notebook-wide X_test / y_test here would silently change the data seen by
# the earlier GLM cells if they are re-run after this one.
# NOTE(review): presumably this is the same split the GLM metrics were
# computed on; confirm, otherwise the rows below are not comparable.
lgbm_X_test = artifacts["test_data"]["X_test"]
lgbm_y_test = artifacts["test_data"]["y_test"]

# 3) Compute LGBM metrics and params
lgbm_pred = lgbm_model.predict(lgbm_X_test)
lgbm_metrics = evaluate_regression(lgbm_y_test, lgbm_pred)

lgbm_row = pd.DataFrame(
    [{
        "model": "lgbm",
        # Same 5-decimal rounding as the GLM rows so the table is uniform.
        **{name: round(lgbm_metrics[name], 5) for name in metric_cols},
        # Full parameter set of the pipeline's final "model" step.
        "best_params": lgbm_model.named_steps["model"].get_params(),
    }]
).set_index("model")

# 4) Combine and show
results_with_lgbm = pd.concat([rounded, lgbm_row])
results_with_lgbm




Unnamed: 0_level_0,rmse,mae,r2,best_params
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ridge,0.14259,0.10023,0.25294,{'model__alpha': 2.465832945854912}
lasso,0.14231,0.09994,0.25582,{'model__alpha': 0.0005415244119402539}
elasticnet,0.14227,0.09988,0.25632,"{'model__alpha': 0.0014742753159914669, 'model..."
lgbm,0.12723,0.07667,0.4052,"{'boosting_type': 'gbdt', 'class_weight': None..."
