In [None]:
import pandas as pd
import numpy as np
from features.preprocess import load_data  # Adjust if function name differs
from utils.metrics import regression_metrics, quantile_coverage, print_metrics

from models.catboost_model import train_catboost, train_catboost_quantiles, predict_interval as cat_predict_interval
from models.lightgbm_model import train_lightgbm, train_lightgbm_quantiles, predict_interval as lgbm_predict_interval
from models.random_forest import train_random_forest
from models.linear_model import train_linear_regression
from models.xgboost_model import train_xgboost

In [None]:
# Load the pre-split dataset used by every model cell below.
# NOTE(review): assumes load_data() yields exactly four items in the order
# (train features, test features, train target, test target) — confirm against
# features.preprocess.
(X_train, X_test, y_train, y_test) = load_data()

In [None]:
# Hyperparameter dictionaries for every model trained in this notebook.
# Each dict is handed unchanged to the matching train_* helper in the cells below.

# CatBoost point-estimate model.
# NOTE(review): the seed here is 32 while every other seeded config in this
# cell uses 42 — confirm that the difference is intentional.
catboost_params = dict(
    iterations=800,
    learning_rate=0.03,
    depth=6,
    verbose=0,
    random_seed=32,
)

# CatBoost quantile models (one config shared by all alphas).
catboost_quantile_params = dict(
    iterations=500,
    learning_rate=0.03,
    depth=6,
    verbose=0,
    random_seed=42,
)

# LightGBM point-estimate model.
# NOTE(review): no seed key is set for LightGBM, unlike the CatBoost, random
# forest and XGBoost configs — confirm whether one is wanted.
lgbm_params = dict(
    n_estimators=500,
    learning_rate=0.03,
    num_leaves=31,
    verbose=-1,
)

# LightGBM quantile models currently use the same settings as the point model;
# dict(...) makes an independent copy so the two configs can diverge later.
lgbm_quantile_params = dict(lgbm_params)

# Random forest baseline.
rf_params = dict(n_estimators=200, max_depth=6, random_state=42)

# Intentionally empty: no overrides are passed to the linear-regression trainer.
linreg_params = dict()

# XGBoost baseline.
xgb_params = dict(
    n_estimators=500,
    learning_rate=0.03,
    max_depth=6,
    verbosity=0,
    random_state=42,
)

In [None]:
# --- CatBoost: point estimate ---
# NOTE(review): the config is passed as `params=` here, whereas the other
# trainers in this notebook take a `<model>_params=` keyword — confirm against
# the train_catboost signature.
cat_model = train_catboost(X_train, y_train, params=catboost_params)
y_pred_cat = cat_model.predict(X_test)
cat_point_scores = regression_metrics(y_test, y_pred_cat)
print_metrics("CatBoost Point", cat_point_scores)

# --- CatBoost: quantile models ---
# One model per alpha, fitted in the order 0.05, 0.5, 0.95 with the same
# shared config, then unpacked into the low / median / high slots.
cat_low, cat_med, cat_high = [
    train_catboost_quantiles(
        X_train,
        y_train,
        alpha=alpha,
        catboost_quantile_params=catboost_quantile_params,
    )
    for alpha in (0.05, 0.5, 0.95)
]
intervals_cat = cat_predict_interval(cat_low, cat_med, cat_high, X_test)
cat_interval_scores = quantile_coverage(y_test, intervals_cat)
print_metrics("CatBoost Interval", cat_interval_scores)

In [None]:
# --- LightGBM: point estimate ---
lgbm_model = train_lightgbm(X_train, y_train, lgbm_params=lgbm_params)
y_pred_lgbm = lgbm_model.predict(X_test)
lgbm_point_scores = regression_metrics(y_test, y_pred_lgbm)
print_metrics("LightGBM Point", lgbm_point_scores)

# --- LightGBM: quantile models ---
# One model per alpha, fitted in the order 0.05, 0.5, 0.95 with the same
# shared config, then unpacked into the low / median / high slots.
lgbm_low, lgbm_med, lgbm_high = [
    train_lightgbm_quantiles(
        X_train,
        y_train,
        alpha=alpha,
        lgbm_quantile_params=lgbm_quantile_params,
    )
    for alpha in (0.05, 0.5, 0.95)
]
intervals_lgbm = lgbm_predict_interval(lgbm_low, lgbm_med, lgbm_high, X_test)
lgbm_interval_scores = quantile_coverage(y_test, intervals_lgbm)
print_metrics("LightGBM Interval", lgbm_interval_scores)

In [None]:
# --- Random forest baseline: fit, predict on the test split, report metrics ---
rf_model = train_random_forest(X_train, y_train, rf_params=rf_params)
y_pred_rf = rf_model.predict(X_test)
rf_scores = regression_metrics(y_test, y_pred_rf)
print_metrics("Random Forest", rf_scores)

In [None]:
# --- Linear regression baseline: fit, predict on the test split, report metrics ---
linreg_model = train_linear_regression(X_train, y_train, linreg_params=linreg_params)
y_pred_lin = linreg_model.predict(X_test)
linreg_scores = regression_metrics(y_test, y_pred_lin)
print_metrics("Linear Regression", linreg_scores)

In [None]:
# --- XGBoost baseline: fit, predict on the test split, report metrics ---
xgb_model = train_xgboost(X_train, y_train, xgb_params=xgb_params)
y_pred_xgb = xgb_model.predict(X_test)
xgb_scores = regression_metrics(y_test, y_pred_xgb)
print_metrics("XGBoost", xgb_scores)