In [None]:
import pandas as pd
import numpy as np
import warnings
from final_project.modeling import load_models, EVENT_WEIGHT
from final_project.data import read_data, split_data
from final_project.preprocessing import NUM_FEATURES, CAT_FEATURES, RESPONDER
from final_project.evaluation import evaluate_predictions, get_pred_summary, get_models_and_val_data
from final_project.plotting import plot_pred_vs_true, plot_day_predictions, plot_feature_relevance, plot_pdps

# Silence feature name warnings 
warnings.filterwarnings(
    "ignore",
    message="X does not have valid feature names",
)

## Load Models

In [None]:
# Fetch the GLM and LightGBM models together with validation features/targets,
# once per dataset variant. The string argument names the variant
# ("clean_data" vs. "clean_data_clipped"); how it is resolved is up to
# get_models_and_val_data — presumably a stored dataset identifier, TODO confirm.
glm_raw, lgbm_raw, X_val_raw, y_val_raw = get_models_and_val_data("clean_data")
glm_clip, lgbm_clip, X_val_clip, y_val_clip = get_models_and_val_data("clean_data_clipped")

In [None]:
# Build one prediction summary per dataset variant. The cells below read the
# columns "y_true", "glm_y_pred", "lgbm_y_pred", "baseline_y_pred" and "weight"
# from these summaries.
df_pred_raw = get_pred_summary(glm_raw, lgbm_raw, X_val_raw, y_val_raw)
df_pred_clip = get_pred_summary(glm_clip, lgbm_clip, X_val_clip, y_val_clip)

## Evaluate Models
A baseline predictor (`past_50m_span_ewm_vol`) is included as a reference point for comparing the models.

In [None]:
# GLM metrics: score the unclipped predictions first, then the clipped ones.
glm_eval_raw = evaluate_predictions(
    df_pred_raw["y_true"],
    df_pred_raw["glm_y_pred"],
    df_pred_raw["weight"],
)
# sep="\n" keeps the blank line between the label and the metrics.
print("Unclipped:\n", glm_eval_raw, sep="\n")

glm_eval_clip = evaluate_predictions(
    df_pred_clip["y_true"],
    df_pred_clip["glm_y_pred"],
    df_pred_clip["weight"],
)
print("\nClipped:\n", glm_eval_clip, sep="\n")

In [None]:
# Evaluate lgbm: weighted metrics for the LightGBM predictions, first on the
# unclipped summary and then on the clipped one.
lgbm_eval_raw = evaluate_predictions(
    df_pred_raw["y_true"],
    df_pred_raw["lgbm_y_pred"],
    df_pred_raw["weight"],
)
print("Unclipped:\n")
# Print the LightGBM result here; this cell previously printed glm_eval_raw,
# so the "Unclipped" numbers shown for LightGBM were actually the GLM's.
print(lgbm_eval_raw)

lgbm_eval_clip = evaluate_predictions(
    df_pred_clip["y_true"],
    df_pred_clip["lgbm_y_pred"],
    df_pred_clip["weight"],
)
print("\nClipped:\n")
print(lgbm_eval_clip)

In [None]:
# Baseline metrics: score the unclipped predictions first, then the clipped ones.
base_eval_raw = evaluate_predictions(
    df_pred_raw["y_true"],
    df_pred_raw["baseline_y_pred"],
    df_pred_raw["weight"],
)
# sep="\n" keeps the blank line between the label and the metrics.
print("Unclipped:\n", base_eval_raw, sep="\n")

base_eval_clip = evaluate_predictions(
    df_pred_clip["y_true"],
    df_pred_clip["baseline_y_pred"],
    df_pred_clip["weight"],
)
print("\nClipped:\n", base_eval_clip, sep="\n")

The GBT (LightGBM) outperforms the GLM on all measures, and the results on the clipped data are significantly better than on the unclipped data.

#### Predicted vs. Actual

In [None]:
# Predicted vs. true values for the GLM, using the clipped prediction summary.
# The result is bound to `fig`, so the cell itself has no bare-expression output.
fig = plot_pred_vs_true(df_pred_clip, "glm")

In [None]:
# Predicted vs. true values for LightGBM, using the clipped prediction summary.
# The result is bound to `fig`, so the cell itself has no bare-expression output.
fig = plot_pred_vs_true(df_pred_clip, "lgbm")

These plots should be easier to read on logarithmic axes.

In [None]:
# Predicted vs. true values for the GLM (clipped summary), this time with log=True
# to request log axes from the plotting helper.
fig = plot_pred_vs_true(df_pred_clip, "glm", log=True)

In [None]:
# Predicted vs. true values for LightGBM (clipped summary), this time with log=True
# to request log axes from the plotting helper.
fig = plot_pred_vs_true(df_pred_clip, "lgbm", log=True)

In [None]:
# Predicted vs. true values for the baseline (clipped summary) with log=True,
# for comparison with the GLM and LightGBM log-axis plots.
fig = plot_pred_vs_true(df_pred_clip, "baseline", log=True)

Next, look at the predicted values over the course of a single day.

In [None]:
# Plot the clipped-data predictions for two individual dates.
# Note: `fig` is rebound by the second call, so only the 2025-07-16 result
# remains referenced after this cell.
fig = plot_day_predictions(df_pred_clip, "2025-10-30")
fig = plot_day_predictions(df_pred_clip, "2025-07-16")

## Feature Relevance and PDPs

In [None]:
# Plot feature relevance for the GLM on the clipped validation data.
# The return value is kept as glm_top_5 (presumably the five most relevant
# features — confirm against final_project.plotting).
glm_top_5 = plot_feature_relevance(glm_clip, X_val_clip, y_val_clip)

In [None]:
# Plot feature relevance for LightGBM on the clipped validation data.
# The return value is kept as lgbm_top_5 (presumably the five most relevant
# features — confirm against final_project.plotting).
lgbm_top_5 = plot_feature_relevance(lgbm_clip, X_val_clip, y_val_clip)

In [None]:
# PDPs for the GLM on the clipped validation data; n_top=5 presumably limits
# the plots to the five most relevant features — TODO confirm in plot_pdps.
plot_pdps(glm_clip, X_val_clip, y_val_clip, n_top=5)

In [None]:
# PDPs for LightGBM on the clipped validation data; n_top=5 presumably limits
# the plots to the five most relevant features — TODO confirm in plot_pdps.
plot_pdps(lgbm_clip, X_val_clip, y_val_clip, n_top=5)