In [None]:
import pandas as pd
from agent.text_agent import run_from_text

# Load the modelling dataset from a relative path (resolved against the
# kernel's current working directory).
data_path = "../data/model_data_nf_v1.csv"
df = pd.read_csv(data_path)

# Plain-English hypothesis passed to the text agent together with the data.
hypothesis = "Test whether higher annual mileage leads to higher loss ratio"
result = run_from_text(df, hypothesis)

# Show the "decision" entry of the agent's result as the cell output.
result["decision"]


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# --- Lift chart: rank by annual mileage (higher = higher risk) ---
# Rows are ordered from highest to lowest annual mileage; the running share of
# loss is then plotted against the running share of exposure. A curve above the
# dashed diagonal means the high-mileage rows carry more than their
# exposure-proportional share of loss.
feature = "AVG_ANNUAL_MILEAGE"
loss_col = "M3_PROJECTED_AL_LOSS_ADJ"
premium_col = "AL_EARNED_PREMIUM_ADJ_ONLEVEL"
exposure_col = "EARNED_PU_YEARS"

plot_df = df.copy()
if "cross_val_split" in plot_df.columns:
    # Drop rows explicitly flagged "exclude"; rows with a missing flag are kept.
    plot_df = plot_df[plot_df["cross_val_split"].fillna("") != "exclude"]

# Keep only complete rows with positive premium and exposure and non-negative loss.
plot_df = plot_df[[feature, loss_col, premium_col, exposure_col]].dropna()
plot_df = plot_df[(plot_df[premium_col] > 0) & (plot_df[exposure_col] > 0) & (plot_df[loss_col] >= 0)]

# Fail loudly instead of saving a misleading figure: with no rows the curve
# would collapse onto the random baseline, and with zero total loss the
# cumulative share would be 0/0 (NaN).
if plot_df.empty:
    raise ValueError("No rows left after filtering; cannot build the lift chart.")
total_exposure = plot_df[exposure_col].sum()
total_loss = plot_df[loss_col].sum()
if not (np.isfinite(total_loss) and total_loss > 0):
    raise ValueError(
        f"Total {loss_col} is {total_loss!r}; cumulative loss share is undefined."
    )

# Stable sort so rows tied on the feature keep their original relative order,
# which makes the curve reproducible from run to run.
ranked = plot_df.sort_values(feature, ascending=False, kind="mergesort").copy()
ranked["cum_exposure_pct"] = ranked[exposure_col].cumsum() / total_exposure
ranked["cum_loss_pct"] = ranked[loss_col].cumsum() / total_loss

# Add origin and ensure we end at (1, 1) despite floating-point rounding
x = np.concatenate([[0.0], ranked["cum_exposure_pct"].to_numpy(), [1.0]])
y = np.concatenate([[0.0], ranked["cum_loss_pct"].to_numpy(), [1.0]])

fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(x, y, linewidth=2, label="Ranked by HIGH annual mileage")
ax.plot([0, 1], [0, 1], linestyle="--", linewidth=1.5, label="Random (baseline)")
ax.set_title("Lift chart: cumulative loss captured vs cumulative exposure")
ax.set_xlabel("Cumulative exposure share")
ax.set_ylabel("Cumulative loss share")
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.grid(True, alpha=0.3)
ax.legend(loc="lower right")

out_path = "mileage_lift.png"  # relative path: written to the current working directory
fig.tight_layout()
fig.savefig(out_path, dpi=200)
plt.show()

# Display the saved file name as the cell output.
out_path
