# Experience Study MVP (Lapse) — Starter Notebook

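Compute crude lapse rates by duration (overall and by risk class), then apply a simple Bühlmann-Straub-style credibility blend: each crude rate gets weight $Z = \text{exposure} / (\text{exposure} + k)$, with a fixed $k$, and the remaining weight $1 - Z$ goes to the corresponding overall lapse rate.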

**Outputs:** CSV tables + PNG charts in `../results/`

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Input and output folders, given relative to the notebook's working directory.
DATA_DIR, RESULTS_DIR = Path("../data"), Path("../results")

# Create the output folder up front (no error if it already exists) so the
# later to_csv / savefig calls have somewhere to write.
RESULTS_DIR.mkdir(exist_ok=True, parents=True)

# Load the input table; the cells below read its duration_month, risk_class,
# exposure and termination_lapse columns.
panel_csv = DATA_DIR.joinpath("synthetic_policy_months.csv")
panel = pd.read_csv(panel_csv)

# Preview the first rows as the cell output.
panel.head()


In [None]:

# Named aggregations: output column -> (input column, reduction).
duration_aggs = {
    "exposure": ("exposure", "sum"),
    "lapses": ("termination_lapse", "sum"),
}

# One row per duration_month with total exposure, total lapses, and the crude
# lapse rate (lapses divided by exposure).
grp = (
    panel.groupby("duration_month")
    .agg(**duration_aggs)
    .reset_index()
    .assign(crude_lapse_rate=lambda totals: totals["lapses"] / totals["exposure"])
)

# Persist the crude table, then preview the first 12 rows as the cell output.
grp.to_csv(RESULTS_DIR / "lapse_rates_by_duration.csv", index=False)
grp.head(12)


In [None]:

# Credibility constant for Z = exposure / (exposure + k): the larger k is, the
# more each duration's rate is pulled toward the overall rate. It is fixed by
# hand here, not estimated from the data.
k = 50.0

# Complement of credibility: the lapse rate over the whole panel.
overall = panel["termination_lapse"].sum() / panel["exposure"].sum()

grp["Z"] = grp["exposure"] / (grp["exposure"] + k)

# A duration with zero exposure has Z == 0 but a crude rate of inf or NaN
# (division by zero), and 0 * inf is NaN. Substitute the overall rate on those
# rows so the blend falls back to `overall` instead of propagating NaN; rows
# with positive exposure are computed exactly as before.
crude_for_blend = grp["crude_lapse_rate"].where(grp["exposure"] > 0, overall)
grp["cred_lapse_rate"] = grp["Z"] * crude_for_blend + (1 - grp["Z"]) * overall

grp.to_csv(RESULTS_DIR / "lapse_rates_by_duration_cred.csv", index=False)

# Explicit figure/axes objects instead of pyplot's implicit "current figure",
# so this chart cannot be affected by (or leak into) other plotting cells.
fig, ax = plt.subplots()
ax.plot(grp["duration_month"], grp["crude_lapse_rate"], label="Crude")
ax.plot(grp["duration_month"], grp["cred_lapse_rate"], label="Credibility")
ax.set_title("Lapse Rate by Duration (Crude vs Credibility)")
ax.set_xlabel("Duration (months)")
ax.set_ylabel("Lapse Rate")
ax.legend()
fig.tight_layout()
fig.savefig(RESULTS_DIR / "lapse_curve.png", dpi=150)
plt.show()
plt.close(fig)  # release the figure once it has been saved and displayed


In [None]:

# Total exposure and lapses for every (duration_month, risk_class) cell.
dim = panel.groupby(["duration_month", "risk_class"]).agg(
    exposure=("exposure", "sum"),
    lapses=("termination_lapse", "sum")
).reset_index()
dim["crude_lapse_rate"] = dim["lapses"] / dim["exposure"]

# Overall lapse rate per risk class (total lapses / total exposure). A built-in
# groupby sum over the two needed columns replaces groupby.apply(lambda ...),
# which is slower and, from pandas 2.2, emits a DeprecationWarning because the
# applied function is also handed the grouping column.
rc_totals = dim.groupby("risk_class")[["lapses", "exposure"]].sum()
overall_by_rc = (rc_totals["lapses"] / rc_totals["exposure"]).to_dict()

# Credibility constant for Z = exposure / (exposure + k); fixed by hand here,
# not estimated from the data.
k = 50.0
dim["Z"] = dim["exposure"] / (dim["exposure"] + k)
dim["overall_rc"] = dim["risk_class"].map(overall_by_rc)

# A cell with zero exposure has Z == 0 but a crude rate of inf or NaN, and
# 0 * inf is NaN. Substitute the class-level rate on those rows so the blend
# falls back to `overall_rc`; rows with positive exposure are unaffected.
crude_rc_for_blend = dim["crude_lapse_rate"].where(dim["exposure"] > 0, dim["overall_rc"])
dim["cred_lapse_rate"] = dim["Z"] * crude_rc_for_blend + (1 - dim["Z"]) * dim["overall_rc"]
dim.to_csv(RESULTS_DIR / "lapse_rates_by_duration_risk.csv", index=False)

# One chart per risk class, crude vs credibility-weighted.
for rc in ["Standard", "Preferred"]:
    sub = dim[dim["risk_class"] == rc].sort_values("duration_month")
    if sub.empty:
        # Don't save a blank chart for a class that is absent from the data.
        print(f"No rows for risk class {rc!r}; skipping chart.")
        continue
    fig, ax = plt.subplots()
    ax.plot(sub["duration_month"], sub["crude_lapse_rate"], label=f"{rc} - Crude")
    ax.plot(sub["duration_month"], sub["cred_lapse_rate"], label=f"{rc} - Credibility")
    ax.set_title(f"Lapse Rate by Duration — {rc}")
    ax.set_xlabel("Duration (months)")
    ax.set_ylabel("Lapse Rate")
    ax.legend()
    fig.tight_layout()
    fig.savefig(RESULTS_DIR / f"lapse_curve_{rc.lower()}.png", dpi=150)
    plt.show()
    plt.close(fig)  # figures created in a loop accumulate unless closed
