In [1]:
# growth_distribution.ipynb

%matplotlib inline
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, pathlib

# 1. Load configuration and census data
# The project root is taken as the parent of the current working directory,
# so the kernel must be running from notebooks/ for these paths to resolve.
ROOT = pathlib.Path().resolve().parent

config_path = ROOT / "data" / "config.yaml"
census_path = ROOT / "data" / "census_data.csv"

# Decode explicitly as UTF-8 instead of the platform's locale default, which
# differs between operating systems and can fail on non-ASCII config values.
with open(config_path, encoding="utf-8") as f:
    config = yaml.safe_load(f)

# The three date columns are parsed to datetimes at load time.
census_df = pd.read_csv(
    census_path,
    parse_dates=['birth_date','hire_date','termination_date']
)

# 2. Simulation function
def run_net_growth(config, census_df, runs=500, model_cls=None):
    """Simulate headcount repeatedly and return each run's net growth.

    Every run builds a fresh model from a copy of ``census_df``, calls its
    ``step()`` method ``config["projection_years"]`` times, and then counts
    the values of ``model.population`` whose ``is_active`` flag is truthy.

    Parameters
    ----------
    config : mapping
        Must provide ``"projection_years"`` (number of ``step()`` calls per
        run). It is also passed unchanged to the model constructor.
    census_df : DataFrame-like
        Starting census; only ``len()`` and ``.copy()`` are used here. Its
        length is the baseline headcount.
    runs : int, default 500
        Number of independent simulations.
    model_cls : callable, optional
        Called as ``model_cls(census_copy, config)`` to build each model.
        When omitted, the global name ``RetirementPlanModel`` is looked up at
        call time, so it must have been imported into the notebook.

    Returns
    -------
    list of float
        One entry per run: ``(final_active - baseline) / baseline``. This is
        growth accumulated over the whole projection, not an annual rate.

    Raises
    ------
    NameError
        If ``model_cls`` is omitted and ``RetirementPlanModel`` is not defined.
    ValueError
        If ``census_df`` is empty, since growth relative to zero is undefined.
    """
    if model_cls is None:
        try:
            model_cls = RetirementPlanModel
        except NameError:
            raise NameError(
                "name 'RetirementPlanModel' is not defined; import it in the "
                "notebook's import cell or pass model_cls= explicitly"
            ) from None

    # NOTE(review): the baseline counts every census row, while the final
    # count only includes active members. If the census contains rows that
    # are already terminated, the two are not comparable -- confirm.
    n0 = len(census_df)
    if n0 == 0:
        raise ValueError("census_df is empty; net growth is undefined")

    results = []
    for _ in range(runs):
        # Each run gets its own copy so the caller's census is never shared
        # between simulations.
        model = model_cls(census_df.copy(), config)
        for _ in range(config["projection_years"]):
            model.step()
        n1 = sum(1 for a in model.population.values() if a.is_active)
        results.append((n1 - n0) / n0)
    return results

# 3. Run many simulations
net_growths = run_net_growth(config, census_df, runs=300)

# run_net_growth reports growth accumulated over the whole projection, while
# the target in the config is an annual rate. Convert each run to a compound
# annual rate so both are on the same scale.
# NOTE(review): assumes one model step corresponds to one year -- confirm.
projection_years = config["projection_years"]
if projection_years > 1:
    annual_net_growths = [
        (1 + g) ** (1 / projection_years) - 1 for g in net_growths
    ]
else:
    # With zero or one step there is nothing to annualise.
    annual_net_growths = list(net_growths)

target_growth = config["annual_growth_rate"]

# 4. Plot the distribution
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(annual_net_growths, bins=30, kde=True, color="C0", ax=ax)
ax.axvline(x=target_growth, color="C1", linestyle="--",
           label=f"Target {target_growth:.1%}")
ax.set_title("Distribution of Net Headcount Growth (% per year)")
ax.set_xlabel("Net Growth Rate")
ax.set_ylabel("Frequency")
ax.legend()
fig.tight_layout()
plt.show()

# 5. (Optional) boxplot
fig, ax = plt.subplots(figsize=(4, 2))
sns.boxplot(x=annual_net_growths, color="C2", ax=ax)
ax.set_xlabel("Net Growth Rate")
ax.set_title("Net Growth Boxplot")
plt.show()

NameError: name 'RetirementPlanModel' is not defined