# Cashflow Simulation (Fitted Models)

Lightweight simulator using fitted timing + ratio distributions, grade transitions, and copula.
Includes calibrated omega if `omega_selected.csv` is available, and MSCI projection
logic copied from `msci_projection.ipynb`.

In [1]:
from pathlib import Path
import ast
import json
import numpy as np
import pandas as pd
from dataclasses import dataclass, field
from math import sqrt
from scipy import stats

In [None]:
import os
from pathlib import Path
RUN_TAG = os.environ.get("RUN_TAG")
HIST_END = os.environ.get("HIST_END")
if RUN_TAG is None:
    RUN_TAG = HIST_END or "2025Q3"
BASE_OUT = Path("model_fits") / "runs" / RUN_TAG
CALIB_DIR = BASE_OUT / "calibration"
PROJ_DIR = BASE_OUT / "projection"

INPUT_PATH = "anonymized.csv"
INPUT_PATH = os.environ.get("INPUT_PATH", INPUT_PATH)
FIT_DIR = str(CALIB_DIR)
FIT_DIR = os.environ.get("FIT_DIR", FIT_DIR)
COPULA_PATH = str(CALIB_DIR / "copula_params.json")

START_FROM_HIST_END = True  # if HIST_END provided, start projection from that quarter
START_FROM_HIST_END = (os.environ.get("START_FROM_HIST_END", str(START_FROM_HIST_END)).lower() in ("1","true","yes","y"))


START_YEAR = 2025
START_QUARTER = "Q3"
START_YEAR = int(os.environ.get("START_YEAR", START_YEAR))
START_QUARTER = os.environ.get("START_QUARTER", START_QUARTER)
HORIZON_Q = 20
N_SIMS = 1000
N_SIMS = int(os.environ.get("N_SIMS", N_SIMS))
DRAW_RATIO_CAP = 1.0  # allow cumulative draw ratio > 1 (recallables)
SEED = 1234

OMEGA_MODE = "calibrated"  # "none", "global", or "calibrated"
OMEGA_CLIP = (-0.5, 0.5)  # hard cap to avoid explosive NAV

MSCI_PATH = "msci.xlsx"
MSCI_MODE = "unconditional"  # enforced: always use projected MSCI paths
MSCI_SCENARIO = "neutral"     # bullish / neutral / bearish
REP_CAP_P90 = os.environ.get("REP_CAP_P90", "0").lower() in ("1","true","yes","y")
PACE_SCALE = float(os.environ.get("PACE_SCALE", "1.0"))
AUTO_PACE_ONEPASS = os.environ.get("AUTO_PACE_ONEPASS", "true").lower() in ("1","true","yes","y")
PACE_CALIB_N_Q = int(os.environ.get("PACE_CALIB_N_Q", "0"))
if PACE_CALIB_N_Q <= 0:
    PACE_CALIB_N_Q = HORIZON_Q
MSCI_TILT_STRENGTH = 1.15

if not Path(INPUT_PATH).exists():
    candidates = list(Path.cwd().glob("**/anonymized.csv"))
    if not candidates:
        candidates = list(Path.cwd().parent.glob("**/anonymized.csv"))
    if not candidates:
        candidates = list(Path.cwd().parent.parent.glob("**/anonymized.csv"))
    if candidates:
        INPUT_PATH = str(candidates[0])
    else:
        raise FileNotFoundError("anonymized.csv not found. Set INPUT_PATH to the full path.")

# Prefer MSCI.xlsx if present
if not Path(MSCI_PATH).exists():
    candidates = []
    for p in ["MSCI.xlsx", "msci.xlsx"]:
        if Path(p).exists():
            candidates.append(Path(p))
    if not candidates:
        for base in [Path.cwd(), Path.cwd().parent, Path.cwd().parent.parent]:
            for p in ["MSCI.xlsx", "msci.xlsx"]:
                cand = base / p
                if cand.exists():
                    candidates.append(cand)
    if candidates:
        MSCI_PATH = str(candidates[0])

print("Using INPUT_PATH:", INPUT_PATH)
print("Using MSCI_PATH:", MSCI_PATH)

# Auto-resolve FIT_DIR if outputs were written under model_fits/model_fits/outputs
if not Path(FIT_DIR).exists():
    alt = Path("model_fits/model_fits/outputs")
    if alt.exists():
        FIT_DIR = str(alt)


# Ensure FIT_DIR points to an existing calibration folder
def _resolve_fit_dir(fit_dir: str) -> str:
    cand = Path(fit_dir)
    if (cand / "ratio_fit_selected.csv").exists():
        return str(cand)
    # candidate locations relative to project and notebook dirs
    bases = [Path.cwd(), Path.cwd().parent]
    for base in bases:
        for p in [
            base / "model_fits" / "runs" / RUN_TAG / "calibration",
            base / "model_fits" / "model_fits" / "runs" / RUN_TAG / "calibration",
        ]:
            if (p / "ratio_fit_selected.csv").exists():
                return str(p)
    # fallback: first calibration folder with ratio_fit_selected.csv
    hits = list(Path.cwd().glob("**/ratio_fit_selected.csv"))
    if hits:
        return str(hits[0].parent)
    return fit_dir

FIT_DIR = _resolve_fit_dir(FIT_DIR)

Path(FIT_DIR).mkdir(parents=True, exist_ok=True)

Path(CALIB_DIR).mkdir(parents=True, exist_ok=True)
Path(PROJ_DIR).mkdir(parents=True, exist_ok=True)


Using INPUT_PATH: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/anonymized.csv
Using MSCI_PATH: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/MSCI.xlsx


In [3]:
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]
AGE_BUCKET_ORDER = {a: i for i, a in enumerate(AGE_LABELS)}
GRADE_STATES = ["A", "B", "C", "D"]
GRADE_ANCHOR_Q = 20  # anchor initial grade for first 5 years (20 quarters)
GRADE_UPDATE_ENABLED = os.environ.get("GRADE_UPDATE_ENABLED", "1").lower() in ("1", "true", "yes", "y")
SMALL_SAMPLE_RULE_ENABLED = os.environ.get("SMALL_SAMPLE_RULE_ENABLED", "1").lower() in ("1", "true", "yes", "y")


def _norm_key(s: str) -> str:
    return " ".join(s.strip().lower().replace("_", " ").split())


def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    col_map = {_norm_key(c): c for c in df.columns}
    def _get(name: str) -> str:
        k = _norm_key(name)
        return col_map.get(k, name)

    rename = {}
    rename[_get("Adj strategy")] = "Adj Strategy"
    rename[_get("Adj Strategy")] = "Adj Strategy"
    rename[_get("Quarter of Transaction Date")] = "Quarter"
    rename[_get("Year of Transaction Date")] = "Year"
    rename[_get("FundID")] = "FundID"
    rename[_get("Grade")] = "Grade"
    rename[_get("Current Grade")] = "Grade_Current"
    rename[_get("CurrentGrade")] = "Grade_Current"
    rename[_get("Grade Current")] = "Grade_Current"
    rename[_get("Grade_Current")] = "Grade_Current"
    rename[_get("Adj Drawdown EUR")] = "Adj Drawdown EUR"
    rename[_get("Adj Repayment EUR")] = "Adj Repayment EUR"
    rename[_get("Recallable")] = "Recallable"
    rename[_get("NAV Adjusted EUR")] = "NAV Adjusted EUR"
    rename[_get("Commitment EUR")] = "Commitment EUR"
    rename[_get("Fund Workflow Stage")] = "Fund Workflow Stage"
    rename[_get("Planned End Date")] = "Planned End Date"
    rename[_get("Planned end date with add. years as per legal doc")] = "Planned End Date"
    rename[_get("Planned End Date as per legal documentation")] = "Planned End Date"
    rename[_get("Signed Amount EUR")] = "Signed Amount EUR"
    rename[_get("Capacity")] = "Capacity"
    rename[_get("Fund_Age_Quarters")] = "Fund_Age_Quarters"
    rename[_get("draw_cum_prev")] = "draw_cum_prev"
    rename[_get("Recallable_Percentage_Decimal")] = "Recallable_Percentage_Decimal"
    rename[_get("Expiration_Quarters")] = "Expiration_Quarters"
    return df.rename(columns=rename)


def parse_quarter(q) -> float:
    if pd.isna(q):
        return np.nan
    if isinstance(q, (int, np.integer, float, np.floating)):
        return float(q)
    s = str(q).strip().upper()
    if s.startswith("Q"):
        s = s[1:]
    try:
        return float(s)
    except Exception:
        return np.nan


def add_quarter_end(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df["Quarter"] = df["Quarter"].apply(parse_quarter)
    df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
    m = df["Year"].notna() & df["Quarter"].notna()
    years = df.loc[m, "Year"].astype(int)
    quarters = df.loc[m, "Quarter"].astype(int)
    df.loc[m, "quarter_end"] = pd.PeriodIndex(year=years, quarter=quarters, freq="Q").to_timestamp("Q")
    return df


def apply_current_grade(df: pd.DataFrame, context: str = "") -> pd.DataFrame:
    df = df.copy()

    if "Grade" in df.columns and "Grade_Seed" not in df.columns:
        df["Grade_Seed"] = df["Grade"]

    if "Grade" in df.columns:
        df["Grade"] = df["Grade"].astype(str).str.strip()
        df.loc[df["Grade"].isin(["", "nan", "None", "NaN", "<NA>"]), "Grade"] = np.nan

    if "quarter_end" not in df.columns:
        df = add_quarter_end(df)

    # Optionally bypass grading logic and use provided Grade as-is
    if os.environ.get("GRADE_USE_INPUT", "0").lower() in ("1", "true", "yes", "y"):
        if "Grade" in df.columns:
            df["Grade"] = df["Grade"].astype(str).str.strip()
            df.loc[df["Grade"].isin(["", "nan", "None", "NaN", "<NA>"]), "Grade"] = np.nan
            if "quarter_end" not in df.columns:
                df = add_quarter_end(df)
            df = df.sort_values(["FundID", "quarter_end"])
            df["Grade_Current"] = df.groupby("FundID")["Grade"].ffill()
            df["Grade"] = df["Grade_Current"]
        if context:
            print(f"Using provided Grade for {context}.")
        return df

    df["QPeriod"] = df["quarter_end"].dt.to_period("Q")

    cols = [
        "FundID",
        "Adj Strategy",
        "QPeriod",
        "quarter_end",
        "Adj Drawdown EUR",
        "Adj Repayment EUR",
        "NAV Adjusted EUR",
        "First Closing Date",
        "Grade",
    ]
    cols = [c for c in cols if c in df.columns]
    cash = df[cols].copy()
    cash = cash.rename(
        columns={
            "Adj Strategy": "AdjStrategy",
            "quarter_end": "TransactionDate",
            "First Closing Date": "FirstClosingDate",
        }
    )

    cash["TransactionDate"] = pd.to_datetime(cash["TransactionDate"], errors="coerce")
    if "FirstClosingDate" in cash.columns:
        cash["FirstClosingDate"] = pd.to_datetime(cash["FirstClosingDate"], errors="coerce")
    else:
        cash["FirstClosingDate"] = pd.NaT

    for c in ["Adj Drawdown EUR", "Adj Repayment EUR", "NAV Adjusted EUR"]:
        if c in cash.columns:
            cash[c] = pd.to_numeric(cash[c], errors="coerce").fillna(0.0)
        else:
            cash[c] = 0.0

    cash = cash.dropna(subset=["FundID", "TransactionDate"])

    if cash["FirstClosingDate"].isna().any():
        first_tx = cash.groupby("FundID")["TransactionDate"].transform("min")
        cash["FirstClosingDate"] = cash["FirstClosingDate"].fillna(first_tx)

    if cash.empty:
        return df

    fund_strategy = (
        cash.groupby("FundID")["AdjStrategy"]
        .agg(lambda s: s.mode().iat[0] if len(s.mode()) else s.iloc[0])
        .reset_index()
    )

    q_snap = (
        cash.sort_values(["FundID", "TransactionDate"])
        .groupby(["FundID", "QPeriod"], as_index=False)
        .agg(
            AdjStrategy=("AdjStrategy", "last"),
            FirstClosingDate=("FirstClosingDate", "last"),
            QuarterDrawdown=("Adj Drawdown EUR", "sum"),
            QuarterRepayment=("Adj Repayment EUR", "sum"),
            QuarterEndNAV=("NAV Adjusted EUR", "last"),
            QuarterEndDate=("TransactionDate", "max"),
        )
    )

    q_snap = q_snap.sort_values(["FundID", "QPeriod"])
    q_snap["QuarterEndNAV"] = q_snap.groupby("FundID")["QuarterEndNAV"].ffill().fillna(0)

    q_snap["CumDrawdown"] = q_snap.groupby("FundID")["QuarterDrawdown"].cumsum()
    q_snap["CumRepayment"] = q_snap.groupby("FundID")["QuarterRepayment"].cumsum()
    q_snap["PaidIn"] = q_snap["CumDrawdown"].abs()
    q_snap["Distributed"] = q_snap["CumRepayment"].abs()
    q_snap["NAV"] = q_snap["QuarterEndNAV"].abs()

    q_snap["DPI"] = np.where(q_snap["PaidIn"] > 0, q_snap["Distributed"] / q_snap["PaidIn"], np.nan)
    q_snap["TVPI"] = np.where(
        q_snap["PaidIn"] > 0,
        (q_snap["Distributed"] + q_snap["NAV"]) / q_snap["PaidIn"],
        np.nan,
    )

    def xnpv(rate, cfs, dts):
        dts = np.asarray(dts, dtype="datetime64[ns]")
        cfs = np.asarray(cfs, dtype=float)
        t0 = dts[0]
        day_counts = (dts - t0) / np.timedelta64(1, "D")
        years = day_counts / 365.0
        return np.sum(cfs / ((1.0 + rate) ** years))

    def xirr_newton(cfs, dts, guess=0.1, max_iter=80, tol=1e-7):
        dts = np.asarray(dts, dtype="datetime64[ns]")
        cfs = np.asarray(cfs, dtype=float)
        rate = float(guess)
        for _ in range(max_iter):
            f = xnpv(rate, cfs, dts)
            if not np.isfinite(f):
                return np.nan
            if abs(f) < tol:
                return rate
            eps = 1e-6
            f1 = xnpv(rate + eps, cfs, dts)
            df = (f1 - f) / eps
            if df == 0 or not np.isfinite(df):
                return np.nan
            rate_new = rate - f / df
            if rate_new <= -0.999999 or not np.isfinite(rate_new):
                return np.nan
            rate = rate_new
        return np.nan

    def compute_fund_quarter_xirr(fund_cash, fund_q):
        fund_cash = fund_cash.sort_values("TransactionDate")
        cfs = (-fund_cash["Adj Drawdown EUR"].abs() + fund_cash["Adj Repayment EUR"].abs()).to_numpy(dtype=float)
        dts = fund_cash["TransactionDate"].to_numpy(dtype="datetime64[ns]")
        irr_vals = []
        irr_flags = []
        j = 0
        n = len(cfs)
        for r in fund_q.itertuples(index=False):
            q_end = np.datetime64(r.QuarterEndDate, "ns")
            while j < n and dts[j] <= q_end:
                j += 1
            cfs_slice = cfs[:j]
            dts_slice = dts[:j]
            if len(cfs_slice) == 0:
                irr_vals.append(np.nan)
                irr_flags.append("no_txns")
                continue
            terminal_nav = float(abs(r.NAV)) if np.isfinite(r.NAV) else 0.0
            cfs_full = np.append(cfs_slice, terminal_nav)
            dts_full = np.append(dts_slice, q_end).astype("datetime64[ns]")
            if not (np.any(cfs_full < 0) and np.any(cfs_full > 0)):
                irr_vals.append(np.nan)
                irr_flags.append("no_sign_change")
                continue
            tvpi = r.TVPI
            guess = 0.10 if (pd.notna(tvpi) and tvpi > 1.0) else -0.10
            irr = xirr_newton(cfs_full, dts_full, guess=guess)
            if pd.notna(tvpi) and (0.98 <= tvpi <= 1.02):
                if not np.isfinite(irr):
                    irr2 = xirr_newton(cfs_full, dts_full, guess=-guess)
                    if np.isfinite(irr2):
                        irr = irr2
                        irr_flags.append("flat_retry_success")
                    else:
                        irr = 0.0
                        irr_flags.append("flat_to_zero")
                else:
                    irr_flags.append("flat_success")
            else:
                irr_flags.append("ok" if np.isfinite(irr) else "fail")
            irr_vals.append(irr)
        return pd.DataFrame({"IRR": irr_vals, "IRR_Flag": irr_flags})

    irr_rows = []
    for fund_id, fund_q in q_snap.groupby("FundID", sort=False):
        fund_cash = cash[cash["FundID"] == fund_id]
        irr_df = compute_fund_quarter_xirr(fund_cash, fund_q)
        irr_df = irr_df.copy()
        irr_df["FundID"] = fund_id
        irr_df["QPeriod"] = fund_q["QPeriod"].values
        irr_rows.append(irr_df)

    if irr_rows:
        irr_all = pd.concat(irr_rows, ignore_index=True)
        q_snap = q_snap.merge(irr_all, on=["FundID", "QPeriod"], how="left")
    else:
        q_snap["IRR"] = np.nan

    def quartile_to_grade(s):
        r = s.rank(pct=True)
        return pd.cut(r, [0, 0.25, 0.5, 0.75, 1], labels=["D", "C", "B", "A"], include_lowest=True)

    q_snap["Grade_DPI"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["DPI"].transform(quartile_to_grade)
    q_snap["Grade_TVPI"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["TVPI"].transform(quartile_to_grade)
    q_snap["Grade_IRR"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["IRR"].transform(quartile_to_grade)

    DEBT_STRATEGIES = {"Hybrid Debt-Equity", "Private Debt", "Other Private Debt"}
    VC_STRATEGY = "Venture Capital"

    rep = cash[cash["Adj Repayment EUR"].abs() > 0].copy()
    first_repay = rep.groupby("FundID")["TransactionDate"].min().reset_index(name="FirstRepaymentDate")
    first_close = cash.groupby("FundID")["FirstClosingDate"].min().reset_index(name="FirstCloseDate")
    fund_timing = first_close.merge(first_repay, on="FundID", how="left")
    fund_timing = fund_timing.merge(fund_strategy, on="FundID", how="left")
    fund_timing["RepayWithin5Y"] = (
        fund_timing["FirstRepaymentDate"].notna()
        & (fund_timing["FirstRepaymentDate"] <= (fund_timing["FirstCloseDate"] + pd.DateOffset(years=5)))
    )
    fund_timing["BaseYears"] = np.where(fund_timing["RepayWithin5Y"], 5, 6)
    fund_timing["InvestPeriodYears"] = fund_timing["BaseYears"] + 1

    q_snap = q_snap.merge(
        fund_timing[["FundID", "FirstCloseDate", "InvestPeriodYears", "AdjStrategy"]],
        on=["FundID", "AdjStrategy"],
        how="left",
    )
    q_snap["IsInvestmentPeriod"] = q_snap["QuarterEndDate"] <= (
        q_snap["FirstCloseDate"] + q_snap["InvestPeriodYears"].apply(lambda y: pd.DateOffset(years=int(y)))
    )

    fund_counts = (
        q_snap.groupby(["AdjStrategy", "QPeriod"])["FundID"].nunique().rename("StrategyFundCount").reset_index()
    )
    q_snap = q_snap.merge(fund_counts, on=["AdjStrategy", "QPeriod"], how="left")

    q_snap["IsDebt"] = q_snap["AdjStrategy"].isin(DEBT_STRATEGIES)
    q_snap["IsVC"] = q_snap["AdjStrategy"].eq(VC_STRATEGY)

    grade_to_idx = {"A": 0, "B": 1, "C": 2, "D": 3}
    idx_to_grade = {0: "A", 1: "B", 2: "C", 3: "D"}

    def worse_grade(g1, g2):
        if pd.isna(g1):
            return g2
        if pd.isna(g2):
            return g1
        return g1 if grade_to_idx[g1] >= grade_to_idx[g2] else g2

    def downgrade_one_notch(g):
        if pd.isna(g):
            return g
        return idx_to_grade[min(grade_to_idx[g] + 1, 3)]

    def final_grade(row):
        if SMALL_SAMPLE_RULE_ENABLED and row["StrategyFundCount"] < 30:
            return worse_grade(row["Grade_DPI"], row["Grade_TVPI"])
        if row["IsDebt"]:
            return row["Grade_IRR"]
        if row["IsInvestmentPeriod"]:
            return row["Grade_TVPI"] if row["IsVC"] else row["Grade_DPI"]
        base = row["Grade_IRR"]
        dpi_g = row["Grade_DPI"]
        if pd.isna(base):
            return base
        if pd.notna(dpi_g) and (grade_to_idx[dpi_g] > grade_to_idx[base]):
            return downgrade_one_notch(base)
        return base

    q_snap["CurrentGrade"] = q_snap.apply(final_grade, axis=1)

    fund_quarters = cash[["FundID", "QPeriod"]].drop_duplicates().sort_values(["FundID", "QPeriod"])
    fund_quarters["RankQ"] = fund_quarters.groupby("FundID").cumcount() + 1
    fund_quarters["Block4"] = (fund_quarters["RankQ"] - 1) // 4 + 1

    fund_first = (
        cash.dropna(subset=["Grade"]).groupby("FundID", as_index=False).first()[["FundID", "Grade"]]
        .rename(columns={"Grade": "FirstGrade"})
    )

    fund_quarters = fund_quarters.merge(fund_first, on="FundID", how="left")
    fund_quarters = fund_quarters.merge(
        q_snap[["FundID", "QPeriod", "CurrentGrade"]], on=["FundID", "QPeriod"], how="left"
    )

    fund_quarters["AssignedGrade"] = np.where(
        (fund_quarters["RankQ"] <= 20) & fund_quarters["FirstGrade"].notna(),
        fund_quarters["FirstGrade"],
        fund_quarters["CurrentGrade"],
    )

    fund_quarters["AssignedGrade"] = fund_quarters.groupby("FundID")["AssignedGrade"].ffill()

    df = df.merge(fund_quarters[["FundID", "QPeriod", "AssignedGrade"]], on=["FundID", "QPeriod"], how="left")
    df["Grade_Current"] = df["AssignedGrade"]
    if "Grade_Seed" in df.columns:
        df["Grade_Current"] = df["Grade_Current"].fillna(df["Grade_Seed"])
    df["Grade"] = df["Grade_Current"]

    if context:
        print(f"Computed Grade_Current using performance rules for {context}.")

    return df
    if all(c in df.columns for c in ["Grade", "FundID", "quarter_end"]):
        df["Grade"] = df["Grade"].astype(str).str.strip()
        df.loc[df["Grade"].isin(["", "nan", "None", "NaN", "<NA>"]), "Grade"] = np.nan
        df = df.sort_values(["FundID", "quarter_end"])
        df["Grade_Current"] = df.groupby("FundID")["Grade"].ffill()
        df["Grade"] = df["Grade_Current"]
        if context:
            print(f"Computed Grade_Current (forward fill) for {context}.")
    return df


def make_age_bucket(age_q: int) -> str:
    for i in range(len(AGE_BINS_Q) - 1):
        if AGE_BINS_Q[i] < age_q <= AGE_BINS_Q[i + 1]:
            return AGE_LABELS[i]
    return AGE_LABELS[-1]


def one_factor_uniforms(n: int, rng: np.random.Generator, rho: float) -> np.ndarray:
    Z = rng.standard_normal()
    eps = rng.standard_normal(n)
    z = rho * Z + sqrt(1.0 - rho * rho) * eps
    return stats.norm.cdf(z)




# robust params parser (handles "np.float64(0.1)" style)
def parse_params(val):
    if val is None:
        return None
    if isinstance(val, (list, tuple)):
        if len(val) == 1 and isinstance(val[0], (list, tuple)):
            return val[0]
        return val
    if isinstance(val, str):
        s = val.strip()
        if s == "" or s.lower() in ("no_data", "nan", "none"):
            return None
        try:
            return ast.literal_eval(s)
        except Exception:
            try:
                cleaned = s.replace("np.float64", "").replace("np.float32", "")
                cleaned = cleaned.replace("numpy.float64", "").replace("numpy.float32", "")
                return ast.literal_eval(cleaned)
            except Exception:
                import re
                nums = re.findall(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?", s)
                if nums:
                    return tuple(float(x) for x in nums)
                return None
    return val

def load_fit_table(path: str, key_cols: list) -> dict:
    df = pd.read_csv(path)
    out = {}
    for _, r in df.iterrows():
        key = tuple(r[c] for c in key_cols)
        out[key] = r.to_dict()
    return out


def sample_from_dist(dist_name: str, params, u: float) -> float:
    if params is None:
        return 0.0
    if isinstance(params, (list, tuple)) and len(params) == 1 and isinstance(params[0], (list, tuple)):
        params = params[0]
    dist = getattr(stats, dist_name)
    return float(dist.ppf(u, *params))


@dataclass
class RecallableBucket:
    created_q: int
    expiry_q: int
    amount_remaining: float


@dataclass
class RecallableLedger:
    rho: float
    expiry_quarters: int
    commitment: float
    buckets: list = field(default_factory=list)

    def _rc_cap(self) -> float:
        return max(float(self.rho), 0.0) * max(float(self.commitment), 0.0)

    def drop_expired(self, q: int) -> None:
        if int(self.expiry_quarters) <= 0:
            self.buckets = []
            return
        self.buckets = [b for b in self.buckets if b.expiry_q >= q and b.amount_remaining > 0]

    def available(self, q: int) -> float:
        self.drop_expired(q)
        return float(sum(b.amount_remaining for b in self.buckets))

    def add_recallable(self, q: int, rc_amount: float, enforce_cap: bool = True) -> float:
        self.drop_expired(q)
        x = max(float(rc_amount or 0.0), 0.0)
        if x <= 0.0 or int(self.expiry_quarters) <= 0:
            return 0.0
        add_amt = x
        if enforce_cap:
            cap = self._rc_cap()
            cur = self.available(q)
            room = max(cap - cur, 0.0)
            add_amt = min(add_amt, room)
        if add_amt <= 0.0:
            return 0.0
        self.buckets.append(RecallableBucket(created_q=q, expiry_q=q + int(self.expiry_quarters), amount_remaining=float(add_amt)))
        return float(add_amt)

    def consume_for_drawdown(self, q: int, draw_amount: float) -> dict:
        self.drop_expired(q)
        need = max(float(draw_amount or 0.0), 0.0)
        if need <= 0.0:
            return {"use_rc": 0.0, "use_commitment": 0.0}
        self.buckets.sort(key=lambda b: b.created_q)
        use_rc = 0.0
        for b in self.buckets:
            if need <= 0:
                break
            take = min(b.amount_remaining, need)
            b.amount_remaining -= take
            need -= take
            use_rc += take
        return {"use_rc": use_rc, "use_commitment": max(draw_amount - use_rc, 0.0)}

In [4]:
# --- MSCI projection (from msci_projection.ipynb) ---

def load_msci_quarterly(msci_xlsx_path: str) -> pd.DataFrame:
    msci = pd.read_excel(msci_xlsx_path)
    if "Date" not in msci.columns or "SCXP Index" not in msci.columns:
        raise ValueError("MSCI file must contain columns: 'Date' and 'SCXP Index'")
    msci = msci[["Date", "SCXP Index"]].copy()
    msci["Date"] = pd.to_datetime(msci["Date"], errors="coerce")
    msci["SCXP Index"] = pd.to_numeric(msci["SCXP Index"], errors="coerce")
    msci = msci.dropna(subset=["Date", "SCXP Index"]).sort_values("Date")
    msci["quarter_end"] = msci["Date"].dt.to_period("Q").dt.to_timestamp("Q")
    q = (msci.groupby("quarter_end", as_index=False)["SCXP Index"]
         .last().rename(columns={"SCXP Index": "index_level"})
         .sort_values("quarter_end").reset_index(drop=True))
    q["msci_ret_q"] = q["index_level"].pct_change()
    q = q.dropna(subset=["msci_ret_q"]).reset_index(drop=True)
    return q


def label_regimes_by_quantiles(q_returns: pd.Series, low_q=0.33, high_q=0.67) -> pd.Series:
    q_low = q_returns.quantile(low_q)
    q_high = q_returns.quantile(high_q)
    regime = pd.Series(index=q_returns.index, dtype="object")
    regime[q_returns <= q_low] = "bear"
    regime[q_returns >= q_high] = "bull"
    regime[(q_returns > q_low) & (q_returns < q_high)] = "flat"
    return regime


def estimate_transition_matrix(regimes: pd.Series, states=("bear", "flat", "bull"), laplace=1.0) -> pd.DataFrame:
    states = list(states)
    counts = pd.DataFrame(0.0, index=states, columns=states)
    r = regimes.dropna().tolist()
    for a, b in zip(r[:-1], r[1:]):
        if a in states and b in states:
            counts.loc[a, b] += 1.0
    counts = counts + laplace
    P = counts.div(counts.sum(axis=1), axis=0)
    return P


def estimate_regime_params(df_q: pd.DataFrame, states=("bear", "flat", "bull")) -> pd.DataFrame:
    out = []
    overall_sigma = float(df_q["msci_ret_q"].std(ddof=1))
    overall_sigma = max(overall_sigma, 1e-6)
    for s in states:
        sub = df_q.loc[df_q["regime"] == s, "msci_ret_q"].dropna()
        mu = float(sub.mean()) if len(sub) else 0.0
        sigma = float(sub.std(ddof=1)) if len(sub) > 1 else overall_sigma
        sigma = max(sigma, 1e-6)
        out.append((s, mu, sigma))
    return pd.DataFrame(out, columns=["regime", "mu_q", "sigma_q"]).set_index("regime")


def apply_persistence_tilt(P: pd.DataFrame, scenario: str, k: float = 1.2) -> pd.DataFrame:
    scenario = scenario.lower().strip()
    if scenario not in {"bullish", "neutral", "bearish"}:
        raise ValueError("scenario must be one of: bullish, neutral, bearish")
    if scenario == "neutral":
        return P.copy()
    target = "bull" if scenario == "bullish" else "bear"
    P2 = P.copy()
    for s in P2.index:
        P2.loc[s, target] *= k
    P2.loc[target, target] *= k
    P2 = P2.div(P2.sum(axis=1), axis=0)
    return P2


def simulate_markov_regimes(P: pd.DataFrame, start_state: str, n_steps: int, rng: np.random.Generator) -> list:
    states = list(P.index)
    if start_state not in states:
        start_state = "flat" if "flat" in states else states[0]
    path = [start_state]
    for _ in range(n_steps):
        cur = path[-1]
        probs = P.loc[cur].values.astype(float)
        nxt = rng.choice(states, p=probs)
        path.append(nxt)
    return path[1:]


def project_msci_mc(df_q_hist, start_quarter_end, n_quarters=40, n_sims=100,
                    low_q=0.33, high_q=0.67, laplace=1.0, seed=1234,
                    scenario="neutral", tilt_strength=1.2):
    rng = np.random.default_rng(seed)
    df = df_q_hist.copy().sort_values("quarter_end").reset_index(drop=True)
    df["regime"] = label_regimes_by_quantiles(df["msci_ret_q"], low_q=low_q, high_q=high_q)
    P = estimate_transition_matrix(df["regime"], laplace=laplace)
    params = estimate_regime_params(df)
    P_tilted = apply_persistence_tilt(P, scenario=scenario, k=tilt_strength)

    hist_levels = df_q_hist[["quarter_end", "index_level"]].drop_duplicates("quarter_end").sort_values("quarter_end")
    if start_quarter_end not in set(hist_levels["quarter_end"]):
        prev = hist_levels.loc[hist_levels["quarter_end"] < start_quarter_end]
        if prev.empty:
            raise ValueError("Start quarter is before the first msci quarter in the file.")
        start_quarter_end = prev["quarter_end"].iloc[-1]
    start_level = float(hist_levels.loc[hist_levels["quarter_end"] == start_quarter_end, "index_level"].iloc[0])

    df_reg = df.loc[df["quarter_end"] <= start_quarter_end].dropna(subset=["regime"])
    start_regime = df_reg["regime"].iloc[-1] if not df_reg.empty else "flat"

    future_qe = pd.period_range(start=start_quarter_end.to_period("Q") + 1, periods=n_quarters, freq="Q").to_timestamp("Q")

    rows = []
    for sim_id in range(1, n_sims + 1):
        regime_path = simulate_markov_regimes(P_tilted, start_regime, n_quarters, rng)
        level = start_level
        for qe, s in zip(future_qe, regime_path):
            mu = float(params.loc[s, "mu_q"])
            sig = float(params.loc[s, "sigma_q"])
            r = mu + sig * rng.standard_normal()
            level *= (1.0 + r)
            rows.append({
                "sim_id": sim_id,
                "quarter_end": qe,
                "regime": s,
                "msci_ret_q": r,
                "index_level": level,
                "scenario": scenario,
            })
    proj = pd.DataFrame(rows)
    return proj, P, P_tilted, params

In [5]:
# --- Load data + fits ---

df = pd.read_csv(INPUT_PATH, engine="python")
df = normalize_columns(df)
df = add_quarter_end(df)
df = df.dropna(subset=["FundID", "quarter_end"])
df = apply_current_grade(df, context="simulation")

start_qe = pd.Period(f"{START_YEAR}Q{START_QUARTER[-1]}", freq="Q").to_timestamp("Q")
if START_FROM_HIST_END and HIST_END:
    try:
        y = int(HIST_END[:4]); q = int(HIST_END[-1])
        start_qe = pd.Period(f"{y}Q{q}", freq="Q").to_timestamp("Q")
        print(f"Using HIST_END as start_qe: {start_qe.date()}")
    except Exception:
        pass
quarters = [start_qe + pd.offsets.QuarterEnd(i) for i in range(1, HORIZON_Q + 1)]



def pick_fit_file(selected_name: str, fallback_name: str) -> Path:
    sel = Path(FIT_DIR) / selected_name
    if sel.exists():
        return sel
    fb = Path(FIT_DIR) / fallback_name
    if fb.exists():
        print(f"Warning: {selected_name} not found; using {fallback_name}.")
        return fb
    raise FileNotFoundError(f"Missing both {selected_name} and {fallback_name} in {FIT_DIR}")

ratio_sel = load_fit_table(Path(FIT_DIR) / "ratio_fit_selected.csv", ["Adj Strategy", "Grade", "AgeBucket", "ratio"])
timing_sel = load_fit_table(Path(FIT_DIR) / "timing_probs_selected.csv", ["Adj Strategy", "Grade", "AgeBucket"])

omega_sel = None
omega_sel_path = Path(FIT_DIR) / "omega_selected.csv"
if omega_sel_path.exists():
    omega_sel = load_fit_table(omega_sel_path, ["Adj Strategy", "Grade", "AgeBucket"])
    print("Loaded omega_selected.csv")

rho_event = 0.25
rho_size = 0.15
if Path(COPULA_PATH).exists():
    cop = json.loads(Path(COPULA_PATH).read_text())
    rho_event = float(cop.get("rho_event", rho_event))
    rho_size = float(cop.get("rho_size", rho_size))





# --- NAV anchor config (data-driven) ---
NAV_ANCHOR_ENABLED = os.environ.get("NAV_ANCHOR_ENABLED", "1").lower() in ("1","true","yes","y")
NAV_ANCHOR_MIN_AGE_BUCKET = os.environ.get("NAV_ANCHOR_MIN_AGE_BUCKET", "12-15")
NAV_ANCHOR_END_Q = int(os.environ.get("NAV_ANCHOR_END_Q", "12"))
NAV_ANCHOR_LAMBDA_DEFAULT = float(os.environ.get("NAV_ANCHOR_LAMBDA", "0.05"))
NAV_ANCHOR_END_LAMBDA_DEFAULT = float(os.environ.get("NAV_ANCHOR_END_LAMBDA", "0.1"))
NAV_ANCHOR_MIN_MULT = float(os.environ.get("NAV_ANCHOR_MIN_MULT", "0.8"))
NAV_ANCHOR_MAX_MULT = float(os.environ.get("NAV_ANCHOR_MAX_MULT", "1.2"))
NAV_LIQUIDATE_Q_DEFAULT = int(os.environ.get("NAV_LIQUIDATE_Q", "4"))

NAV_TARGETS = {}
NAV_TARGETS_STRAT = {}
NAV_LAM_STRAT = {}
NAV_LAM_END_STRAT = {}
NAV_LIQ_Q_STRAT = {}

nav_target_path = Path(FIT_DIR) / "nav_anchor_targets.csv"
if nav_target_path.exists():
    nt = pd.read_csv(nav_target_path)
    if "avg_nav_to_paidin" in nt.columns:
        for _, r in nt.iterrows():
            s = r.get("Adj Strategy")
            a = r.get("AgeBucket")
            v = r.get("avg_nav_to_paidin")
            if pd.notna(s) and pd.notna(a) and pd.notna(v):
                NAV_TARGETS[(s, a)] = float(v)
        # strategy-level fallback
        for s, g in nt.groupby("Adj Strategy"):
            v = g["avg_nav_to_paidin"].mean()
            if pd.notna(v):
                NAV_TARGETS_STRAT[s] = float(v)
        print("Loaded NAV targets from", nav_target_path)

nav_cal_path = Path(FIT_DIR) / "nav_anchor_calibration.csv"
if nav_cal_path.exists():
    nav_cal = pd.read_csv(nav_cal_path)
    for _, r in nav_cal.iterrows():
        s = r.get("Adj Strategy")
        if pd.isna(s):
            continue
        lam = r.get("lambda")
        lam_end = r.get("lambda_end")
        liq = r.get("liq_q_p90") if "liq_q_p90" in r else r.get("liq_q_p75", r.get("liq_q_median"))
        if pd.notna(lam):
            NAV_LAM_STRAT[s] = float(lam)
        if pd.notna(lam_end):
            NAV_LAM_END_STRAT[s] = float(lam_end)
        if pd.notna(liq):
            NAV_LIQ_Q_STRAT[s] = int(liq)
    print("Loaded NAV anchor calibration from", nav_cal_path)

# MSCI quarterly returns
msci_q = None
if Path(MSCI_PATH).exists():
    msci_q = load_msci_quarterly(MSCI_PATH)
    msci_q["msci_ret_q_lag1"] = msci_q["msci_ret_q"].shift(1)
if msci_q is None or len(msci_q) == 0:
    raise FileNotFoundError("MSCI data not loaded. Check MSCI_PATH and file contents.")


  df.loc[m, "quarter_end"] = pd.PeriodIndex(year=years, quarter=quarters, freq="Q").to_timestamp("Q")
  q_snap["FirstCloseDate"] + q_snap["InvestPeriodYears"].apply(lambda y: pd.DateOffset(years=int(y)))


KeyboardInterrupt: 

In [None]:
# --- Build fund states ---

hist = df[df["quarter_end"] <= start_qe].copy()
last = hist.sort_values(["FundID", "quarter_end"]).groupby("FundID").tail(1)

# Precompute historical cashflows for IRR and cumulative stats
hist_sorted = hist.sort_values(["FundID", "quarter_end"]).copy()

# --- Data-driven draw pace + p_draw calibration (hierarchical) ---
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]

hist_sorted["AgeBucket"] = pd.cut(
    pd.to_numeric(hist_sorted.get("Fund_Age_Quarters"), errors="coerce"),
    bins=AGE_BINS_Q,
    labels=AGE_LABELS,
)

# fund-level commitment map
comm_h = pd.to_numeric(hist_sorted.get("Commitment EUR"), errors="coerce")
commit_map_h = comm_h.groupby(hist_sorted["FundID"]).max()

# cumulative recallables for denom
rc_cum_h = hist_sorted.groupby("FundID")["Recallable"].transform(
    lambda s: pd.to_numeric(s, errors="coerce").abs().cumsum()
)

# denom for cumulative draw ratio
denom_h = hist_sorted["FundID"].map(commit_map_h).fillna(0.0) + rc_cum_h.fillna(0.0)

# draw flags + delta ratio
delta_draw = pd.to_numeric(hist_sorted.get("Adj Drawdown EUR"), errors="coerce").abs().fillna(0.0)
hist_sorted["draw_flag"] = delta_draw > 0
delta_ratio = np.where(denom_h > 0, delta_draw / denom_h, 0.0)
hist_sorted["delta_ratio"] = delta_ratio

# group keys
G_SGA = ["Adj Strategy", "Grade", "AgeBucket"]
G_SA = ["Adj Strategy", "AgeBucket"]
G_S = ["Adj Strategy"]

# p_draw by group with counts
pdraw_sga = hist_sorted.groupby(G_SGA)["draw_flag"].agg(["mean", "count"])
pdraw_sa = hist_sorted.groupby(G_SA)["draw_flag"].agg(["mean", "count"])
pdraw_s = hist_sorted.groupby(G_S)["draw_flag"].agg(["mean", "count"])

p_draw_sga = pdraw_sga["mean"].to_dict()
p_draw_sga_n = pdraw_sga["count"].to_dict()
p_draw_sa = pdraw_sa["mean"].to_dict()
p_draw_sa_n = pdraw_sa["count"].to_dict()
p_draw_s = pdraw_s["mean"].to_dict()
p_draw_s_n = pdraw_s["count"].to_dict()
p_draw_global = float(hist_sorted["draw_flag"].mean()) if len(hist_sorted) else 0.0

# draw pace by group (delta ratio)
pace_sga_all = hist_sorted.groupby(G_SGA)["delta_ratio"].mean().to_dict()
pace_sga_all_n = hist_sorted.groupby(G_SGA)["delta_ratio"].count().to_dict()
pace_sa_all = hist_sorted.groupby(G_SA)["delta_ratio"].mean().to_dict()
pace_sa_all_n = hist_sorted.groupby(G_SA)["delta_ratio"].count().to_dict()
pace_s_all = hist_sorted.groupby(G_S)["delta_ratio"].mean().to_dict()
pace_s_all_n = hist_sorted.groupby(G_S)["delta_ratio"].count().to_dict()

pace_sga_pos = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SGA)["delta_ratio"].mean().to_dict()
pace_sga_pos_n = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SGA)["delta_ratio"].count().to_dict()
pace_sa_pos = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SA)["delta_ratio"].mean().to_dict()
pace_sa_pos_n = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SA)["delta_ratio"].count().to_dict()
pace_s_pos = hist_sorted[hist_sorted["draw_flag"]].groupby(G_S)["delta_ratio"].mean().to_dict()
pace_s_pos_n = hist_sorted[hist_sorted["draw_flag"]].groupby(G_S)["delta_ratio"].count().to_dict()

pace_global_all = float(hist_sorted["delta_ratio"].mean()) if len(hist_sorted) else 0.0
pace_global_pos = float(hist_sorted.loc[hist_sorted["draw_flag"], "delta_ratio"].mean()) if hist_sorted["draw_flag"].any() else 0.0

# aliases for downstream compatibility
draw_pace_global_all = pace_global_all
draw_pace_global_pos = pace_global_pos


# Build timing fallback maps from timing_sel
_timing_rows = []
for (s, g, a), v in timing_sel.items():
    _timing_rows.append({
        "Adj Strategy": s,
        "Grade": g,
        "AgeBucket": a,
        "p_draw": float(v.get("p_draw", 0.0)),
        "p_rep": float(v.get("p_rep", 0.0)),
        "p_rc_given_rep": float(v.get("p_rc_given_rep", 0.0)),
    })
_timing_df = pd.DataFrame(_timing_rows)

if len(_timing_df):
    timing_by_sa = _timing_df.groupby(["Adj Strategy", "AgeBucket"])[["p_draw", "p_rep", "p_rc_given_rep"]].mean().to_dict("index")
    timing_by_s = _timing_df.groupby(["Adj Strategy"])[["p_draw", "p_rep", "p_rc_given_rep"]].mean().to_dict("index")
    timing_global = _timing_df[["p_draw", "p_rep", "p_rc_given_rep"]].mean().to_dict()
else:
    timing_by_sa = {}
    timing_by_s = {}
    timing_global = {"p_draw": 0.0, "p_rep": 0.0, "p_rc_given_rep": 0.0}

P_DRAW_MIN_N = 50
PACE_MIN_N = 50
P_DRAW_MULT_MIN = 0.5
P_DRAW_MULT_MAX = 2.0


def get_timing_probs(strategy, grade, age_bucket):
    tp = timing_sel.get((strategy, grade, age_bucket))
    if tp is None:
        tp = timing_by_sa.get((strategy, age_bucket))
    if tp is None:
        tp = timing_by_s.get(strategy)
    if tp is None:
        tp = timing_global
    if tp is None:
        return {"p_draw": 0.0, "p_rep": 0.0, "p_rc_given_rep": 0.0}
    return tp


def get_hist_p_draw_and_n(strategy, grade, age_bucket):
    key_sga = (strategy, grade, age_bucket)
    if key_sga in p_draw_sga and p_draw_sga_n.get(key_sga, 0) >= P_DRAW_MIN_N:
        return float(p_draw_sga[key_sga]), int(p_draw_sga_n.get(key_sga, 0))
    key_sa = (strategy, age_bucket)
    if key_sa in p_draw_sa and p_draw_sa_n.get(key_sa, 0) >= P_DRAW_MIN_N:
        return float(p_draw_sa[key_sa]), int(p_draw_sa_n.get(key_sa, 0))
    if strategy in p_draw_s and p_draw_s_n.get(strategy, 0) >= P_DRAW_MIN_N:
        return float(p_draw_s[strategy]), int(p_draw_s_n.get(strategy, 0))
    return float(p_draw_global), int(len(hist_sorted))


def get_model_p_draw(strategy, grade, age_bucket):
    tp = get_timing_probs(strategy, grade, age_bucket)
    return float(tp.get("p_draw", 0.0))


def get_p_draw_multiplier(strategy, grade, age_bucket):
    hist_val, _n = get_hist_p_draw_and_n(strategy, grade, age_bucket)
    model_val = get_model_p_draw(strategy, grade, age_bucket)
    if model_val <= 0 or not np.isfinite(model_val):
        return 1.0
    mult = hist_val / model_val if np.isfinite(hist_val) else 1.0
    if not np.isfinite(mult):
        mult = 1.0
    return float(np.clip(mult, P_DRAW_MULT_MIN, P_DRAW_MULT_MAX))


def get_blend_weight(n_obs: int) -> float:
    if n_obs >= 200:
        return 0.7
    if n_obs >= 100:
        return 0.5
    if n_obs >= 50:
        return 0.35
    return 0.2


def get_p_draw_adjusted(strategy, grade, age_bucket):
    hist_val, n_obs = get_hist_p_draw_and_n(strategy, grade, age_bucket)
    model_val = get_model_p_draw(strategy, grade, age_bucket)
    mult = get_p_draw_multiplier(strategy, grade, age_bucket)
    p_mult = model_val * mult
    w = get_blend_weight(n_obs)
    if not np.isfinite(hist_val):
        p_adj = p_mult
    elif not np.isfinite(p_mult):
        p_adj = hist_val
    else:
        p_adj = (1.0 - w) * p_mult + w * hist_val
    # uplift-only: never below model
    p_adj = max(p_adj, model_val)
    return float(np.clip(p_adj, 0.0, 0.95))


def get_draw_pace(strategy, grade, age_bucket, use_positive=False):
    if use_positive:
        pace_sga = pace_sga_pos
        pace_sga_n = pace_sga_pos_n
        pace_sa = pace_sa_pos
        pace_sa_n = pace_sa_pos_n
        pace_s = pace_s_pos
        pace_s_n = pace_s_pos_n
        pace_global = pace_global_pos
    else:
        pace_sga = pace_sga_all
        pace_sga_n = pace_sga_all_n
        pace_sa = pace_sa_all
        pace_sa_n = pace_sa_all_n
        pace_s = pace_s_all
        pace_s_n = pace_s_all_n
        pace_global = pace_global_all

    key_sga = (strategy, grade, age_bucket)
    if key_sga in pace_sga and pace_sga_n.get(key_sga, 0) >= PACE_MIN_N:
        return float(pace_sga[key_sga])
    key_sa = (strategy, age_bucket)
    if key_sa in pace_sa and pace_sa_n.get(key_sa, 0) >= PACE_MIN_N:
        return float(pace_sa[key_sa])
    if strategy in pace_s and pace_s_n.get(strategy, 0) >= PACE_MIN_N:
        return float(pace_s[strategy])
    return float(pace_global)



# fund-level commitment (Commitment EUR only)
comm = pd.to_numeric(hist_sorted.get("Commitment EUR"), errors="coerce")
commitment_map = comm.groupby(hist_sorted["FundID"]).max()
flow_map = {}
rep_cum_map = {}
draw_cum_map = {}
for fid, g in hist_sorted.groupby("FundID", sort=False):
    draw_abs = pd.to_numeric(g.get("Adj Drawdown EUR"), errors="coerce").fillna(0.0).abs()
    rep_abs = pd.to_numeric(g.get("Adj Repayment EUR"), errors="coerce").fillna(0.0).abs()
    flows = (-draw_abs + rep_abs).astype(float).tolist()
    dates = g["quarter_end"].tolist()
    flow_map[fid] = (flows, dates)
    draw_cum_map[fid] = float(draw_abs.sum())
    rep_cum_map[fid] = float(rep_abs.sum())

recall_cum_map = {}
for fid, g in hist_sorted.groupby("FundID", sort=False):
    rc_abs = pd.to_numeric(g.get("Recallable"), errors="coerce").fillna(0.0).abs()
    recall_cum_map[fid] = float(rc_abs.sum())

# Investment period timing (5+1 if repayments start within 5y else 6+1)
first_close = hist_sorted.groupby("FundID")["First Closing Date"].min()
first_close = pd.to_datetime(first_close, errors="coerce")
if first_close.isna().any():
    fallback = hist_sorted.groupby("FundID")["quarter_end"].min()
    first_close = first_close.fillna(fallback)

first_repay = hist_sorted[hist_sorted["Adj Repayment EUR"].abs() > 0].groupby("FundID")["quarter_end"].min()
first_repay = first_repay.reindex(first_close.index)
repay_within_5y = first_repay.notna() & (first_repay <= (first_close + pd.DateOffset(years=5)))
base_years = np.where(repay_within_5y, 5, 6)
invest_years = pd.Series(base_years + 1, index=first_close.index)
invest_end = pd.Series(index=first_close.index, dtype="datetime64[ns]")
for fid, fc in first_close.items():
    if pd.isna(fc):
        invest_end.loc[fid] = pd.NaT
    else:
        invest_end.loc[fid] = fc + pd.DateOffset(years=int(invest_years.loc[fid]))


# Planned end + strategy overrun (from history)
planned_end = hist_sorted.groupby("FundID")["Planned End Date"].last() if "Planned End Date" in hist_sorted.columns else pd.Series(index=first_close.index, dtype="datetime64[ns]")
planned_end = pd.to_datetime(planned_end, errors="coerce")
planned_end_qe = planned_end.dt.to_period("Q").dt.to_timestamp("Q")
last_obs = hist_sorted.groupby("FundID")["quarter_end"].max()

def quarters_diff(a, b):
    if pd.isna(a) or pd.isna(b):
        return np.nan
    return float(pd.Period(a, freq="Q").ordinal - pd.Period(b, freq="Q").ordinal)

overrun_q = (last_obs.to_frame("last_qe").join(planned_end_qe.rename("planned_end_qe"))
            .apply(lambda r: max(quarters_diff(r["last_qe"], r["planned_end_qe"]), 0.0) if pd.notna(r["planned_end_qe"]) else np.nan, axis=1))
overran_only = overrun_q[overrun_q.notna() & (overrun_q > 0)]
fund_strategy = hist_sorted.groupby("FundID")["Adj Strategy"].agg(lambda s: s.mode().iat[0] if len(s.mode()) else s.iloc[0])
avg_overrun_by_strategy = (overran_only.to_frame("overrun_q")
                           .join(fund_strategy.rename("Adj Strategy"))
                           .groupby("Adj Strategy")["overrun_q"].mean().clip(lower=0.0))

fund_end_qe = planned_end_qe.copy()
for fid, pe in planned_end_qe.items():
    if pd.isna(pe):
        continue
    strat = fund_strategy.get(fid, None)
    avg_over = float(avg_overrun_by_strategy.get(strat, 0.0)) if strat is not None else 0.0
    if avg_over > 0:
        fund_end_qe.loc[fid] = (pd.Period(pe, freq="Q") + int(round(avg_over))).to_timestamp("Q")

fund_states = {}
for _, r in last.iterrows():
    fid = r["FundID"]
    strategy = r.get("Adj Strategy", "Unknown")
    grade = r.get("Grade", "D")
    status = str(r.get("Fund Workflow Stage", "")).strip().lower()
    if "terminated" in status:
        continue
    age_q = int(pd.to_numeric(r.get("Fund_Age_Quarters", 0), errors="coerce") or 0)
    nav = float(pd.to_numeric(r.get("NAV Adjusted EUR", 0), errors="coerce") or 0.0)
    fc = first_close.get(fid, pd.NaT)
    if pd.notna(fc):
        fc_qe = pd.Period(fc, freq="Q").to_timestamp("Q")
        start_qe_fund = max(fc_qe, start_qe)
    else:
        start_qe_fund = start_qe
    commitment = float(commitment_map.get(fid, 0.0) or 0.0)
    dd_commit = float(pd.to_numeric(r.get("draw_cum_prev", 0), errors="coerce") or 0.0)
    rho = float(pd.to_numeric(r.get("Recallable_Percentage_Decimal", 0), errors="coerce") or 0.0)
    exp_q_val = pd.to_numeric(r.get("Expiration_Quarters", 0), errors="coerce")
    exp_q = int(exp_q_val) if pd.notna(exp_q_val) else 0

    flows, dates = flow_map.get(fid, ([], []))

    fund_states[fid] = {
        "strategy": strategy,
        "grade": grade,
        "grade_seed": grade,
        "age0": age_q,
        "start_qe": start_qe_fund,
        "nav": nav,
        "dd_commit": dd_commit,
        "draw_cum": draw_cum_map.get(fid, 0.0),
        "draw_cum_ratio": draw_cum_map.get(fid, 0.0) / commitment if commitment else 0.0,
        "rep_cum": rep_cum_map.get(fid, 0.0),
        "recall_cum": recall_cum_map.get(fid, 0.0),
        "commitment": commitment,
        "ledger": RecallableLedger(rho=rho, expiry_quarters=exp_q, commitment=commitment),
        "cf_amounts": list(flows),
        "cf_dates": list(dates),
        "invest_end": invest_end.get(fid, pd.NaT),
        "fund_end_qe": fund_end_qe.get(fid, pd.NaT),
    }

# include funds with no historical cashflows/NAV (but active)
all_sorted = df.sort_values(["FundID", "quarter_end"])
all_last = all_sorted.groupby("FundID").tail(1)
# fund-level commitment map from full data
comm_all = pd.to_numeric(all_sorted.get("Commitment EUR"), errors="coerce")
commit_map_all = comm_all.groupby(all_sorted["FundID"]).max()
first_close_all = all_sorted.groupby("FundID")["First Closing Date"].min()
first_close_all = pd.to_datetime(first_close_all, errors="coerce")
planned_end_all = all_sorted.groupby("FundID")["Planned End Date"].last() if "Planned End Date" in all_sorted.columns else pd.Series(index=first_close_all.index, dtype="datetime64[ns]")
planned_end_all = pd.to_datetime(planned_end_all, errors="coerce")
planned_end_qe_all = planned_end_all.dt.to_period("Q").dt.to_timestamp("Q")
fund_strategy_all = all_sorted.groupby("FundID")["Adj Strategy"].agg(lambda s: s.mode().iat[0] if len(s.mode()) else s.iloc[0])
# use last non-null grade
fund_grade_all = all_sorted.groupby("FundID")["Grade"].apply(lambda s: s.dropna().iloc[-1] if s.dropna().shape[0] else np.nan)

for _, r in all_last.iterrows():
    fid = r["FundID"]
    if fid in fund_states:
        continue
    status = str(r.get("Fund Workflow Stage", "")).strip().lower()
    if "terminated" in status:
        continue
    fc = first_close_all.get(fid, pd.NaT)
    if pd.isna(fc) or fc > start_qe:
        continue
    commitment = float(commit_map_all.get(fid, 0.0) or 0.0)
    if commitment <= 0:
        continue
    strategy = fund_strategy_all.get(fid, r.get("Adj Strategy", "Unknown"))
    grade = fund_grade_all.get(fid, r.get("Grade", "D"))
    age_val = pd.to_numeric(r.get("Fund_Age_Quarters", np.nan), errors="coerce")
    if pd.isna(age_val):
        # compute age from first close to cutoff
        age_val = max(0, int(pd.Period(start_qe, freq="Q").ordinal - pd.Period(fc, freq="Q").ordinal))
    age_q = int(age_val)
    nav = 0.0
    dd_commit = 0.0
    rho = float(pd.to_numeric(r.get("Recallable_Percentage_Decimal", 0), errors="coerce") or 0.0)
    exp_q_val = pd.to_numeric(r.get("Expiration_Quarters", 0), errors="coerce")
    exp_q = int(exp_q_val) if pd.notna(exp_q_val) else 0

    # investment period for funds without repayments: default 6+1 years
    invest_end_f = fc + pd.DateOffset(years=7) if pd.notna(fc) else pd.NaT

    pe = planned_end_qe_all.get(fid, pd.NaT)
    if pd.notna(pe):
        strat = fund_strategy_all.get(fid, None)
        avg_over = float(avg_overrun_by_strategy.get(strat, 0.0)) if strat is not None else 0.0
        if avg_over > 0:
            pe = (pd.Period(pe, freq="Q") + int(round(avg_over))).to_timestamp("Q")
    fund_states[fid] = {
        "strategy": strategy,
        "grade": grade if pd.notna(grade) else "D",
        "age0": age_q,
        "start_qe": start_qe_fund,
        "nav": nav,
        "dd_commit": dd_commit,
        "draw_cum": 0.0,
        "draw_cum_ratio": 0.0,
        "rep_cum": 0.0,
        "recall_cum": 0.0,
        "commitment": commitment,
        "ledger": RecallableLedger(rho=rho, expiry_quarters=exp_q, commitment=commitment),
        "cf_amounts": [],
        "cf_dates": [],
        "invest_end": invest_end_f,
        "fund_end_qe": pe,
        "ended": False,
        "grade_end": None,
    }

# one-pass pace calibration (optional) -- match recent history draw pace
if AUTO_PACE_ONEPASS:
    try:
        # historical draw per active fund over last N quarters
        hist_agg = df.groupby("quarter_end", as_index=False).agg(
            hist_draw=("Adj Drawdown EUR", lambda s: pd.to_numeric(s, errors="coerce").abs().sum()),
            active_funds=("FundID", "nunique"),
        ).sort_values("quarter_end")
        hist_last = hist_agg[hist_agg["quarter_end"] <= start_qe].tail(PACE_CALIB_N_Q)
        hist_draw_per_active = float((hist_last["hist_draw"] / hist_last["active_funds"]).mean())

        # expected draw per active fund from base pace + gating at cutoff
        exp_draws = []
        for fid, st in fund_states.items():
            denom = st.get("commitment", 0.0) + st.get("recall_cum", 0.0)
            if denom <= 0:
                continue
            age_q = max(int(st.get("age0", 0)), 0)
            age_bucket = make_age_bucket(age_q)
            strategy = st.get("strategy", "Unknown")
            grade = st.get("grade", "D")
            if age_bucket in ("0-3", "4-7"):
                pace = get_draw_pace(strategy, grade, age_bucket, use_positive=False)
                if pace is None or not np.isfinite(pace) or pace <= 0:
                    pace = draw_pace_global_all if draw_pace_global_all > 0 else 0.0
                p_draw = get_p_draw_adjusted(strategy, grade, age_bucket)
            else:
                pace = get_draw_pace(strategy, grade, age_bucket, use_positive=True)
                if pace is None or not np.isfinite(pace) or pace <= 0:
                    pace = draw_pace_global_pos if draw_pace_global_pos > 0 else 0.0
                # scale pace upward using adjusted vs model p_draw (uplift-only)
                model_p = get_model_p_draw(strategy, grade, age_bucket)
                adj_p = get_p_draw_adjusted(strategy, grade, age_bucket)
                if model_p is not None and np.isfinite(model_p) and model_p > 0:
                    scale = adj_p / model_p if np.isfinite(adj_p) else 1.0
                    if np.isfinite(scale) and scale > 1.0:
                        pace = pace * scale
                p_draw = 1.0  # gating off after early years
            exp_draws.append(denom * pace * p_draw)

        exp_draw_per_active = float(np.mean(exp_draws)) if exp_draws else np.nan
        if np.isfinite(hist_draw_per_active) and np.isfinite(exp_draw_per_active) and exp_draw_per_active > 0:
            raw_scale = hist_draw_per_active / exp_draw_per_active
            PACE_SCALE = float(raw_scale)
            print(f"AUTO_PACE_ONEPASS scale (raw): {raw_scale:.3f}")
        else:
            print("AUTO_PACE_ONEPASS scale skipped: insufficient data")
    except Exception as e:
        print("AUTO_PACE_ONEPASS scale skipped:", e)

# Update horizon to max planned end + overrun
max_end = pd.to_datetime(pd.Series([st["fund_end_qe"] for st in fund_states.values()])).dropna()
if len(max_end):
    max_end_qe = max_end.max()
    total_q = int(pd.Period(max_end_qe, freq="Q").ordinal - pd.Period(start_qe, freq="Q").ordinal)
    if total_q > 0:
        quarters = [start_qe + pd.offsets.QuarterEnd(i) for i in range(1, total_q + 1)]
        HORIZON_Q = len(quarters)
        print(f"Adjusted horizon to planned end: {HORIZON_Q} quarters, end={max_end_qe.date()}")

fund_ids = list(fund_states.keys())


AUTO_PACE_ONEPASS scale skipped: insufficient data
Adjusted horizon to planned end: 65 quarters, end=2041-12-31


  pdraw_sga = hist_sorted.groupby(G_SGA)["draw_flag"].agg(["mean", "count"])
  pdraw_sa = hist_sorted.groupby(G_SA)["draw_flag"].agg(["mean", "count"])
  pace_sga_all = hist_sorted.groupby(G_SGA)["delta_ratio"].mean().to_dict()
  pace_sga_all_n = hist_sorted.groupby(G_SGA)["delta_ratio"].count().to_dict()
  pace_sa_all = hist_sorted.groupby(G_SA)["delta_ratio"].mean().to_dict()
  pace_sa_all_n = hist_sorted.groupby(G_SA)["delta_ratio"].count().to_dict()
  pace_sga_pos = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SGA)["delta_ratio"].mean().to_dict()
  pace_sga_pos_n = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SGA)["delta_ratio"].count().to_dict()
  pace_sa_pos = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SA)["delta_ratio"].mean().to_dict()
  pace_sa_pos_n = hist_sorted[hist_sorted["draw_flag"]].groupby(G_SA)["delta_ratio"].count().to_dict()


In [None]:
# --- Omega setup ---

omega_mu = 0.0
omega_sig = 0.0

if OMEGA_MODE == "global":
    df2 = df.sort_values(["FundID", "quarter_end"]).copy()
    df2["nav_prev"] = df2.groupby("FundID")["NAV Adjusted EUR"].shift(1)
    df2["flow_net"] = pd.to_numeric(df2["Adj Drawdown EUR"], errors="coerce").fillna(0.0) -                       pd.to_numeric(df2["Adj Repayment EUR"], errors="coerce").fillna(0.0)
    m = df2["nav_prev"].abs() > 1.0
    omega = ((df2.loc[m, "NAV Adjusted EUR"] - df2.loc[m, "nav_prev"]) - df2.loc[m, "flow_net"]) / df2.loc[m, "nav_prev"]
    omega = omega.replace([np.inf, -np.inf], np.nan).dropna()
    if len(omega):
        omega_mu = float(omega.mean())
        omega_sig = float(omega.std(ddof=1))

# MSCI stats for unconditional mode
msci_mu = 0.0
msci_sigma = 0.0
msci_map = {}
msci_lag_map = {}
if msci_q is not None and len(msci_q):
    msci_mu = float(msci_q["msci_ret_q"].mean())
    msci_sigma = float(msci_q["msci_ret_q"].std(ddof=1))
    msci_map = dict(zip(msci_q["quarter_end"], msci_q["msci_ret_q"]))
    msci_lag_map = dict(zip(msci_q["quarter_end"], msci_q["msci_ret_q_lag1"]))
    if not np.isfinite(msci_sigma) or msci_sigma <= 0:
        msci_sigma = 1e-6

rng = np.random.default_rng(SEED)

In [None]:
# --- Run simulation ---

DEBT_STRATEGIES = {"Hybrid Debt-Equity", "Private Debt", "Other Private Debt"}
VC_STRATEGY = "Venture Capital"

GRADE_ORDER = ["A", "B", "C", "D"]
grade_to_idx = {g: i for i, g in enumerate(GRADE_ORDER)}
idx_to_grade = {i: g for g, i in grade_to_idx.items()}

def quartile_to_grade(s):
    r = s.rank(pct=True)
    return pd.cut(r, [0, 0.25, 0.5, 0.75, 1], labels=["D", "C", "B", "A"], include_lowest=True)

def worse_grade(g1, g2):
    if pd.isna(g1):
        return g2
    if pd.isna(g2):
        return g1
    return g1 if grade_to_idx[g1] >= grade_to_idx[g2] else g2

def downgrade_one_notch(g):
    if pd.isna(g):
        return g
    return idx_to_grade[min(grade_to_idx[g] + 1, 3)]

def final_grade(row):
    if SMALL_SAMPLE_RULE_ENABLED and row["StrategyFundCount"] < 30:
        return worse_grade(row["Grade_DPI"], row["Grade_TVPI"])
    if row["IsDebt"]:
        return row["Grade_IRR"]
    if row["IsInvestmentPeriod"]:
        return row["Grade_TVPI"] if row["IsVC"] else row["Grade_DPI"]
    base = row["Grade_IRR"]
    dpi_g = row["Grade_DPI"]
    if pd.isna(base):
        return base
    if pd.notna(dpi_g) and (grade_to_idx[dpi_g] > grade_to_idx[base]):
        return downgrade_one_notch(base)
    return base


def xnpv(rate, cfs, dts):
    dts = np.asarray(dts, dtype="datetime64[ns]")
    cfs = np.asarray(cfs, dtype=float)
    t0 = dts[0]
    day_counts = (dts - t0) / np.timedelta64(1, "D")
    years = day_counts / 365.0
    return np.sum(cfs / ((1.0 + rate) ** years))


def xirr_newton(cfs, dts, guess=0.1, max_iter=80, tol=1e-7):
    dts = np.asarray(dts, dtype="datetime64[ns]")
    cfs = np.asarray(cfs, dtype=float)
    rate = float(guess)
    for _ in range(max_iter):
        f = xnpv(rate, cfs, dts)
        if not np.isfinite(f):
            return np.nan
        if abs(f) < tol:
            return rate
        eps = 1e-6
        f1 = xnpv(rate + eps, cfs, dts)
        df = (f1 - f) / eps
        if df == 0 or not np.isfinite(df):
            return np.nan
        rate_new = rate - f / df
        if rate_new <= -0.999999 or not np.isfinite(rate_new):
            return np.nan
        rate = rate_new
    return np.nan


def compute_state_irr(st, qe, tvpi):
    if len(st["cf_amounts"]) == 0:
        return np.nan
    cfs = np.asarray(st["cf_amounts"], dtype=float)
    dts = np.asarray(st["cf_dates"], dtype="datetime64[ns]")
    terminal_nav = float(abs(st["nav"])) if np.isfinite(st["nav"]) else 0.0
    cfs_full = np.append(cfs, terminal_nav)
    dts_full = np.append(dts, np.datetime64(qe, "ns"))
    if not (np.any(cfs_full < 0) and np.any(cfs_full > 0)):
        return np.nan
    guess = 0.10 if (pd.notna(tvpi) and tvpi > 1.0) else -0.10
    irr = xirr_newton(cfs_full, dts_full, guess=guess)
    if pd.notna(tvpi) and (0.98 <= tvpi <= 1.02):
        if not np.isfinite(irr):
            irr2 = xirr_newton(cfs_full, dts_full, guess=-guess)
            if np.isfinite(irr2):
                irr = irr2
            else:
                irr = 0.0
    return irr


def assign_current_grades(metrics_df):
    dfm = metrics_df.copy()
    counts = dfm.groupby("AdjStrategy")["FundID"].nunique().rename("StrategyFundCount").reset_index()
    dfm = dfm.merge(counts, on="AdjStrategy", how="left")
    dfm["Grade_DPI"] = dfm.groupby(["AdjStrategy"])["DPI"].transform(quartile_to_grade)
    dfm["Grade_TVPI"] = dfm.groupby(["AdjStrategy"])["TVPI"].transform(quartile_to_grade)
    dfm["Grade_IRR"] = dfm.groupby(["AdjStrategy"])["IRR"].transform(quartile_to_grade)
    dfm["IsDebt"] = dfm["AdjStrategy"].isin(DEBT_STRATEGIES)
    dfm["IsVC"] = dfm["AdjStrategy"].eq(VC_STRATEGY)
    dfm["CurrentGrade"] = dfm.apply(final_grade, axis=1)
    return dfm


sim_nav = np.zeros((N_SIMS, HORIZON_Q))
nav_sum_fund = None
fund_index = {fid: i for i, fid in enumerate(fund_ids)}
nav_sum_fund = np.zeros((len(fund_ids), HORIZON_Q))
draw_sum_fund = np.zeros((len(fund_ids), HORIZON_Q))
rep_sum_fund = np.zeros((len(fund_ids), HORIZON_Q))
sim_draw = np.zeros((N_SIMS, HORIZON_Q))
sim_rep = np.zeros((N_SIMS, HORIZON_Q))
rep_bucket_stats = {}

def _rep_stat_key(strategy, grade, age_bucket):
    return (strategy, grade, age_bucket)

def _rep_stat_update(key, obs_inc=0, event_inc=0, ratio_val=0.0, pos_inc=0, pos_ratio_val=0.0):
    b = rep_bucket_stats.get(key)
    if b is None:
        b = {"obs": 0, "events": 0, "sum_ratio": 0.0, "pos": 0, "sum_ratio_pos": 0.0}
        rep_bucket_stats[key] = b
    b["obs"] += obs_inc
    b["events"] += event_inc
    b["sum_ratio"] += ratio_val
    b["pos"] += pos_inc
    b["sum_ratio_pos"] += pos_ratio_val

zero_gate = np.zeros(HORIZON_Q)
zero_target = np.zeros(HORIZON_Q)
zero_capacity = np.zeros(HORIZON_Q)
zero_pace = np.zeros(HORIZON_Q)
draw_positive = np.zeros(HORIZON_Q)
active_count = np.zeros(HORIZON_Q)

rep_event_count = np.zeros(HORIZON_Q)
rep_navpos_count = np.zeros(HORIZON_Q)
rep_prep_nan = np.zeros(HORIZON_Q)
timing_key_missing = np.zeros(HORIZON_Q)

rep_rr_missing = np.zeros(HORIZON_Q)



# Precompute MSCI projections for unconditional mode
proj_mc = None
if msci_q is not None:
    proj_mc, _, _, _ = project_msci_mc(
        df_q_hist=msci_q,
        start_quarter_end=start_qe,
        n_quarters=HORIZON_Q,
        n_sims=N_SIMS,
        seed=SEED,
        scenario=MSCI_SCENARIO,
        tilt_strength=MSCI_TILT_STRENGTH,
    )

for s in range(N_SIMS):
    # MSCI series for this simulation (projected path)
    pm = proj_mc[proj_mc["sim_id"] == (s + 1)].sort_values("quarter_end")
    msci_series = pm["msci_ret_q"].tolist()
    if len(msci_series) < HORIZON_Q:
        msci_series = msci_series + [0.0] * (HORIZON_Q - len(msci_series))
    msci_lag_series = [msci_series[0]] + msci_series[:-1]

    state = {
        fid: {
            **st,
            "ledger": RecallableLedger(
                rho=st["ledger"].rho,
                expiry_quarters=st["ledger"].expiry_quarters,
                commitment=st["ledger"].commitment,
                buckets=[RecallableBucket(b.created_q, b.expiry_q, b.amount_remaining) for b in st["ledger"].buckets],
            ),
        } for fid, st in fund_states.items()
    }

    for t, qe in enumerate(quarters):
        U = {
            "draw_event": one_factor_uniforms(len(fund_ids), rng, rho_event),
            "draw_size": one_factor_uniforms(len(fund_ids), rng, rho_size),
            "rep_event": one_factor_uniforms(len(fund_ids), rng, rho_event),
            "rep_size": one_factor_uniforms(len(fund_ids), rng, rho_size),
            "rc_event": one_factor_uniforms(len(fund_ids), rng, rho_event),
            "rc_size": one_factor_uniforms(len(fund_ids), rng, rho_size),
        }

        for i, fid in enumerate(fund_ids):
            st = state[fid]
            if pd.notna(st.get("start_qe")) and st.get("start_qe") > start_qe:
                age_q = int(pd.Period(qe, freq="Q").ordinal - pd.Period(st["start_qe"], freq="Q").ordinal) + 1
            else:
                age_q = int(st["age0"] + t + 1)
            age_bucket = make_age_bucket(age_q)
            strategy = st["strategy"]
            grade = st["grade"]

            fund_end = st.get("fund_end_qe", pd.NaT)
            if pd.notna(fund_end) and qe > fund_end:
                st["nav"] = 0.0
                continue

            fund_start = st.get("start_qe", None)
            if pd.notna(fund_start) and qe < fund_start:
                continue

            active_count[t] += 1

            # timing probabilities (hierarchical fallback)
            tp = get_timing_probs(strategy, grade, age_bucket)
            if tp is None:
                timing_key_missing[t] += 1
            p_rep = float(tp.get("p_rep", 0.0))
            if not np.isfinite(p_rep):
                rep_prep_nan[t] += 1
            p_rc = float(tp.get("p_rc_given_rep", 0.0))

            # calibrate draw frequency to historical (multiplier + blend)
            p_draw = get_p_draw_adjusted(strategy, grade, age_bucket)

            draw_event = U["draw_event"][i] < p_draw
            # for cumulative draw ratios, relax gating after early years
            if age_bucket not in ("0-3", "4-7"):
                draw_event = True
            rep_event = U["rep_event"][i] < p_rep
            if rep_event:
                rep_event_count[t] += 1

            draw_reason = None
            if not draw_event:
                draw_reason = "gate"

            # draw ratio
            rkey = (strategy, grade, age_bucket, "draw_ratio")
            rr = ratio_sel.get(rkey)
            draw_ratio = 0.0
            if draw_event and rr is not None:
                dist = rr.get("dist")
                params = parse_params(rr.get("params"))
                draw_ratio = sample_from_dist(dist, params, float(U["draw_size"][i]))
            if DRAW_RATIO_CAP is not None:
                draw_ratio = float(np.clip(draw_ratio, 0.0, DRAW_RATIO_CAP))

            ledger = st["ledger"]
            rc_avail = ledger.available(t)
            remaining_commit = max(st["commitment"] - st["dd_commit"], 0.0)
            capacity = remaining_commit + rc_avail

            # draw_ratio now represents target cumulative draw ratio (cumulative draw / commitment)
            target_ratio = draw_ratio
            denom = st["commitment"] + st.get("recall_cum", 0.0)
            target_cum = target_ratio * denom
            gap_cum = max(target_cum - st["draw_cum"], 0.0)
            # smooth catch-up using data-driven draw pace by age bucket
            if gap_cum > 0 and denom > 0:
                # use conditional pace if draws happen every quarter (no gating), else unconditional
                if age_bucket in ("0-3", "4-7"):
                    pace = get_draw_pace(strategy, grade, age_bucket, use_positive=False)
                    if pace is None or not np.isfinite(pace) or pace <= 0:
                        pace = draw_pace_global_all if draw_pace_global_all > 0 else 0.0
                else:
                    pace = get_draw_pace(strategy, grade, age_bucket, use_positive=True)
                    if pace is None or not np.isfinite(pace) or pace <= 0:
                        pace = draw_pace_global_pos if draw_pace_global_pos > 0 else 0.0
                    # scale pace upward using adjusted vs model p_draw (uplift-only)
                    model_p = get_model_p_draw(strategy, grade, age_bucket)
                    adj_p = get_p_draw_adjusted(strategy, grade, age_bucket)
                    if model_p is not None and np.isfinite(model_p) and model_p > 0:
                        scale = adj_p / model_p if np.isfinite(adj_p) else 1.0
                        if np.isfinite(scale) and scale > 1.0:
                            pace = pace * scale
                pace = pace * PACE_SCALE
                gap_ratio = gap_cum / denom if denom > 0 else 0.0
                if pace > 0:
                    exp_draw_q = max(1, int(round(gap_ratio / pace)))
                else:
                    exp_draw_q = 1
                # cap to remaining quarters if fund_end_qe is known
                if pd.notna(st.get("fund_end_qe")):
                    rem_q = int(pd.Period(st["fund_end_qe"], freq="Q").ordinal - pd.Period(qe, freq="Q").ordinal)
                    exp_draw_q = min(exp_draw_q, max(rem_q, 1))
                draw_amt = min(gap_cum / exp_draw_q, gap_cum)
            else:
                draw_amt = 0.0
            if draw_amt > capacity:
                draw_amt = capacity
            if draw_amt <= 0:
                if draw_reason is None:
                    if gap_cum <= 0 or denom <= 0:
                        draw_reason = "target"
                    elif capacity <= 0:
                        draw_reason = "capacity"
                    else:
                        draw_reason = "pace"
            if draw_amt > 0:
                draw_positive[t] += 1
            else:
                if draw_reason == "gate":
                    zero_gate[t] += 1
                elif draw_reason == "target":
                    zero_target[t] += 1
                elif draw_reason == "capacity":
                    zero_capacity[t] += 1
                elif draw_reason == "pace":
                    zero_pace[t] += 1
            cons = ledger.consume_for_drawdown(t, draw_amt)
            st["dd_commit"] += cons["use_commitment"]

            # repayment ratio
            NAV_prev = float(st["nav"])
            if NAV_prev > 1.0:
                rep_navpos_count[t] += 1
            rep_ratio = 0.0
            if rep_event and NAV_prev > 1.0:
                rkey = (strategy, grade, age_bucket, "rep_ratio")
                rr = ratio_sel.get(rkey)
                if rr is None:
                    rep_rr_missing[t] += 1
                if rr is not None:
                    dist = rr.get("dist")
                    params = parse_params(rr.get("params"))
                    rep_ratio = sample_from_dist(dist, params, float(U["rep_size"][i]))
            rep_ratio = float(np.clip(rep_ratio, 0.0, 1.0))
            # accumulate repayment stats by bucket (per-sim aggregated)
            if NAV_prev > 1.0:
                k = _rep_stat_key(strategy, grade, age_bucket)
                pos = 1 if rep_ratio > 0 else 0
                _rep_stat_update(
                    k,
                    obs_inc=1,
                    event_inc=1 if rep_event else 0,
                    ratio_val=rep_ratio,
                    pos_inc=pos,
                    pos_ratio_val=rep_ratio if pos else 0.0,
                )

            rep_ratio = float(np.clip(rep_ratio, 0.0, 1.0))
            rep_amt = rep_ratio * NAV_prev

            # recallable
            rc_amt = 0.0
            if rep_amt > 0 and (U["rc_event"][i] < p_rc):
                rkey = (strategy, grade, age_bucket, "rc_ratio_given_rep")
                rr = ratio_sel.get(rkey)
                if rr is not None:
                    dist = rr.get("dist")
                    params = parse_params(rr.get("params"))
                    rc_ratio = sample_from_dist(dist, params, float(U["rc_size"][i]))
                    rc_ratio = float(np.clip(rc_ratio, 0.0, 1.0))
                    rc_target = rc_ratio * rep_amt
                    # cap recallable by cumulative repayments and cumulative drawdowns
                    max_by_rep = max((st["rep_cum"] + rep_amt) - st.get("recall_cum", 0.0), 0.0)
                    max_by_draw = max((st["draw_cum"] + draw_amt) - st.get("recall_cum", 0.0), 0.0)
                    rc_target = min(rc_target, max_by_rep, max_by_draw)
                    rc_amt = ledger.add_recallable(t, rc_target, enforce_cap=True)

            # NAV update
            nav_after_flow = max(NAV_prev + draw_amt - rep_amt, 0.0)
            omega = 0.0
            if OMEGA_MODE == "global" and omega_sig > 0:
                omega = float(rng.normal(omega_mu, omega_sig))
            elif OMEGA_MODE == "calibrated" and omega_sel is not None:
                ok = (strategy, grade, age_bucket)
                op = omega_sel.get(ok)
                if op is None:
                    op = next(iter([v for k, v in omega_sel.items() if k[0] == strategy]), None)
                if op is not None:
                    a0 = float(op.get("a_intercept", 0.0))
                    b0 = float(op.get("b0", 0.0))
                    b1 = float(op.get("b1", 0.0))
                    alpha = float(op.get("alpha", 0.0))
                    sigma = float(op.get("sigma", 0.0))
                    msci_r = float(msci_series[t])
                    msci_r_lag1 = float(msci_lag_series[t])
                    omega = (a0 + alpha) + b0 * msci_r + b1 * msci_r_lag1 + sigma * float(rng.standard_normal())

            if OMEGA_CLIP is not None:
                omega = float(np.clip(omega, OMEGA_CLIP[0], OMEGA_CLIP[1]))

            nav_after = max(nav_after_flow * (1.0 + omega), 0.0)

            rem_q = None
            if pd.notna(st.get("fund_end_qe")):
                rem_q = int(pd.Period(st["fund_end_qe"], freq="Q").ordinal - pd.Period(qe, freq="Q").ordinal) + 1
            if NAV_ANCHOR_ENABLED:
                paid_in_after = st["draw_cum"] + draw_amt
                if paid_in_after > 0:
                    target = NAV_TARGETS.get((strategy, age_bucket))
                    if target is None and NAV_TARGETS_STRAT:
                        target = NAV_TARGETS_STRAT.get(strategy)
                    if target is not None:
                        age_idx = AGE_BUCKET_ORDER.get(age_bucket, -1)
                        min_idx = AGE_BUCKET_ORDER.get(NAV_ANCHOR_MIN_AGE_BUCKET, 0)
                        use_anchor = age_idx >= min_idx
                        if rem_q is not None and rem_q <= NAV_ANCHOR_END_Q:
                            use_anchor = True
                        if use_anchor:
                            lam = NAV_LAM_STRAT.get(strategy, NAV_ANCHOR_LAMBDA_DEFAULT)
                            lam_end = NAV_LAM_END_STRAT.get(strategy, NAV_ANCHOR_END_LAMBDA_DEFAULT)
                            if rem_q is not None and rem_q <= NAV_ANCHOR_END_Q:
                                if np.isfinite(lam_end):
                                    lam = lam_end
                            current_ratio = nav_after / paid_in_after
                            adj = 1.0 + lam * (target - current_ratio)
                            adj = float(np.clip(adj, NAV_ANCHOR_MIN_MULT, NAV_ANCHOR_MAX_MULT))
                            nav_after = max(nav_after * adj, 0.0)
            st["nav"] = nav_after

            # update cumulative flows + cashflow history
            st["draw_cum"] += draw_amt
            denom = st["commitment"] + st.get("recall_cum", 0.0)
            st["draw_cum_ratio"] = st["draw_cum"] / denom if denom else 0.0
            st["rep_cum"] += rep_amt
            if draw_amt != 0.0 or rep_amt != 0.0:
                st["cf_amounts"].append(-draw_amt + rep_amt)
                st["cf_dates"].append(qe)

            sim_draw[s, t] += draw_amt
            sim_rep[s, t] += rep_amt
            sim_nav[s, t] += nav_after
            nav_sum_fund[fund_index[fid], t] += nav_after
            draw_sum_fund[fund_index[fid], t] += draw_amt
            rep_sum_fund[fund_index[fid], t] += rep_amt
        # update grades yearly using simulated performance
        if GRADE_UPDATE_ENABLED and (t + 1) % 4 == 0:
            metrics = []
            for fid in fund_ids:
                st = state[fid]
                paid_in = float(abs(st["draw_cum"]))
                distributed = float(abs(st["rep_cum"]))
                nav = float(abs(st["nav"]))
                dpi = distributed / paid_in if paid_in > 0 else np.nan
                tvpi = (distributed + nav) / paid_in if paid_in > 0 else np.nan
                irr = compute_state_irr(st, qe, tvpi)
                is_invest = False
                if pd.notna(st.get("invest_end")):
                    is_invest = qe <= st.get("invest_end")
                metrics.append({
                    "FundID": fid,
                    "AdjStrategy": st["strategy"],
                    "DPI": dpi,
                    "TVPI": tvpi,
                    "IRR": irr,
                    "IsInvestmentPeriod": is_invest,
                })
            metrics_df = pd.DataFrame(metrics)
            metrics_df = assign_current_grades(metrics_df)
            for row in metrics_df.itertuples(index=False):
                st = state[row.FundID]
                # anchor grade to first grade for first 5 years since fund start
                if pd.notna(st.get("start_qe")):
                    q_since = int(pd.Period(qe, freq="Q").ordinal - pd.Period(st["start_qe"], freq="Q").ordinal) + 1
                else:
                    q_since = int(st.get("age0", 0)) + t + 1
                if q_since < GRADE_ANCHOR_Q:
                    if st.get("grade_seed") is not None:
                        st["grade"] = str(st.get("grade_seed"))
                    continue
                if pd.notna(row.CurrentGrade):
                    st["grade"] = str(row.CurrentGrade)




out_dir = Path(PROJ_DIR) / "sim_outputs"
# write MSCI projected paths
try:
    if proj_mc is not None:
        proj_mc.to_csv(out_dir / "msci_paths.csv", index=False)
        print("Wrote:", out_dir / "msci_paths.csv")
except Exception as e:
    print("MSCI path output skipped:", e)

out_dir.mkdir(parents=True, exist_ok=True)
# write repayment stats by bucket
try:
    rows = []
    for (strategy, grade, age_bucket), b in rep_bucket_stats.items():
        obs = b.get("obs", 0)
        events = b.get("events", 0)
        pos = b.get("pos", 0)
        sum_ratio = b.get("sum_ratio", 0.0)
        sum_ratio_pos = b.get("sum_ratio_pos", 0.0)
        p_rep = (events / obs) if obs else np.nan
        mean_uncond = (sum_ratio / obs) if obs else np.nan
        mean_cond = (sum_ratio_pos / pos) if pos else np.nan
        rows.append({
            "Adj Strategy": strategy,
            "Grade": grade,
            "AgeBucket": age_bucket,
            "n_obs": obs,
            "n_events": events,
            "p_rep": p_rep,
            "rep_ratio_mean_uncond": mean_uncond,
            "rep_ratio_mean_cond": mean_cond,
        })
    rep_stats_df = pd.DataFrame(rows)
    rep_stats_df.to_csv(out_dir / "sim_rep_stats_by_bucket.csv", index=False)
    print("Wrote:", out_dir / "sim_rep_stats_by_bucket.csv")
except Exception as e:
    print("Rep stats output skipped:", e)

out_dir.mkdir(parents=True, exist_ok=True)

pd.DataFrame({
    "quarter_end": quarters,
    "sim_draw_mean": sim_draw.mean(axis=0),
    "sim_rep_mean": sim_rep.mean(axis=0),
    "sim_nav_mean": sim_nav.mean(axis=0),
}).to_csv(out_dir / "sim_portfolio_series.csv", index=False)

print("Wrote:", out_dir / "sim_portfolio_series.csv")
# write fund-level mean NAV paths
nav_mean = nav_sum_fund / float(N_SIMS)
rows = []
for i, fid in enumerate(fund_ids):
    for t, qe in enumerate(quarters):
        rows.append({"FundID": fid, "quarter_end": qe, "nav_mean": nav_mean[i, t]})
fund_nav_path = out_dir / "sim_fund_nav_mean.csv"
pd.DataFrame(rows).to_csv(fund_nav_path, index=False)
print("Wrote:", fund_nav_path)

# write fund-level end-of-life NAV (mean)
end_rows = []
for i, fid in enumerate(fund_ids):
    st = fund_states.get(fid, {})
    fe = st.get("fund_end_qe", None)
    if pd.notna(fe):
        q_idx = int(pd.Period(fe, freq="Q").ordinal - pd.Period(start_qe, freq="Q").ordinal)
        q_idx = max(0, min(q_idx-1, len(quarters)-1))
    else:
        q_idx = len(quarters)-1
    end_rows.append({"FundID": fid, "fund_end_qe": fe, "nav_end_mean": nav_mean[i, q_idx]})
fund_nav_end = out_dir / "sim_fund_nav_end_mean.csv"
pd.DataFrame(end_rows).to_csv(fund_nav_end, index=False)
print("Wrote:", fund_nav_end)

# write fund-level mean draw/rep paths
draw_mean = draw_sum_fund / float(N_SIMS)
rep_mean = rep_sum_fund / float(N_SIMS)
rows = []
for i, fid in enumerate(fund_ids):
    for t, qe in enumerate(quarters):
        rows.append({"FundID": fid, "quarter_end": qe, "draw_mean": draw_mean[i, t]})
fund_draw_path = out_dir / "sim_fund_draw_mean.csv"
pd.DataFrame(rows).to_csv(fund_draw_path, index=False)
print("Wrote:", fund_draw_path)

rows = []
for i, fid in enumerate(fund_ids):
    for t, qe in enumerate(quarters):
        rows.append({"FundID": fid, "quarter_end": qe, "rep_mean": rep_mean[i, t]})
fund_rep_path = out_dir / "sim_fund_rep_mean.csv"
pd.DataFrame(rows).to_csv(fund_rep_path, index=False)
print("Wrote:", fund_rep_path)

# write fund-level end-of-life draw/rep (mean)
end_rows = []
for i, fid in enumerate(fund_ids):
    st = fund_states.get(fid, {})
    fe = st.get("fund_end_qe", None)
    if pd.notna(fe):
        q_idx = int(pd.Period(fe, freq="Q").ordinal - pd.Period(start_qe, freq="Q").ordinal)
        q_idx = max(0, min(q_idx-1, len(quarters)-1))
    else:
        q_idx = len(quarters)-1
    end_rows.append({"FundID": fid, "fund_end_qe": fe, "draw_end_mean": draw_mean[i, q_idx], "rep_end_mean": rep_mean[i, q_idx]})
fund_flow_end = out_dir / "sim_fund_flow_end_mean.csv"
pd.DataFrame(end_rows).to_csv(fund_flow_end, index=False)
print("Wrote:", fund_flow_end)

# diagnostic pace check (last N historical vs first N projected; N defaults to full horizon)
try:
    hist_agg = df.groupby("quarter_end", as_index=False).agg(
        hist_draw=("Adj Drawdown EUR", lambda s: pd.to_numeric(s, errors="coerce").abs().sum()),
    ).sort_values("quarter_end")
    hist_last = hist_agg[hist_agg["quarter_end"] <= start_qe].tail(PACE_CALIB_N_Q)
    proj_first = pd.read_csv(out_dir / "sim_portfolio_series.csv").head(PACE_CALIB_N_Q)
    hist_sum = float(hist_last["hist_draw"].sum())
    proj_sum = float(proj_first["sim_draw_mean"].sum())
    if proj_sum > 0:
        suggested = hist_sum / proj_sum
        print(f"Suggested PACE_SCALE to match history: {suggested:.2f}")
except Exception as e:
    print("PACE_SCALE suggestion skipped:", e)


# --- Diagnostics summary (last 10 historical vs first 10 projected) ---
try:
    hist_agg = df.groupby("quarter_end", as_index=False).agg(
        hist_draw=("Adj Drawdown EUR", lambda s: pd.to_numeric(s, errors="coerce").abs().sum()),
        hist_rep=("Adj Repayment EUR", lambda s: pd.to_numeric(s, errors="coerce").abs().sum()),
        hist_nav=("NAV Adjusted EUR", lambda s: pd.to_numeric(s, errors="coerce").abs().sum()),
        active_funds=("FundID", "nunique"),
    ).sort_values("quarter_end")
    hist_last = hist_agg[hist_agg["quarter_end"] <= start_qe].tail(10)
    proj_first = pd.read_csv(out_dir / "sim_portfolio_series.csv").head(10)
    active_sim = (nav_mean > 1e-6).sum(axis=0)[: len(proj_first)]
    diag = {
        "hist_draw_sum": float(hist_last["hist_draw"].sum()),
        "proj_draw_sum": float(proj_first["sim_draw_mean"].sum()),
        "hist_rep_sum": float(hist_last["hist_rep"].sum()),
        "proj_rep_sum": float(proj_first["sim_rep_mean"].sum()),
        "hist_nav_avg": float(hist_last["hist_nav"].mean()),
        "proj_nav_avg": float(proj_first["sim_nav_mean"].mean()),
        "hist_draw_per_active": float((hist_last["hist_draw"] / hist_last["active_funds"]).mean()),
        "proj_draw_per_active": float((proj_first["sim_draw_mean"] / active_sim).mean()),
        "hist_rep_per_active": float((hist_last["hist_rep"] / hist_last["active_funds"]).mean()),
        "proj_rep_per_active": float((proj_first["sim_rep_mean"] / active_sim).mean()),
    }
    pd.DataFrame([diag]).to_csv(out_dir / "sim_diagnostics_summary.csv", index=False)
    print("Wrote:", out_dir / "sim_diagnostics_summary.csv")
except Exception as e:
    print("Diagnostics summary skipped:", e)

# --- Diagnostics by age bucket ---
try:
    df_diag = df.copy()
    df_diag["AgeBucket"] = pd.cut(
        pd.to_numeric(df_diag.get("Fund_Age_Quarters"), errors="coerce"),
        bins=AGE_BINS_Q,
        labels=AGE_LABELS,
    )
    df_diag["draw_flag"] = pd.to_numeric(df_diag.get("Adj Drawdown EUR"), errors="coerce").abs().fillna(0.0) > 0
    hist_p_draw = df_diag.groupby("AgeBucket")["draw_flag"].mean().rename("hist_p_draw")

    tp = pd.read_csv(Path(FIT_DIR) / "timing_probs_selected.csv")
    tp_p = tp.groupby("AgeBucket")["p_draw"].mean().rename("p_draw_sel")

    rr = pd.read_csv(Path(FIT_DIR) / "ratio_fit_selected.csv")
    rr = rr[rr["ratio"] == "draw_ratio"].copy()
    rr["target"] = rr["median_cap"].fillna(rr["data_median"])
    rr["n"] = pd.to_numeric(rr.get("n"), errors="coerce").fillna(0)
    rr_age = rr.groupby("AgeBucket").apply(
        lambda g: (g["target"] * g["n"]).sum() / g["n"].sum() if g["n"].sum() > 0 else g["target"].mean(),
        include_groups=False,
    ).rename("target_mean")

    out = pd.concat([hist_p_draw, tp_p, rr_age], axis=1).reset_index()
    out.to_csv(out_dir / "sim_diagnostics_by_age.csv", index=False)
    print("Wrote:", out_dir / "sim_diagnostics_by_age.csv")
except Exception as e:
    print("Diagnostics by age skipped:", e)

# --- Diagnostics: draw zero reasons by quarter ---
try:
    total_active = active_count if active_count is not None else None
    if total_active is not None:
        df_zero = pd.DataFrame({
            "quarter_end": quarters,
            "active_funds": total_active,
            "draw_positive": draw_positive,
            "zero_gate": zero_gate,
            "zero_target": zero_target,
            "zero_capacity": zero_capacity,
            "zero_pace": zero_pace,
        })
        # shares
        for c in ["draw_positive","zero_gate","zero_target","zero_capacity","zero_pace"]:
            df_zero[c + "_share"] = np.where(df_zero["active_funds"] > 0, df_zero[c] / df_zero["active_funds"], np.nan)
        df_zero.to_csv(out_dir / "draw_zero_reasons_by_quarter.csv", index=False)
        summary = {
            "active_funds": float(df_zero["active_funds"].sum()),
            "draw_positive": float(df_zero["draw_positive"].sum()),
            "zero_gate": float(df_zero["zero_gate"].sum()),
            "zero_target": float(df_zero["zero_target"].sum()),
            "zero_capacity": float(df_zero["zero_capacity"].sum()),
            "zero_pace": float(df_zero["zero_pace"].sum()),
        }
        df_sum = pd.DataFrame([summary])
        df_sum.to_csv(out_dir / "draw_zero_reasons_summary.csv", index=False)
        print("Wrote:", out_dir / "draw_zero_reasons_by_quarter.csv")
        print("Wrote:", out_dir / "draw_zero_reasons_summary.csv")
except Exception as e:
    print("Draw zero diagnostics skipped:", e)

# --- Diagnostics: repayment gating ---
try:
    df_repdiag = pd.DataFrame({
        "quarter_end": quarters,
        "rep_event_count": rep_event_count,
        "rep_navpos_count": rep_navpos_count,
        "rep_rr_missing": rep_rr_missing,
        "rep_prep_nan": rep_prep_nan,
        "timing_key_missing": timing_key_missing,
    })
    df_repdiag.to_csv(out_dir / 'repayment_diagnostics_by_quarter.csv', index=False)
    print('Wrote:', out_dir / 'repayment_diagnostics_by_quarter.csv')
except Exception as e:
    print('Repayment diagnostics skipped:', e)



Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/msci_paths.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_rep_stats_by_bucket.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_portfolio_series.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_fund_nav_mean.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_fund_nav_end_mean.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_fund_draw_mean.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_fund_rep_mean.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_fund_flow_end_mean.csv
Suggested PACE_SCALE to match history: 0.00
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_diagnostics_summary.csv
Wrote: model_fits/runs/test_portfolio_2025Q3/projection/sim_outputs/sim_diagnostics_by_age.csv
Wrote: model_fits/runs/test_portf

  hist_p_draw = df_diag.groupby("AgeBucket")["draw_flag"].mean().rename("hist_p_draw")
