# Cashflow Simulation (Fitted Models)

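Lightweight portfolio cashflow simulator driven by fitted event-timing probabilities and ratio distributions, yearly grade transitions, and a one-factor Gaussian copula that correlates events and sizes across funds.
A calibrated omega (NAV growth) table is loaded when `omega_selected.csv` is available; it is only applied when `OMEGA_MODE` is set to `"calibrated"`.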

In [None]:
from pathlib import Path
import ast
import json
import numpy as np
import pandas as pd
from dataclasses import dataclass, field
from math import sqrt
from scipy import stats

In [None]:
# --- Input / output locations ---
INPUT_PATH = "anonymized.csv"
FIT_DIR = "model_fits/outputs"
TRANS_DIR = "model_fits/outputs/transitions"
COPULA_PATH = "model_fits/outputs/copula_params.json"

# --- Simulation settings ---
START_YEAR = 2018
START_QUARTER = "Q1"  # only the last character (the quarter digit) is used
HORIZON_Q = 20  # number of quarters simulated after the start quarter
N_SIMS = 50  # number of simulated paths
SEED = 1234  # seed for the NumPy random generator

OMEGA_MODE = "none"  # "none", "global", or "calibrated"
MSCI_PATH = "msci.xlsx"
MSCI_MODE = "unconditional"  # "conditional" or "unconditional"

if not Path(INPUT_PATH).exists():
    # Fall back to a recursive search below the working directory.  glob()
    # yields matches in arbitrary order, so sort them to make the chosen
    # file reproducible from run to run.
    candidates = sorted(Path.cwd().glob("**/anonymized.csv"))
    if not candidates:
        raise FileNotFoundError("anonymized.csv not found. Set INPUT_PATH to the full path.")
    if len(candidates) > 1:
        print(f"Found {len(candidates)} anonymized.csv files; using the first in sorted order.")
    INPUT_PATH = str(candidates[0])

print("Using INPUT_PATH:", INPUT_PATH)
Path(FIT_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# Fund-age bucket edges, in quarters. make_age_bucket() treats consecutive
# entries as half-open intervals: AGE_BINS_Q[i] < age <= AGE_BINS_Q[i + 1].
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
# Label of each interval above (one fewer entry than AGE_BINS_Q).
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]
# Grade states; used as both row and column order of the transition matrix
# and as the population sampled from when a fund changes grade.
GRADE_STATES = ["A", "B", "C", "D"]


def _norm_key(s: str) -> str:
    """Return *s* lower-cased, with underscores and whitespace runs collapsed to single spaces."""
    spaced = s.strip().lower().replace("_", " ")
    return " ".join(spaced.split())


def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename known input columns to their canonical names.

    Column matching ignores case, underscores and repeated whitespace (see
    ``_norm_key``).  Columns that match no alias are left untouched, and
    aliases that match no column are ignored by ``DataFrame.rename``.

    Returns a renamed copy; *df* itself is not modified.
    """
    # (alias as it may appear in the input, canonical column name)
    aliases = (
        ("Adj strategy", "Adj Strategy"),
        ("Adj Strategy", "Adj Strategy"),
        ("Quarter of Transaction Date", "Quarter"),
        ("Year of Transaction Date", "Year"),
        ("FundID", "FundID"),
        ("Grade", "Grade"),
        ("Current Grade", "Grade_Current"),
        ("CurrentGrade", "Grade_Current"),
        ("Grade Current", "Grade_Current"),
        ("Grade_Current", "Grade_Current"),
        ("Adj Drawdown EUR", "Adj Drawdown EUR"),
        ("Adj Repayment EUR", "Adj Repayment EUR"),
        ("Recallable", "Recallable"),
        ("NAV Adjusted EUR", "NAV Adjusted EUR"),
        ("Commitment EUR", "Commitment EUR"),
        ("Signed Amount EUR", "Signed Amount EUR"),
        ("Capacity", "Capacity"),
        ("Fund_Age_Quarters", "Fund_Age_Quarters"),
        ("draw_cum_prev", "draw_cum_prev"),
        ("Recallable_Percentage_Decimal", "Recallable_Percentage_Decimal"),
        ("Expiration_Quarters", "Expiration_Quarters"),
    )

    # Normalised key -> actual column label present in the frame.
    present = {_norm_key(col): col for col in df.columns}

    mapping = {}
    for alias, canonical in aliases:
        # Fall back to the alias itself when the frame has no such column.
        source = present.get(_norm_key(alias), alias)
        mapping[source] = canonical
    return df.rename(columns=mapping)


def parse_quarter(q) -> float:
    """Convert a quarter label to a float quarter number.

    Accepts numbers (returned as ``float``) and strings such as ``"Q3"``,
    ``"q3"`` or ``"3"``; surrounding whitespace and a single leading ``Q``
    are ignored.

    Returns ``nan`` for missing values and for text that is not a number
    once the optional ``Q`` prefix is removed.
    """
    if pd.isna(q):
        return np.nan
    if isinstance(q, (int, np.integer, float, np.floating)):
        return float(q)
    text = str(q).strip().upper()
    if text.startswith("Q"):
        text = text[1:]
    try:
        return float(text)
    except ValueError:
        # float() on a str only fails with ValueError; anything else would
        # be a genuine bug and should not be swallowed.
        return np.nan


def add_quarter_end(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with parsed ``Quarter``/``Year`` and a ``quarter_end`` column.

    ``Quarter`` is parsed with ``parse_quarter`` and ``Year`` with
    ``pd.to_numeric(errors="coerce")``.  Rows where both are present get the
    timestamp produced by ``to_timestamp("Q")`` on the corresponding
    quarterly period; all other rows keep ``NaT``.
    """
    out = df.copy()
    out["Quarter"] = out["Quarter"].apply(parse_quarter)
    out["Year"] = pd.to_numeric(out["Year"], errors="coerce")

    # Make sure the column exists (as datetime) even when no row is valid,
    # so callers can always reference it.
    if "quarter_end" not in out.columns:
        out["quarter_end"] = pd.NaT

    valid = out["Year"].notna() & out["Quarter"].notna()
    if not valid.any():
        return out

    years = out.loc[valid, "Year"].astype(int).to_numpy()
    quarter_nums = out.loc[valid, "Quarter"].astype(int).to_numpy()

    # The PeriodIndex(year=..., quarter=...) keywords are deprecated in newer
    # pandas in favour of PeriodIndex.from_fields; use whichever is available.
    from_fields = getattr(pd.PeriodIndex, "from_fields", None)
    if from_fields is not None:
        periods = from_fields(year=years, quarter=quarter_nums, freq="Q")
    else:
        periods = pd.PeriodIndex(year=years, quarter=quarter_nums, freq="Q")

    out.loc[valid, "quarter_end"] = periods.to_timestamp("Q")
    return out


def apply_current_grade(df: pd.DataFrame, context: str = "") -> pd.DataFrame:
    """Return a copy of *df* whose ``Grade`` column holds the current grade.

    * If ``Grade_Current`` already exists it is copied into ``Grade``.
    * Otherwise, when ``Grade``, ``FundID`` and ``quarter_end`` are all
      present, blank grades are treated as missing, rows are sorted by fund
      and quarter, and grades are forward-filled within each fund; the
      result is stored in both ``Grade_Current`` and ``Grade``.
    * Otherwise the copy is returned unchanged.

    When *context* is non-empty a one-line message naming it is printed.
    """
    out = df.copy()

    if "Grade_Current" in out.columns:
        out["Grade"] = out["Grade_Current"]
        if context:
            print(f"Using Grade_Current for {context}.")
        return out

    required = ("Grade", "FundID", "quarter_end")
    if any(col not in out.columns for col in required):
        return out

    out["Grade"] = out["Grade"].astype(str).str.strip()
    # String forms that missing values take after astype(str).
    is_blank = out["Grade"].isin(["", "nan", "None", "NaN", "<NA>"])
    out.loc[is_blank, "Grade"] = np.nan

    out = out.sort_values(["FundID", "quarter_end"])
    carried = out.groupby("FundID")["Grade"].ffill()
    out["Grade_Current"] = carried
    out["Grade"] = out["Grade_Current"]
    if context:
        print(f"Computed Grade_Current (forward fill) for {context}.")
    return out


def make_age_bucket(age_q: int, bins=None, labels=None) -> str:
    """Return the age-bucket label for a fund age expressed in quarters.

    Args:
        age_q: Fund age in quarters.
        bins: Ascending bucket edges; defaults to ``AGE_BINS_Q``.  Bucket
            ``i`` covers ``bins[i] < age_q <= bins[i + 1]``.
        labels: One label per bucket; defaults to ``AGE_LABELS``.

    Returns:
        The label of the first bucket whose upper edge is >= *age_q*.
        Ages at or below the lowest edge map to the first (youngest) bucket
        instead of falling through to the oldest one.  Ages above the
        highest edge, and NaN, map to the last bucket.
    """
    edges = AGE_BINS_Q if bins is None else bins
    names = AGE_LABELS if labels is None else labels
    # Edges ascend, so the first upper edge that is >= age_q identifies the bucket.
    for upper, label in zip(edges[1:], names):
        if age_q <= upper:
            return label
    return names[-1]


def one_factor_uniforms(n: int, rng: np.random.Generator, rho: float) -> np.ndarray:
    """Draw *n* uniforms that are correlated through one shared normal factor.

    A single common standard-normal draw is mixed with *n* idiosyncratic
    standard-normal draws using *rho* as the loading on the common factor
    (so the pairwise correlation of the latent normals is ``rho ** 2``), and
    the result is mapped through the standard normal CDF.

    The common factor is drawn from *rng* before the idiosyncratic terms.
    """
    common = rng.standard_normal()
    idiosyncratic = rng.standard_normal(n)
    idio_weight = sqrt(1.0 - rho * rho)
    latent = rho * common + idio_weight * idiosyncratic
    return stats.norm.cdf(latent)


def load_fit_table(path: str, key_cols: list) -> dict:
    """Read a CSV of fitted parameters into a dict keyed by selected columns.

    Args:
        path: Anything ``pd.read_csv`` accepts.
        key_cols: Column names whose values, in order, form each tuple key.

    Returns:
        ``{(row[c] for c in key_cols): row-as-dict}``.  When several rows
        share a key the last one wins.
    """
    table = pd.read_csv(path)
    records = (row.to_dict() for _, row in table.iterrows())
    return {tuple(rec[col] for col in key_cols): rec for rec in records}


def sample_from_dist(dist_name: str, params, u: float) -> float:
    """Return the *u*-quantile of a ``scipy.stats`` distribution.

    Args:
        dist_name: Attribute name of the distribution on ``scipy.stats``.
        params: Positional parameters forwarded to ``ppf`` after *u*.
        u: Probability at which the inverse CDF is evaluated.
    """
    inverse_cdf = getattr(stats, dist_name).ppf
    quantile = inverse_cdf(u, *params)
    return float(quantile)


@dataclass
class RecallableBucket:
    """One tranche of recallable capital held in a RecallableLedger."""

    # Quarter index at which the tranche was added; the ledger consumes
    # tranches in ascending order of this value.
    created_q: int
    # Last quarter index for which the tranche is kept: the ledger drops
    # buckets once the current quarter exceeds this value.
    expiry_q: int
    # Amount not yet consumed by drawdowns.
    amount_remaining: float


@dataclass
class RecallableLedger:
    """Tracks recallable capital of one fund as a list of expiring buckets.

    Attributes are plain dataclass fields: ``rho`` (fraction of commitment
    that caps the recallable balance), ``expiry_quarters`` (bucket lifetime;
    values <= 0 disable the ledger), ``commitment`` and ``buckets``.
    """

    rho: float
    expiry_quarters: int
    commitment: float
    buckets: list = field(default_factory=list)

    def _rc_cap(self) -> float:
        """Return the cap on the recallable balance: rho * commitment, both floored at 0."""
        share = max(float(self.rho), 0.0)
        base = max(float(self.commitment), 0.0)
        return share * base

    def drop_expired(self, q: int) -> None:
        """Discard buckets that are empty or whose expiry lies before quarter *q*."""
        if int(self.expiry_quarters) > 0:
            self.buckets = [
                bucket
                for bucket in self.buckets
                if bucket.expiry_q >= q and bucket.amount_remaining > 0
            ]
        else:
            # A non-positive lifetime means nothing is ever recallable.
            self.buckets = []

    def available(self, q: int) -> float:
        """Return the total recallable amount at quarter *q* (expired buckets are dropped first)."""
        self.drop_expired(q)
        balance = sum(bucket.amount_remaining for bucket in self.buckets)
        return float(balance)

    def add_recallable(self, q: int, rc_amount: float, enforce_cap: bool = True) -> float:
        """Add a new bucket at quarter *q* and return the amount actually added.

        Negative or missing amounts count as zero.  With *enforce_cap* the
        amount is limited to the room left below ``_rc_cap()``.  Returns 0.0
        (and adds nothing) when the ledger is disabled or nothing fits.
        """
        self.drop_expired(q)
        requested = max(float(rc_amount or 0.0), 0.0)
        lifetime = int(self.expiry_quarters)
        if requested <= 0.0 or lifetime <= 0:
            return 0.0

        granted = requested
        if enforce_cap:
            headroom = max(self._rc_cap() - self.available(q), 0.0)
            granted = min(granted, headroom)
        if granted <= 0.0:
            return 0.0

        new_bucket = RecallableBucket(
            created_q=q,
            expiry_q=q + lifetime,
            amount_remaining=float(granted),
        )
        self.buckets.append(new_bucket)
        return float(granted)

    def consume_for_drawdown(self, q: int, draw_amount: float) -> dict:
        """Fund a drawdown from recallable buckets first, oldest bucket first.

        Returns a dict with ``use_rc`` (amount taken from buckets) and
        ``use_commitment`` (the remainder of *draw_amount*, floored at 0).
        """
        self.drop_expired(q)
        outstanding = max(float(draw_amount or 0.0), 0.0)
        if outstanding <= 0.0:
            return {"use_rc": 0.0, "use_commitment": 0.0}

        # Oldest buckets are consumed first.
        self.buckets.sort(key=lambda bucket: bucket.created_q)
        from_rc = 0.0
        for bucket in self.buckets:
            if outstanding <= 0:
                break
            portion = min(bucket.amount_remaining, outstanding)
            bucket.amount_remaining -= portion
            outstanding -= portion
            from_rc += portion
        from_commitment = max(draw_amount - from_rc, 0.0)
        return {"use_rc": from_rc, "use_commitment": from_commitment}

In [None]:
# --- Load data + fits ---
# Reads the transaction history and every fitted-model artefact the
# simulation needs.  Optional inputs (omega table, copula parameters,
# transition matrix, MSCI index) are loaded only when their file exists.

# Transaction history: canonical column names, quarter-end timestamps, rows
# without a fund id or a usable quarter removed, then current grades applied.
df = pd.read_csv(INPUT_PATH, engine="python")
df = normalize_columns(df)
df = add_quarter_end(df)
df = df.dropna(subset=["FundID", "quarter_end"])
df = apply_current_grade(df, context="simulation")

# Simulation calendar: the start quarter (only the last character of
# START_QUARTER is used) and the HORIZON_Q quarter ends that follow it.
start_qe = pd.Period(f"{START_YEAR}Q{START_QUARTER[-1]}", freq="Q").to_timestamp("Q")
quarters = [start_qe + pd.offsets.QuarterEnd(i) for i in range(1, HORIZON_Q + 1)]

# Required fit tables, keyed by the listed columns (missing files raise).
ratio_sel = load_fit_table(Path(FIT_DIR) / "ratio_fit_selected.csv", ["Adj Strategy", "Grade", "AgeBucket", "ratio"])
timing_sel = load_fit_table(Path(FIT_DIR) / "timing_probs_selected.csv", ["Adj Strategy", "Grade", "AgeBucket"])

# Optional calibrated omega table; stays None when the file is absent.
omega_sel = None
omega_sel_path = Path(FIT_DIR) / "omega_selected.csv"
if omega_sel_path.exists():
    omega_sel = load_fit_table(omega_sel_path, ["Adj Strategy", "Grade", "AgeBucket"])
    print("Loaded omega_selected.csv")

# Copula loadings: defaults below, overridden per key by the JSON file.
rho_event = 0.25
rho_size = 0.15
if Path(COPULA_PATH).exists():
    cop = json.loads(Path(COPULA_PATH).read_text())
    rho_event = float(cop.get("rho_event", rho_event))
    rho_size = float(cop.get("rho_size", rho_size))

# Optional grade transition matrix; the first CSV column becomes the index.
trans_all_path = Path(TRANS_DIR) / "grade_transition_1y_all.csv"
trans_all = pd.read_csv(trans_all_path, index_col=0) if trans_all_path.exists() else None

# MSCI quarterly returns
# msci_q stays None unless the workbook exists and has both expected columns.
msci_q = None
if Path(MSCI_PATH).exists():
    msci_raw = pd.read_excel(MSCI_PATH)
    if "Date" in msci_raw.columns and "SCXP Index" in msci_raw.columns:
        msci_raw = msci_raw[["Date", "SCXP Index"]].copy()
        msci_raw["Date"] = pd.to_datetime(msci_raw["Date"], errors="coerce")
        msci_raw["SCXP Index"] = pd.to_numeric(msci_raw["SCXP Index"], errors="coerce")
        msci_raw = msci_raw.dropna(subset=["Date", "SCXP Index"]).sort_values("Date")
        msci_raw["quarter_end"] = msci_raw["Date"].dt.to_period("Q").dt.to_timestamp("Q")
        # Last observed index level in each quarter.
        msci_q = (msci_raw.groupby("quarter_end", as_index=False)["SCXP Index"]
                  .last().rename(columns={"SCXP Index": "index_level"})
                  .sort_values("quarter_end").reset_index(drop=True))
        # Quarter-on-quarter return; the first quarter has none and is dropped.
        msci_q["msci_ret_q"] = msci_q["index_level"].pct_change()
        msci_q = msci_q.dropna(subset=["msci_ret_q"]).reset_index(drop=True)
        # Previous quarter's return; NaN on the first remaining row.
        msci_q["msci_ret_q_lag1"] = msci_q["msci_ret_q"].shift(1)

In [None]:
# --- Build fund states ---
# One starting state per fund, taken from its latest row at or before the
# simulation start quarter.


def _finite_or(value, default: float) -> float:
    """Coerce *value* to a finite float, returning *default* when that is impossible.

    ``pd.to_numeric(x, errors="coerce") or default`` does not work for
    missing data because NaN is truthy; this helper handles NaN, infinities
    and non-numeric values explicitly.
    """
    number = pd.to_numeric(value, errors="coerce")
    try:
        number = float(number)
    except (TypeError, ValueError):
        return default
    return number if np.isfinite(number) else default


hist = df[df["quarter_end"] <= start_qe].copy()
last = hist.sort_values(["FundID", "quarter_end"]).groupby("FundID").tail(1)

fund_states = {}
for _, r in last.iterrows():
    fid = r["FundID"]

    # Missing labels get the same defaults that are used when the column
    # itself is absent.
    strategy = r.get("Adj Strategy", "Unknown")
    if pd.isna(strategy):
        strategy = "Unknown"
    grade = r.get("Grade", "D")
    if pd.isna(grade):
        grade = "D"

    age_q = int(_finite_or(r.get("Fund_Age_Quarters", 0), 0.0))
    nav = _finite_or(r.get("NAV Adjusted EUR", 0), 0.0)

    # Prefer the commitment; fall back to the signed amount when the
    # commitment is missing or zero.
    commitment = _finite_or(r.get("Commitment EUR", 0), 0.0)
    if commitment == 0.0:
        commitment = _finite_or(r.get("Signed Amount EUR", 0), 0.0)

    dd_cum = _finite_or(r.get("draw_cum_prev", 0), 0.0)
    rho = _finite_or(r.get("Recallable_Percentage_Decimal", 0), 0.0)
    exp_q = int(_finite_or(r.get("Expiration_Quarters", 0), 0.0))

    fund_states[fid] = {
        "strategy": strategy,
        "grade": grade,
        "age0": age_q,
        "nav": nav,
        "dd_commit": dd_cum,
        "commitment": commitment,
        # Every fund starts with an empty recallable ledger.
        "ledger": RecallableLedger(rho=rho, expiry_quarters=exp_q, commitment=commitment),
    }

fund_ids = list(fund_states.keys())

In [None]:
# --- Omega setup ---
# Moments used by the NAV growth shock and by the MSCI return generator.

omega_mu = 0.0
omega_sig = 0.0

if OMEGA_MODE == "global":
    # Historical omega: NAV change not explained by net flows, relative to
    # the previous NAV of the same fund.
    df2 = df.sort_values(["FundID", "quarter_end"]).copy()
    # Coerce NAV the same way the flows are coerced, so a text-typed NAV
    # column cannot break the arithmetic below.
    df2["NAV Adjusted EUR"] = pd.to_numeric(df2["NAV Adjusted EUR"], errors="coerce")
    df2["nav_prev"] = df2.groupby("FundID")["NAV Adjusted EUR"].shift(1)
    df2["flow_net"] = (
        pd.to_numeric(df2["Adj Drawdown EUR"], errors="coerce").fillna(0.0)
        - pd.to_numeric(df2["Adj Repayment EUR"], errors="coerce").fillna(0.0)
    )
    # Ignore rows whose previous NAV is too small to serve as a denominator.
    m = df2["nav_prev"].abs() > 1.0
    omega = ((df2.loc[m, "NAV Adjusted EUR"] - df2.loc[m, "nav_prev"]) - df2.loc[m, "flow_net"]) / df2.loc[m, "nav_prev"]
    omega = omega.replace([np.inf, -np.inf], np.nan).dropna()
    if len(omega):
        omega_mu = float(omega.mean())
        omega_sig = float(omega.std(ddof=1))

# MSCI stats for unconditional mode
msci_mu = 0.0
msci_sigma = 0.0
msci_map = {}
msci_lag_map = {}
if msci_q is not None and len(msci_q):
    msci_mu = float(msci_q["msci_ret_q"].mean())
    msci_sigma = float(msci_q["msci_ret_q"].std(ddof=1))
    # quarter_end -> return, and quarter_end -> previous quarter's return.
    msci_map = dict(zip(msci_q["quarter_end"], msci_q["msci_ret_q"]))
    msci_lag_map = dict(zip(msci_q["quarter_end"], msci_q["msci_ret_q_lag1"]))
    # std() is NaN for a single observation; keep the scale strictly positive.
    if not np.isfinite(msci_sigma) or msci_sigma <= 0:
        msci_sigma = 1e-6

rng = np.random.default_rng(SEED)

In [None]:
# --- Run simulation ---
# For each path and quarter: draw correlated uniforms, then per fund apply
# the yearly grade transition, drawdown, repayment, recallable and NAV
# update, accumulating portfolio totals.  Random draws are taken from `rng`
# in a fixed order so results are reproducible for a given SEED.

sim_nav = np.zeros((N_SIMS, HORIZON_Q))
sim_draw = np.zeros((N_SIMS, HORIZON_Q))
sim_rep = np.zeros((N_SIMS, HORIZON_Q))

n_funds = len(fund_ids)

# The transition matrix does not change during the run: align and normalise
# it once instead of once per fund, quarter and path.  Grades without a
# usable row (all zeros, or not in GRADE_STATES) simply never transition.
trans_rows = {}
if trans_all is not None:
    P = trans_all.reindex(index=GRADE_STATES, columns=GRADE_STATES).fillna(0.0)
    for _state in GRADE_STATES:
        _probs = P.loc[_state].to_numpy(dtype=float)
        if _probs.sum() > 0:
            trans_rows[_state] = _probs / _probs.sum()

# Parsed "params" strings, so each distinct string is evaluated only once.
_params_cache = {}


def _first_for_strategy(table: dict, strategy):
    """Return the first entry of *table* whose key starts with *strategy*, or None."""
    return next((v for k, v in table.items() if k[0] == strategy), None)


def _sample_ratio(strategy, grade, age_bucket, ratio_name: str, u) -> float:
    """Sample the fitted ratio for one cell at quantile *u*, clipped to [0, 1].

    Returns 0.0 when ``ratio_sel`` has no fit for the cell.
    """
    fit = ratio_sel.get((strategy, grade, age_bucket, ratio_name))
    if fit is None:
        return 0.0
    params = fit.get("params")
    if isinstance(params, str):
        parsed = _params_cache.get(params)
        if parsed is None:
            parsed = ast.literal_eval(params)
            _params_cache[params] = parsed
        params = parsed
    value = sample_from_dist(fit.get("dist"), params, float(u))
    return float(np.clip(value, 0.0, 1.0))


for s in range(N_SIMS):
    # MSCI series for this simulation
    if MSCI_MODE == "conditional" and msci_map:
        msci_series = [float(msci_map.get(qe, 0.0)) for qe in quarters]
        # The first historical lag is NaN (nothing precedes it); treat it
        # like a missing quarter so it cannot turn every NAV into NaN.
        first_lag = msci_lag_map.get(quarters[0], 0.0)
        if pd.isna(first_lag):
            first_lag = 0.0
        msci_lag_series = [float(first_lag)] + msci_series[:-1]
    else:
        msci_series = list(rng.normal(msci_mu, msci_sigma, size=len(quarters)))
        msci_lag_series = [msci_series[0]] + msci_series[:-1]

    # Fresh per-path copy of every fund state, including its ledger.
    state = {
        fid: {
            **st,
            "ledger": RecallableLedger(
                rho=st["ledger"].rho,
                expiry_quarters=st["ledger"].expiry_quarters,
                commitment=st["ledger"].commitment,
                buckets=[RecallableBucket(b.created_q, b.expiry_q, b.amount_remaining) for b in st["ledger"].buckets],
            ),
        } for fid, st in fund_states.items()
    }

    for t, qe in enumerate(quarters):
        # One correlated uniform per fund for each event and size decision.
        U = {
            "draw_event": one_factor_uniforms(n_funds, rng, rho_event),
            "draw_size": one_factor_uniforms(n_funds, rng, rho_size),
            "rep_event": one_factor_uniforms(n_funds, rng, rho_event),
            "rep_size": one_factor_uniforms(n_funds, rng, rho_size),
            "rc_event": one_factor_uniforms(n_funds, rng, rho_event),
            "rc_size": one_factor_uniforms(n_funds, rng, rho_size),
        }

        for i, fid in enumerate(fund_ids):
            st = state[fid]
            age_q = int(st["age0"] + t + 1)
            age_bucket = make_age_bucket(age_q)
            strategy = st["strategy"]
            grade = st["grade"]

            # grade transition yearly
            if t > 0 and t % 4 == 0:
                row = trans_rows.get(grade)
                if row is not None:
                    grade = str(rng.choice(GRADE_STATES, p=row))
                    st["grade"] = grade

            # timing probabilities (exact cell, else any cell of the strategy)
            tp = timing_sel.get((strategy, grade, age_bucket))
            if tp is None:
                tp = _first_for_strategy(timing_sel, strategy)
            if tp is None:
                p_draw = 0.0
                p_rep = 0.0
                p_rc = 0.0
            else:
                p_draw = float(tp.get("p_draw", 0.0))
                p_rep = float(tp.get("p_rep", 0.0))
                p_rc = float(tp.get("p_rc_given_rep", 0.0))

            draw_event = U["draw_event"][i] < p_draw
            rep_event = U["rep_event"][i] < p_rep

            # draw ratio
            draw_ratio = 0.0
            if draw_event:
                draw_ratio = _sample_ratio(strategy, grade, age_bucket, "draw_ratio", U["draw_size"][i])

            # Drawdowns are sized against unfunded commitment plus
            # recallable capital, and consume recallable capital first.
            ledger = st["ledger"]
            rc_avail = ledger.available(t)
            remaining_commit = max(st["commitment"] - st["dd_commit"], 0.0)
            capacity = remaining_commit + rc_avail

            draw_amt = draw_ratio * capacity
            cons = ledger.consume_for_drawdown(t, draw_amt)
            st["dd_commit"] += cons["use_commitment"]

            # repayment ratio (applied to the NAV before this quarter's flows)
            NAV_prev = float(st["nav"])
            rep_ratio = 0.0
            if rep_event and NAV_prev > 1.0:
                rep_ratio = _sample_ratio(strategy, grade, age_bucket, "rep_ratio", U["rep_size"][i])
            rep_amt = rep_ratio * NAV_prev

            # recallable
            rc_amt = 0.0
            if rep_amt > 0 and (U["rc_event"][i] < p_rc):
                rc_ratio = _sample_ratio(strategy, grade, age_bucket, "rc_ratio_given_rep", U["rc_size"][i])
                rc_amt = ledger.add_recallable(t, rc_ratio * rep_amt, enforce_cap=True)

            # NAV update
            nav_after_flow = max(NAV_prev + draw_amt - rep_amt, 0.0)
            omega = 0.0
            if OMEGA_MODE == "global" and omega_sig > 0:
                omega = float(rng.normal(omega_mu, omega_sig))
            elif OMEGA_MODE == "calibrated" and omega_sel is not None:
                op = omega_sel.get((strategy, grade, age_bucket))
                if op is None:
                    op = _first_for_strategy(omega_sel, strategy)
                if op is not None:
                    a0 = float(op.get("a_intercept", 0.0))
                    b0 = float(op.get("b0", 0.0))
                    b1 = float(op.get("b1", 0.0))
                    alpha = float(op.get("alpha", 0.0))
                    sigma = float(op.get("sigma", 0.0))
                    msci_r = float(msci_series[t])
                    msci_r_lag1 = float(msci_lag_series[t])
                    omega = (a0 + alpha) + b0 * msci_r + b1 * msci_r_lag1 + sigma * float(rng.standard_normal())

            nav_after = max(nav_after_flow * (1.0 + omega), 0.0)
            st["nav"] = nav_after

            sim_draw[s, t] += draw_amt
            sim_rep[s, t] += rep_amt
            sim_nav[s, t] += nav_after

# Write the cross-path mean of each portfolio series.
out_dir = Path(FIT_DIR) / "sim_outputs"
out_dir.mkdir(parents=True, exist_ok=True)

pd.DataFrame({
    "quarter_end": quarters,
    "sim_draw_mean": sim_draw.mean(axis=0),
    "sim_rep_mean": sim_rep.mean(axis=0),
    "sim_nav_mean": sim_nav.mean(axis=0),
}).to_csv(out_dir / "sim_portfolio_series.csv", index=False)

print("Wrote:", out_dir / "sim_portfolio_series.csv")