# Timing Curves (Event Probabilities)

Fit timing probabilities for draw/rep/recallable events by Strategy×Grade×AgeBucket
with fallback tables.

In [1]:
from pathlib import Path
import os
import numpy as np
import pandas as pd

In [2]:
import os
RUN_TAG = os.environ.get("RUN_TAG")
HIST_END = os.environ.get("HIST_END")
if RUN_TAG is None:
    RUN_TAG = HIST_END or "2025Q3"
BASE_OUT = Path("model_fits") / "runs" / RUN_TAG
CALIB_DIR = BASE_OUT / "calibration"
PROJ_DIR = BASE_OUT / "projection"

INPUT_PATH = "anonymized.csv"
OUT_DIR = str(CALIB_DIR)
MIN_OBS_AGE = 150
MIN_OBS_SG = 200
MIN_OBS_S = 300

if not Path(INPUT_PATH).exists():
    candidates = list(Path.cwd().glob("**/anonymized.csv"))
    if not candidates:
        candidates = list(Path.cwd().parent.glob("**/anonymized.csv"))
    if not candidates:
        candidates = list(Path.cwd().parent.parent.glob("**/anonymized.csv"))
    if candidates:
        INPUT_PATH = str(candidates[0])
    else:
        raise FileNotFoundError("anonymized.csv not found. Set INPUT_PATH to the full path.")

print("Using INPUT_PATH:", INPUT_PATH)
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)
Path(CALIB_DIR).mkdir(parents=True, exist_ok=True)


Using INPUT_PATH: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/anonymized.csv


In [3]:
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]


def _norm_key(s: str) -> str:
    return " ".join(s.strip().lower().replace("_", " ").split())


def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    col_map = {_norm_key(c): c for c in df.columns}
    def _get(name: str) -> str:
        k = _norm_key(name)
        return col_map.get(k, name)

    rename = {}
    rename[_get("Adj strategy")] = "Adj Strategy"
    rename[_get("Adj Strategy")] = "Adj Strategy"
    rename[_get("Quarter of Transaction Date")] = "Quarter"
    rename[_get("Year of Transaction Date")] = "Year"
    rename[_get("FundID")] = "FundID"
    rename[_get("Grade")] = "Grade"
    rename[_get("Current Grade")] = "Grade_Current"
    rename[_get("CurrentGrade")] = "Grade_Current"
    rename[_get("Grade Current")] = "Grade_Current"
    rename[_get("Grade_Current")] = "Grade_Current"
    rename[_get("Adj Drawdown EUR")] = "Adj Drawdown EUR"
    rename[_get("Adj Repayment EUR")] = "Adj Repayment EUR"
    rename[_get("Recallable")] = "Recallable"
    rename[_get("Fund_Age_Quarters")] = "Fund_Age_Quarters"
    return df.rename(columns=rename)


def parse_quarter(q) -> float:
    if pd.isna(q):
        return np.nan
    if isinstance(q, (int, np.integer, float, np.floating)):
        return float(q)
    s = str(q).strip().upper()
    if s.startswith("Q"):
        s = s[1:]
    try:
        return float(s)
    except Exception:
        return np.nan


def add_quarter_end(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df["Quarter"] = df["Quarter"].apply(parse_quarter)
    df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
    m = df["Year"].notna() & df["Quarter"].notna()
    years = df.loc[m, "Year"].astype(int)
    quarters = df.loc[m, "Quarter"].astype(int)
    df.loc[m, "quarter_end"] = pd.PeriodIndex(year=years, quarter=quarters, freq="Q").to_timestamp("Q")
    return df


def apply_current_grade(df: pd.DataFrame, context: str = "") -> pd.DataFrame:
    df = df.copy()

    if "Grade" in df.columns and "Grade_Seed" not in df.columns:
        df["Grade_Seed"] = df["Grade"]

    if "Grade" in df.columns:
        df["Grade"] = df["Grade"].astype(str).str.strip()
        df.loc[df["Grade"].isin(["", "nan", "None", "NaN", "<NA>"]), "Grade"] = np.nan

    if "quarter_end" not in df.columns:
        df = add_quarter_end(df)

    df["QPeriod"] = df["quarter_end"].dt.to_period("Q")

    cols = [
        "FundID",
        "Adj Strategy",
        "QPeriod",
        "quarter_end",
        "Adj Drawdown EUR",
        "Adj Repayment EUR",
        "NAV Adjusted EUR",
        "First Closing Date",
        "Grade",
    ]
    cols = [c for c in cols if c in df.columns]
    cash = df[cols].copy()
    cash = cash.rename(
        columns={
            "Adj Strategy": "AdjStrategy",
            "quarter_end": "TransactionDate",
            "First Closing Date": "FirstClosingDate",
        }
    )

    cash["TransactionDate"] = pd.to_datetime(cash["TransactionDate"], errors="coerce")
    if "FirstClosingDate" in cash.columns:
        cash["FirstClosingDate"] = pd.to_datetime(cash["FirstClosingDate"], errors="coerce")
    else:
        cash["FirstClosingDate"] = pd.NaT

    for c in ["Adj Drawdown EUR", "Adj Repayment EUR", "NAV Adjusted EUR"]:
        if c in cash.columns:
            cash[c] = pd.to_numeric(cash[c], errors="coerce").fillna(0.0)
        else:
            cash[c] = 0.0

    cash = cash.dropna(subset=["FundID", "TransactionDate"])

    if cash["FirstClosingDate"].isna().any():
        first_tx = cash.groupby("FundID")["TransactionDate"].transform("min")
        cash["FirstClosingDate"] = cash["FirstClosingDate"].fillna(first_tx)

    if cash.empty:
        return df

    fund_strategy = (
        cash.groupby("FundID")["AdjStrategy"]
        .agg(lambda s: s.mode().iat[0] if len(s.mode()) else s.iloc[0])
        .reset_index()
    )

    q_snap = (
        cash.sort_values(["FundID", "TransactionDate"])
        .groupby(["FundID", "QPeriod"], as_index=False)
        .agg(
            AdjStrategy=("AdjStrategy", "last"),
            FirstClosingDate=("FirstClosingDate", "last"),
            QuarterDrawdown=("Adj Drawdown EUR", "sum"),
            QuarterRepayment=("Adj Repayment EUR", "sum"),
            QuarterEndNAV=("NAV Adjusted EUR", "last"),
            QuarterEndDate=("TransactionDate", "max"),
        )
    )

    q_snap = q_snap.sort_values(["FundID", "QPeriod"])
    q_snap["QuarterEndNAV"] = q_snap.groupby("FundID")["QuarterEndNAV"].ffill().fillna(0)

    q_snap["CumDrawdown"] = q_snap.groupby("FundID")["QuarterDrawdown"].cumsum()
    q_snap["CumRepayment"] = q_snap.groupby("FundID")["QuarterRepayment"].cumsum()
    q_snap["PaidIn"] = q_snap["CumDrawdown"].abs()
    q_snap["Distributed"] = q_snap["CumRepayment"].abs()
    q_snap["NAV"] = q_snap["QuarterEndNAV"].abs()

    q_snap["DPI"] = np.where(q_snap["PaidIn"] > 0, q_snap["Distributed"] / q_snap["PaidIn"], np.nan)
    q_snap["TVPI"] = np.where(
        q_snap["PaidIn"] > 0,
        (q_snap["Distributed"] + q_snap["NAV"]) / q_snap["PaidIn"],
        np.nan,
    )

    def xnpv(rate, cfs, dts):
        dts = np.asarray(dts, dtype="datetime64[ns]")
        cfs = np.asarray(cfs, dtype=float)
        t0 = dts[0]
        day_counts = (dts - t0) / np.timedelta64(1, "D")
        years = day_counts / 365.0
        return np.sum(cfs / ((1.0 + rate) ** years))

    def xirr_newton(cfs, dts, guess=0.1, max_iter=80, tol=1e-7):
        dts = np.asarray(dts, dtype="datetime64[ns]")
        cfs = np.asarray(cfs, dtype=float)
        rate = float(guess)
        for _ in range(max_iter):
            f = xnpv(rate, cfs, dts)
            if not np.isfinite(f):
                return np.nan
            if abs(f) < tol:
                return rate
            eps = 1e-6
            f1 = xnpv(rate + eps, cfs, dts)
            df = (f1 - f) / eps
            if df == 0 or not np.isfinite(df):
                return np.nan
            rate_new = rate - f / df
            if rate_new <= -0.999999 or not np.isfinite(rate_new):
                return np.nan
            rate = rate_new
        return np.nan

    def compute_fund_quarter_xirr(fund_cash, fund_q):
        fund_cash = fund_cash.sort_values("TransactionDate")
        cfs = (-fund_cash["Adj Drawdown EUR"].abs() + fund_cash["Adj Repayment EUR"].abs()).to_numpy(dtype=float)
        dts = fund_cash["TransactionDate"].to_numpy(dtype="datetime64[ns]")
        irr_vals = []
        irr_flags = []
        j = 0
        n = len(cfs)
        for r in fund_q.itertuples(index=False):
            q_end = np.datetime64(r.QuarterEndDate, "ns")
            while j < n and dts[j] <= q_end:
                j += 1
            cfs_slice = cfs[:j]
            dts_slice = dts[:j]
            if len(cfs_slice) == 0:
                irr_vals.append(np.nan)
                irr_flags.append("no_txns")
                continue
            terminal_nav = float(abs(r.NAV)) if np.isfinite(r.NAV) else 0.0
            cfs_full = np.append(cfs_slice, terminal_nav)
            dts_full = np.append(dts_slice, q_end).astype("datetime64[ns]")
            if not (np.any(cfs_full < 0) and np.any(cfs_full > 0)):
                irr_vals.append(np.nan)
                irr_flags.append("no_sign_change")
                continue
            tvpi = r.TVPI
            guess = 0.10 if (pd.notna(tvpi) and tvpi > 1.0) else -0.10
            irr = xirr_newton(cfs_full, dts_full, guess=guess)
            if pd.notna(tvpi) and (0.98 <= tvpi <= 1.02):
                if not np.isfinite(irr):
                    irr2 = xirr_newton(cfs_full, dts_full, guess=-guess)
                    if np.isfinite(irr2):
                        irr = irr2
                        irr_flags.append("flat_retry_success")
                    else:
                        irr = 0.0
                        irr_flags.append("flat_to_zero")
                else:
                    irr_flags.append("flat_success")
            else:
                irr_flags.append("ok" if np.isfinite(irr) else "fail")
            irr_vals.append(irr)
        return pd.DataFrame({"IRR": irr_vals, "IRR_Flag": irr_flags})

    irr_rows = []
    for fund_id, fund_q in q_snap.groupby("FundID", sort=False):
        fund_cash = cash[cash["FundID"] == fund_id]
        irr_df = compute_fund_quarter_xirr(fund_cash, fund_q)
        irr_df = irr_df.copy()
        irr_df["FundID"] = fund_id
        irr_df["QPeriod"] = fund_q["QPeriod"].values
        irr_rows.append(irr_df)

    if irr_rows:
        irr_all = pd.concat(irr_rows, ignore_index=True)
        q_snap = q_snap.merge(irr_all, on=["FundID", "QPeriod"], how="left")
    else:
        q_snap["IRR"] = np.nan

    def quartile_to_grade(s):
        r = s.rank(pct=True)
        return pd.cut(r, [0, 0.25, 0.5, 0.75, 1], labels=["D", "C", "B", "A"], include_lowest=True)

    q_snap["Grade_DPI"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["DPI"].transform(quartile_to_grade)
    q_snap["Grade_TVPI"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["TVPI"].transform(quartile_to_grade)
    q_snap["Grade_IRR"] = q_snap.groupby(["AdjStrategy", "QPeriod"])["IRR"].transform(quartile_to_grade)

    DEBT_STRATEGIES = {"Hybrid Debt-Equity", "Private Debt", "Other Private Debt"}
    VC_STRATEGY = "Venture Capital"

    rep = cash[cash["Adj Repayment EUR"].abs() > 0].copy()
    first_repay = rep.groupby("FundID")["TransactionDate"].min().reset_index(name="FirstRepaymentDate")
    first_close = cash.groupby("FundID")["FirstClosingDate"].min().reset_index(name="FirstCloseDate")
    fund_timing = first_close.merge(first_repay, on="FundID", how="left")
    fund_timing = fund_timing.merge(fund_strategy, on="FundID", how="left")
    fund_timing["RepayWithin5Y"] = (
        fund_timing["FirstRepaymentDate"].notna()
        & (fund_timing["FirstRepaymentDate"] <= (fund_timing["FirstCloseDate"] + pd.DateOffset(years=5)))
    )
    fund_timing["BaseYears"] = np.where(fund_timing["RepayWithin5Y"], 5, 6)
    fund_timing["InvestPeriodYears"] = fund_timing["BaseYears"] + 1

    q_snap = q_snap.merge(
        fund_timing[["FundID", "FirstCloseDate", "InvestPeriodYears", "AdjStrategy"]],
        on=["FundID", "AdjStrategy"],
        how="left",
    )
    q_snap["IsInvestmentPeriod"] = q_snap["QuarterEndDate"] <= (
        q_snap["FirstCloseDate"] + q_snap["InvestPeriodYears"].apply(lambda y: pd.DateOffset(years=int(y)))
    )

    fund_counts = (
        q_snap.groupby(["AdjStrategy", "QPeriod"])["FundID"].nunique().rename("StrategyFundCount").reset_index()
    )
    q_snap = q_snap.merge(fund_counts, on=["AdjStrategy", "QPeriod"], how="left")

    q_snap["IsDebt"] = q_snap["AdjStrategy"].isin(DEBT_STRATEGIES)
    q_snap["IsVC"] = q_snap["AdjStrategy"].eq(VC_STRATEGY)

    grade_to_idx = {"A": 0, "B": 1, "C": 2, "D": 3}
    idx_to_grade = {0: "A", 1: "B", 2: "C", 3: "D"}

    def worse_grade(g1, g2):
        if pd.isna(g1):
            return g2
        if pd.isna(g2):
            return g1
        return g1 if grade_to_idx[g1] >= grade_to_idx[g2] else g2

    def downgrade_one_notch(g):
        if pd.isna(g):
            return g
        return idx_to_grade[min(grade_to_idx[g] + 1, 3)]

    def final_grade(row):
        if row["StrategyFundCount"] < 30:
            return worse_grade(row["Grade_DPI"], row["Grade_TVPI"])
        if row["IsDebt"]:
            return row["Grade_IRR"]
        if row["IsInvestmentPeriod"]:
            return row["Grade_TVPI"] if row["IsVC"] else row["Grade_DPI"]
        base = row["Grade_IRR"]
        dpi_g = row["Grade_DPI"]
        if pd.isna(base):
            return base
        if pd.notna(dpi_g) and (grade_to_idx[dpi_g] > grade_to_idx[base]):
            return downgrade_one_notch(base)
        return base

    q_snap["CurrentGrade"] = q_snap.apply(final_grade, axis=1)

    fund_quarters = cash[["FundID", "QPeriod"]].drop_duplicates().sort_values(["FundID", "QPeriod"])
    fund_quarters["RankQ"] = fund_quarters.groupby("FundID").cumcount() + 1
    fund_quarters["Block4"] = (fund_quarters["RankQ"] - 1) // 4 + 1

    fund_first = (
        cash.dropna(subset=["Grade"]).groupby("FundID", as_index=False).first()[["FundID", "Grade"]]
        .rename(columns={"Grade": "FirstGrade"})
    )

    fund_quarters = fund_quarters.merge(fund_first, on="FundID", how="left")
    fund_quarters = fund_quarters.merge(
        q_snap[["FundID", "QPeriod", "CurrentGrade"]], on=["FundID", "QPeriod"], how="left"
    )

    fund_quarters["AssignedGrade"] = np.where(
        (fund_quarters["Block4"] == 1) & fund_quarters["FirstGrade"].notna(),
        fund_quarters["FirstGrade"],
        fund_quarters["CurrentGrade"],
    )

    fund_quarters["AssignedGrade"] = fund_quarters.groupby("FundID")["AssignedGrade"].ffill()

    df = df.merge(fund_quarters[["FundID", "QPeriod", "AssignedGrade"]], on=["FundID", "QPeriod"], how="left")
    df["Grade_Current"] = df["AssignedGrade"]
    if "Grade_Seed" in df.columns:
        df["Grade_Current"] = df["Grade_Current"].fillna(df["Grade_Seed"])
    df["Grade"] = df["Grade_Current"]

    if context:
        print(f"Computed Grade_Current using performance rules for {context}.")

    return df



In [4]:
# --- Load + compute ---

df = pd.read_csv(INPUT_PATH, engine="python")
df = normalize_columns(df)
df = add_quarter_end(df)
df = apply_current_grade(df, context="timing curves")

# Age buckets
if "Fund_Age_Quarters" in df.columns:
    df["AgeBucket"] = pd.cut(pd.to_numeric(df["Fund_Age_Quarters"], errors="coerce"),
                             bins=AGE_BINS_Q, labels=AGE_LABELS)
else:
    df["AgeBucket"] = "ALL"

# Events

df["draw_event"] = pd.to_numeric(df["Adj Drawdown EUR"], errors="coerce").fillna(0.0) > 0

df["rep_event"] = pd.to_numeric(df["Adj Repayment EUR"], errors="coerce").fillna(0.0) > 0

df["rc_event"] = pd.to_numeric(df["Recallable"], errors="coerce").fillna(0.0) > 0
# --- Optional historical cutoff ---
HIST_END_QE = None
if HIST_END:
    try:
        y = int(HIST_END[:4])
        q = int(HIST_END[-1])
        HIST_END_QE = pd.Period(f"{y}Q{q}", freq="Q").to_timestamp("Q")
    except Exception:
        HIST_END_QE = None
if HIST_END_QE is not None:
    df = df[df["quarter_end"] <= HIST_END_QE].copy()
    print(f"Filtered history to <= {HIST_END_QE.date()} (rows={len(df)})")



  df.loc[m, "quarter_end"] = pd.PeriodIndex(year=years, quarter=quarters, freq="Q").to_timestamp("Q")


  q_snap["FirstCloseDate"] + q_snap["InvestPeriodYears"].apply(lambda y: pd.DateOffset(years=int(y)))


Computed Grade_Current using performance rules for timing curves.
Filtered history to <= 2025-09-30 (rows=30857)


In [5]:
# --- Fit levels ---

def _fit_level(level_name: str, cols: list, min_obs: int):
    rows = []
    for gkey, g in df.groupby(cols):
        if not isinstance(gkey, tuple):
            gkey = (gkey,)
        if len(g) < min_obs:
            continue
        n_obs = len(g)
        n_draw = int(g["draw_event"].sum())
        n_rep = int(g["rep_event"].sum())
        n_rc = int(g["rc_event"].sum())
        p_draw = n_draw / n_obs if n_obs else 0.0
        p_rep = n_rep / n_obs if n_obs else 0.0
        p_rc = n_rc / n_rep if n_rep else 0.0
        row = {
            "level": level_name,
            "n_obs": n_obs,
            "n_draw": n_draw,
            "n_rep": n_rep,
            "n_rc": n_rc,
            "p_draw": p_draw,
            "p_rep": p_rep,
            "p_rc_given_rep": p_rc,
        }
        for idx, col in enumerate(cols):
            row[col] = gkey[idx]
        rows.append(row)
    return rows

lvl_age = _fit_level("strategy_grade_age", ["Adj Strategy", "Grade", "AgeBucket"], MIN_OBS_AGE)
lvl_sg = _fit_level("strategy_grade", ["Adj Strategy", "Grade"], MIN_OBS_SG)
lvl_s = _fit_level("strategy", ["Adj Strategy"], MIN_OBS_S)

by_group = pd.DataFrame(lvl_age + lvl_sg + lvl_s)
by_group.to_csv(Path(OUT_DIR) / "timing_probs_by_group.csv", index=False)

  for gkey, g in df.groupby(cols):


In [6]:
# --- Selected with fallback ---

base_groups = df[["Adj Strategy", "Grade", "AgeBucket"]].dropna().drop_duplicates()
selected_rows = []

# global row
n_obs = len(df)
if n_obs:
    n_draw = int(df["draw_event"].sum())
    n_rep = int(df["rep_event"].sum())
    n_rc = int(df["rc_event"].sum())
    global_row = {
        "level": "global",
        "n_obs": n_obs,
        "n_draw": n_draw,
        "n_rep": n_rep,
        "n_rc": n_rc,
        "p_draw": n_draw / n_obs if n_obs else 0.0,
        "p_rep": n_rep / n_obs if n_obs else 0.0,
        "p_rc_given_rep": n_rc / n_rep if n_rep else 0.0,
    }
else:
    global_row = {}

for _, r in base_groups.iterrows():
    s, g, a = r["Adj Strategy"], r["Grade"], r["AgeBucket"]
    row = None
    if not by_group.empty:
        m = (by_group["level"] == "strategy_grade_age") & (by_group["Adj Strategy"] == s) & (by_group["Grade"] == g) & (by_group["AgeBucket"] == a)
        if m.any():
            row = by_group.loc[m].iloc[0].to_dict()
    if row is None and not by_group.empty:
        m = (by_group["level"] == "strategy_grade") & (by_group["Adj Strategy"] == s) & (by_group["Grade"] == g)
        if m.any():
            row = by_group.loc[m].iloc[0].to_dict()
    if row is None and not by_group.empty:
        m = (by_group["level"] == "strategy") & (by_group["Adj Strategy"] == s)
        if m.any():
            row = by_group.loc[m].iloc[0].to_dict()
    if row is None and global_row:
        row = dict(global_row)
        row["level"] = "global"

    if row:
        row["Adj Strategy"] = s
        row["Grade"] = g
        row["AgeBucket"] = a
        selected_rows.append(row)

selected = pd.DataFrame(selected_rows)
selected.to_csv(Path(OUT_DIR) / "timing_probs_selected.csv", index=False)

print("Wrote:", Path(OUT_DIR) / "timing_probs_by_group.csv")
print("Wrote:", Path(OUT_DIR) / "timing_probs_selected.csv")

Wrote: model_fits/runs/2025Q3/calibration/timing_probs_by_group.csv
Wrote: model_fits/runs/2025Q3/calibration/timing_probs_selected.csv
