# NAV Anchor Calibration
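Data-driven calibration of the NAV-anchor parameters from historical fund data:
- target NAV/paid-in: mean NAV/paid-in by strategy and fund-age bucket (age measured in quarters)
- liquidation horizon: number of quarters, counted from the planned-end quarter (inclusive), until NAV/paid-in first falls to or below the threshold (`NAV_LIQ_THRESHOLD`, default 0.05)
- mean-reversion lambda: speed at which NAV/paid-in reverts towards its target, estimated per strategy over all observations (overall) and over the late-life age buckets only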


In [1]:

import os
from pathlib import Path
import numpy as np
import pandas as pd
# Resolve the project root: the notebook may be started from the repo root
# or from inside model_fits/.
ROOT = Path.cwd()
if ROOT.name == 'model_fits':
    ROOT = ROOT.parent

# All calibration outputs of this run go under model_fits/runs/<RUN_TAG>/calibration.
RUN_TAG = os.environ.get("RUN_TAG", "2025Q3")
CALIB_DIR = ROOT / "model_fits" / "runs" / RUN_TAG / "calibration"
CALIB_DIR.mkdir(parents=True, exist_ok=True)

# Locate the input CSV. An existing INPUT_PATH from the environment always wins.
INPUT_PATH = os.environ.get("INPUT_PATH")
if INPUT_PATH and not Path(INPUT_PATH).exists():
    # Do not fall back silently: a typo in INPUT_PATH would otherwise calibrate
    # on a different file without any visible hint.
    print("WARNING: INPUT_PATH does not exist, falling back to a search for anonymized.csv:", INPUT_PATH)
    INPUT_PATH = None

if not INPUT_PATH:
    cwd = Path.cwd()
    # Nearest location first: cwd, then its parent, then its grandparent.
    candidates = [base / "anonymized.csv" for base in (cwd, cwd.parent, cwd.parent.parent)]
    candidates = [p for p in candidates if p.exists()]
    if not candidates:
        # Recursive fallback below cwd; sort (shallowest first) so the pick is
        # deterministic instead of depending on filesystem iteration order.
        candidates = sorted(cwd.glob("**/anonymized.csv"), key=lambda p: (len(p.parts), str(p)))
    if not candidates:
        raise FileNotFoundError("anonymized.csv not found")
    INPUT_PATH = str(candidates[0])

print("Using INPUT_PATH:", INPUT_PATH)
print("Output folder:", CALIB_DIR)


Using INPUT_PATH: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/model_fits/test_portfolio.csv
Output folder: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/model_fits/runs/test_portfolio_2025Q3/calibration


In [2]:

# --- Load data ---

df = pd.read_csv(INPUT_PATH, engine="python")

# Strip stray whitespace from the header names FIRST, so that every name
# lookup below (and in later cells) sees the cleaned column names.
col_map = {c: c.strip() for c in df.columns}
df = df.rename(columns=col_map)

# Strategy column: accept the lower-case spelling, otherwise fall back to the
# first column whose name mentions "strategy".
if "Adj Strategy" not in df.columns:
    if "Adj strategy" in df.columns:
        df = df.rename(columns={"Adj strategy": "Adj Strategy"})
    else:
        for c in df.columns:
            if "strategy" in c.lower():
                df = df.rename(columns={c: "Adj Strategy"})
                break

# Year/Quarter columns may be named differently in anonymized data
for wanted, alias in (("Year", "Year of Transaction Date"),
                      ("Quarter", "Quarter of Transaction Date")):
    if wanted not in df.columns and alias in df.columns:
        df = df.rename(columns={alias: wanted})

# required columns: report what is missing up front. This only warns, because
# some later steps have their own fallback for an absent column.
required = ["FundID", "Adj Strategy", "Adj Drawdown EUR", "NAV Adjusted EUR", "Year", "Quarter"]
missing_required = [c for c in required if c not in df.columns]
if missing_required:
    print("WARNING: expected columns missing from input:", missing_required)

# parse quarter_end

def parse_quarter(q):
    """Convert a quarter label to an integer in 1..4, or NaN if it is unusable.

    Accepts numbers (``3``, ``3.0``) and text with an optional leading "Q"
    in any case and with surrounding whitespace (``"Q3"``, ``" q3 "``, ``"3.0"``).

    Returns:
        int in the range 1..4, or ``np.nan`` for missing values, non-numeric
        text, non-integral numbers, infinities and values outside 1..4.
    """
    if pd.isna(q):
        return np.nan
    if isinstance(q, (int, np.integer, float, np.floating)):
        value = float(q)
    else:
        s = str(q).strip().upper()
        if s.startswith("Q"):
            s = s[1:]
        try:
            value = float(s)
        except (TypeError, ValueError):
            return np.nan
    # Reject inf/NaN before int() (int(inf) raises OverflowError), and reject
    # anything that is not a whole calendar quarter instead of truncating it.
    if not np.isfinite(value) or value != int(value) or not 1 <= value <= 4:
        return np.nan
    return int(value)

# Both columns are needed to build quarter_end; fail with a clear message
# (rather than an AttributeError on None) when either one is absent.
if "Year" not in df.columns or "Quarter" not in df.columns:
    raise ValueError("Year/Quarter not found for quarter_end")

df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
df["Quarter"] = df["Quarter"].apply(parse_quarter)
# Only rows with a usable year and a calendar quarter in 1..4 can be dated;
# an out-of-range quarter would make the period construction raise.
mask = df["Year"].notna() & df["Quarter"].between(1, 4)
if not mask.any():
    raise ValueError("Year/Quarter not found for quarter_end")

yrs = df.loc[mask, "Year"].astype(int).to_numpy()
qtrs = df.loc[mask, "Quarter"].astype(int).to_numpy()
# PeriodIndex(year=..., quarter=...) is deprecated in recent pandas; use the
# from_fields constructor where it exists and keep the old call as a fallback.
if hasattr(pd.PeriodIndex, "from_fields"):
    periods = pd.PeriodIndex.from_fields(year=yrs, quarter=qtrs, freq="Q")
else:
    periods = pd.PeriodIndex(year=yrs, quarter=qtrs, freq="Q")

# Create the column as datetime first so undated rows hold NaT.
df["quarter_end"] = pd.NaT
df.loc[mask, "quarter_end"] = periods.to_timestamp("Q")

# Coerce the amount columns that are present to numeric dtype; entries that
# cannot be parsed become NaN.
numeric_cols = ("Adj Drawdown EUR", "Adj Repayment EUR", "NAV Adjusted EUR", "Commitment EUR", "draw_cum_prev")
present_numeric_cols = [name for name in numeric_cols if name in df.columns]
for name in present_numeric_cols:
    df[name] = pd.to_numeric(df[name], errors="coerce")

# Planned end date: prefer one of the known column names (in this order),
# otherwise take the first column whose name contains both "planned" and "end".
planned_candidates = [
    "Planned End Date",
    "Planned End Date as per legal documentation",
    "Planned end date with add. years as per legal doc",
]
planned_col = next((name for name in planned_candidates if name in df.columns), None)
if planned_col is None:
    planned_col = next(
        (name for name in df.columns if "planned" in name.lower() and "end" in name.lower()),
        None,
    )

# Without a usable column every fund simply has no planned end (NaT).
if not planned_col:
    df["planned_end"] = pd.NaT
else:
    df["planned_end"] = pd.to_datetime(df[planned_col], errors="coerce")

print("Planned end col:", planned_col)


Planned end col: Planned End Date


  df.loc[mask, "quarter_end"] = pd.PeriodIndex(year=yrs, quarter=qtrs, freq="Q").to_timestamp("Q")


In [3]:

# --- Build cumulative metrics ---

# Chronological order within each fund is required by the running totals here
# and by the per-fund shift used later for the lambda calibration.
df = df.sort_values(["FundID", "quarter_end"])

# commitment by fund
if "Commitment EUR" in df.columns:
    commit_map = df.groupby("FundID")["Commitment EUR"].max().rename("commitment")
else:
    commit_map = pd.Series(0, index=df["FundID"].unique())

# cumulative draw and paid-in
if "Adj Drawdown EUR" in df.columns:
    # A row without a drawdown must count as zero: cumsum leaves NaN on NaN
    # input rows, which would blank out paid-in (and NAV/paid-in) for every
    # quarter in which nothing was drawn.
    draws = df["Adj Drawdown EUR"].fillna(0.0)
    df["draw_cum"] = draws.groupby(df["FundID"]).cumsum().abs()
else:
    df["draw_cum"] = 0.0

# paid-in = cumulative draw
paid_in = df["draw_cum"].copy()

# NAV (an absent NAV column yields an all-NaN ratio instead of a crash)
if "NAV Adjusted EUR" in df.columns:
    nav = df["NAV Adjusted EUR"].abs()
else:
    nav = pd.Series(np.nan, index=df.index)

# nav/paid-in, defined only where something has been paid in
r = np.where(paid_in > 0, nav / paid_in, np.nan)

df["nav_to_paidin"] = r

# Age buckets (quarters). Bins are right-inclusive: (-1, 3] -> "0-3", ..., (19, 1000] -> "20+".
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]

if "Fund_Age_Quarters" in df.columns:
    age_q = pd.to_numeric(df["Fund_Age_Quarters"], errors="coerce")
elif "First Closing Date" in df.columns:
    # Derive the age as whole calendar quarters between first close and the
    # reporting quarter. Going through year/quarter keeps missing dates as NaN;
    # casting Period values to int has no NaN representation for NaT.
    fc = pd.to_datetime(df["First Closing Date"], errors="coerce")
    qe = df["quarter_end"]
    age_q = (qe.dt.year - fc.dt.year) * 4 + (qe.dt.quarter - fc.dt.quarter)
else:
    # No age information at all: pd.cut needs a 1-d input, so use an all-NaN
    # Series (every row then gets a missing AgeBucket).
    age_q = pd.Series(np.nan, index=df.index)

df["AgeBucket"] = pd.cut(age_q, bins=AGE_BINS_Q, labels=AGE_LABELS)


In [4]:

# --- NAV target by strategy + age bucket ---

# Mean NAV/paid-in per (strategy, age bucket). AgeBucket is categorical, so
# `observed` is passed explicitly: observed=False keeps one row per category
# combination (NaN mean where there is no data), which is what the implicit
# default produced, and avoids the pandas FutureWarning about that default.
target = (
    df.groupby(["Adj Strategy", "AgeBucket"], dropna=False, observed=False)["nav_to_paidin"]
    .mean()
    .reset_index()
    .rename(columns={"nav_to_paidin": "avg_nav_to_paidin"})
)

out_targets = CALIB_DIR / "nav_anchor_targets.csv"
target.to_csv(out_targets, index=False)
print("Wrote:", out_targets)


Wrote: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/model_fits/runs/test_portfolio_2025Q3/calibration/nav_anchor_targets.csv


  target = (df.groupby(["Adj Strategy", "AgeBucket"], dropna=False)["nav_to_paidin"]


In [5]:

# --- Calibrate liquidation horizon (quarters after planned end to NAV/paid-in <= THRESH) ---

THRESH = float(os.environ.get("NAV_LIQ_THRESHOLD", "0.05"))


def _quarter_ordinal(ts):
    """Ordinal of the calendar quarter containing ``ts``."""
    return pd.Period(ts, freq="Q").ordinal


def _liquidation_record(fund_id, fund_rows):
    """Build the liquidation-horizon record of one fund.

    Returns None when the fund has no observation at or after its planned-end
    quarter. Otherwise returns a dict with the quarter count (planned-end
    quarter counted as 1) up to the first quarter with NAV/paid-in <= THRESH,
    or up to the last observed quarter with censored=1 if that never happens.
    """
    planned_q = pd.Period(fund_rows["planned_end"].iloc[0], freq="Q").to_timestamp("Q")
    after_end = fund_rows[fund_rows["quarter_end"] >= planned_q].copy()
    if after_end.empty:
        return None
    ratio = after_end["nav_to_paidin"]
    below = after_end[ratio.notna() & (ratio <= THRESH)]
    censored = below.empty
    if censored:
        stop_q = after_end["quarter_end"].max()
    else:
        stop_q = below["quarter_end"].min()
    return {
        "FundID": fund_id,
        "Adj Strategy": fund_rows["Adj Strategy"].iloc[0],
        "liq_q": int(_quarter_ordinal(stop_q) - _quarter_ordinal(planned_q)) + 1,
        "censored": int(censored),
    }


# Only funds with a known planned end date take part.
q = df.dropna(subset=["planned_end"]).copy()

fund_records = (_liquidation_record(fund_id, rows) for fund_id, rows in q.groupby("FundID"))
liq_rows = [rec for rec in fund_records if rec is not None]

liq_df = pd.DataFrame(liq_rows)
if not liq_df.empty:
    # Per-strategy distribution of the horizon plus the share of censored funds.
    liq_summary = (
        liq_df.groupby("Adj Strategy")
        .agg(
            liq_q_median=("liq_q", "median"),
            liq_q_p75=("liq_q", lambda col: col.quantile(0.75)),
            liq_q_p90=("liq_q", lambda col: col.quantile(0.90)),
            n_funds=("FundID", "nunique"),
            censored_rate=("censored", "mean"),
        )
        .reset_index()
    )
else:
    liq_summary = pd.DataFrame(columns=["Adj Strategy","liq_q_median","liq_q_p75","liq_q_p90","n_funds","censored_rate"])

print(liq_summary)


Empty DataFrame
Columns: [Adj Strategy, liq_q_median, liq_q_p75, liq_q_p90, n_funds, censored_rate]
Index: []


In [6]:

# --- Calibrate mean-reversion lambda (overall and late-life) ---

# Targets are read back from the CSV written by the NAV-target step and keyed
# by (strategy, age bucket).
nt = pd.read_csv(CALIB_DIR / "nav_anchor_targets.csv")
nt_map = {}
for _, target_row in nt.iterrows():
    nt_map[(target_row["Adj Strategy"], target_row["AgeBucket"])] = target_row["avg_nav_to_paidin"]


def _lookup_target(row):
    """Target NAV/paid-in for the row's (strategy, age bucket), NaN if unknown."""
    return nt_map.get((row["Adj Strategy"], row["AgeBucket"]), np.nan)


def _reversion_speed(obs):
    """No-intercept least-squares slope of -dr on dev; NaN when dev has no variation."""
    denom = (obs["dev"] ** 2).sum()
    if not denom > 0:
        return np.nan
    return -(obs["dev"] * obs["dr"]).sum() / denom


# Current ratio r, next row's ratio r_next within the same fund, and the target.
q2 = df.copy()
q2["r"] = pd.to_numeric(q2["nav_to_paidin"], errors="coerce")
q2["r_next"] = q2.groupby("FundID")["r"].shift(-1)
q2["target"] = q2.apply(_lookup_target, axis=1)
for col in ("r", "target"):
    q2[col] = pd.to_numeric(q2[col], errors="coerce")
q2 = q2.dropna(subset=["r", "r_next", "target"]).assign(
    dev=lambda d: d["r"] - d["target"],   # distance from target
    dr=lambda d: d["r_next"] - d["r"],    # change to the next row
)

NAV_END_AGE_BUCKET = os.environ.get("NAV_END_AGE_BUCKET", "16-19")
late_buckets = [NAV_END_AGE_BUCKET, "20+"]


def _lambda_row(strategy, obs):
    """One output row: overall lambda and late-life lambda for a strategy."""
    late_obs = obs[obs["AgeBucket"].isin(late_buckets)]
    return {
        "Adj Strategy": strategy,
        "AgeBucket": np.nan,
        "lambda": _reversion_speed(obs),
        "lambda_end": _reversion_speed(late_obs),
        "n_obs": len(obs),
        "n_obs_end": len(late_obs),
    }


lam_rows = [_lambda_row(strategy, obs) for strategy, obs in q2.groupby("Adj Strategy")]
lam_df = pd.DataFrame(lam_rows)

# With no rows the frame has no columns; give it the expected schema so the
# merge below still works. Same safeguard for liq_summary.
if "Adj Strategy" not in lam_df.columns:
    lam_df = pd.DataFrame(columns=["Adj Strategy","AgeBucket","lambda","lambda_end","n_obs","n_obs_end"])
if "Adj Strategy" not in liq_summary.columns:
    liq_summary = pd.DataFrame(columns=["Adj Strategy","liq_q_median","liq_q_p75","liq_q_p90","n_funds","censored_rate"])

# One row per strategy: liquidation-horizon stats next to the lambdas.
out = liq_summary.merge(lam_df, on="Adj Strategy", how="outer")

out_path = CALIB_DIR / "nav_anchor_calibration.csv"
out.to_csv(out_path, index=False)
print("Wrote:", out_path)
print(out.head())


Wrote: /Users/mozeramozali/Desktop/Equity-Cashflow-projection/model_fits/runs/test_portfolio_2025Q3/calibration/nav_anchor_calibration.csv
Empty DataFrame
Columns: [Adj Strategy, liq_q_median, liq_q_p75, liq_q_p90, n_funds, censored_rate, AgeBucket, lambda, lambda_end, n_obs, n_obs_end]
Index: []
