# Omega Calibration (NAV Logic‑style)

Calibrate omega as:

omega = (NAV_t − NAV_{t-1} − net_flow) / NAV_{t-1}

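Then regress omega on MSCI quarterly returns (current quarter + one-quarter lag) by Strategy×Grade,
falling back to Strategy and then to a global fit. From the market-adjusted omega, estimate alpha by
Strategy×Grade×AgeBucket and sigma by Strategy×Grade, each with fallback to coarser groupings.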

In [None]:
from pathlib import Path
import os
import numpy as np
import pandas as pd

In [None]:
# Input/output locations. The defaults are relative to the working directory;
# edit them directly or rely on the discovery fallbacks below.
INPUT_PATH = "anonymized.csv"
MSCI_PATH = "msci.xlsx"  # update if needed
OUT_DIR = "model_fits/outputs"

if not Path(INPUT_PATH).exists():
    # Search below the working directory. Sorting makes the chosen file
    # independent of filesystem enumeration order when several copies exist.
    candidates = sorted(Path.cwd().glob("**/anonymized.csv"))
    if candidates:
        if len(candidates) > 1:
            print("Found", len(candidates), "copies of anonymized.csv; using the first in sorted order.")
        INPUT_PATH = str(candidates[0])
    else:
        raise FileNotFoundError("anonymized.csv not found. Set INPUT_PATH to the full path.")

if not Path(MSCI_PATH).exists():
    # try common names in project root
    for p in ["MSCI.xlsx", "msci.xlsx"]:
        if Path(p).exists():
            MSCI_PATH = p
            break
    else:
        # Nothing found: say so now instead of failing later with a bare
        # read error. Deliberately not fatal so that cells which do not need
        # the MSCI data can still run.
        print("WARNING: MSCI file not found:", MSCI_PATH, "- set MSCI_PATH before loading MSCI data.")

print("Using INPUT_PATH:", INPUT_PATH)
print("Using MSCI_PATH:", MSCI_PATH)
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# Fund-age bucket edges in quarters, passed to pd.cut as `bins`. pd.cut is
# called with its default right-closed intervals, so the buckets are
# (-1, 3], (3, 7], (7, 11], (11, 15], (15, 19], (19, 1000]; the -1 lower edge
# places age 0 in the first bucket.
AGE_BINS_Q = [-1, 3, 7, 11, 15, 19, 1000]
# One label per interval above, in the same order.
AGE_LABELS = ["0-3", "4-7", "8-11", "12-15", "16-19", "20+"]


def _norm_key(s: str) -> str:
    """Return *s* lower-cased, with underscores as spaces and whitespace collapsed."""
    spaced = s.strip().lower().replace("_", " ")
    return " ".join(spaced.split())


def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename recognised input columns to the canonical names used downstream.

    Column matching ignores case, treats underscores like spaces and ignores
    leading, trailing and repeated whitespace. Columns that match no alias are
    left untouched.

    Args:
        df: Frame whose column labels are strings.

    Returns:
        A renamed copy of ``df`` (the input is not modified).
    """
    # Normalised key -> the label actually present in the frame.
    present = {}
    for label in df.columns:
        present[_norm_key(label)] = label

    # (accepted spelling, canonical name); several spellings may share a target.
    aliases = (
        ("Adj strategy", "Adj Strategy"),
        ("Adj Strategy", "Adj Strategy"),
        ("Quarter of Transaction Date", "Quarter"),
        ("Year of Transaction Date", "Year"),
        ("FundID", "FundID"),
        ("Grade", "Grade"),
        ("Current Grade", "Grade_Current"),
        ("CurrentGrade", "Grade_Current"),
        ("Grade Current", "Grade_Current"),
        ("Grade_Current", "Grade_Current"),
        ("Adj Drawdown EUR", "Adj Drawdown EUR"),
        ("Adj Repayment EUR", "Adj Repayment EUR"),
        ("NAV Adjusted EUR", "NAV Adjusted EUR"),
        ("Fund_Age_Quarters", "Fund_Age_Quarters"),
    )

    # When the alias is absent the alias text itself is used as the key, which
    # rename() simply ignores.
    mapping = {
        present.get(_norm_key(alias), alias): canonical
        for alias, canonical in aliases
    }
    return df.rename(columns=mapping)


def parse_quarter(q) -> float:
    """Parse a quarter value such as ``3``, ``"3"`` or ``"Q3"`` into a float.

    Args:
        q: Scalar quarter value. Numbers are returned as ``float(q)``; anything
            else is converted to text, stripped, upper-cased and relieved of
            one leading ``"Q"`` before being parsed as a number.

    Returns:
        The quarter as a float, or NaN when ``q`` is missing or the remaining
        text is not a number. No range check is applied.
    """
    if pd.isna(q):
        return np.nan
    if isinstance(q, (int, np.integer, float, np.floating)):
        return float(q)
    text = str(q).strip().upper()
    if text.startswith("Q"):
        text = text[1:]
    try:
        return float(text)
    except ValueError:
        # float() on a str only fails with ValueError; catching just that keeps
        # unrelated errors visible.
        return np.nan


def add_quarter_end(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with parsed Year/Quarter and a ``quarter_end`` column.

    ``Quarter`` is parsed with ``parse_quarter`` and ``Year`` with
    ``pd.to_numeric(errors="coerce")``. ``quarter_end`` is filled only for rows
    where both values are present; the other rows keep a missing value.

    Args:
        df: Frame with ``Year`` and ``Quarter`` columns.

    Returns:
        A modified copy; the input frame is not changed.
    """
    df = df.copy()
    df["Quarter"] = df["Quarter"].apply(parse_quarter)
    df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
    m = df["Year"].notna() & df["Quarter"].notna()
    years = df.loc[m, "Year"].astype(int).to_numpy()
    quarters = df.loc[m, "Quarter"].astype(int).to_numpy()
    # Building a PeriodIndex from year=/quarter= keywords in the constructor is
    # deprecated in recent pandas; prefer from_fields where it exists and keep
    # the constructor form for older versions.
    if hasattr(pd.PeriodIndex, "from_fields"):
        periods = pd.PeriodIndex.from_fields(year=years, quarter=quarters, freq="Q")
    else:
        periods = pd.PeriodIndex(year=years, quarter=quarters, freq="Q")
    df.loc[m, "quarter_end"] = periods.to_timestamp("Q")
    return df


def apply_current_grade(df: pd.DataFrame, context: str = "") -> pd.DataFrame:
    """Return a copy of ``df`` whose ``Grade`` column holds the current grade.

    If ``Grade_Current`` exists it is copied into ``Grade``. Otherwise, when
    ``Grade``, ``FundID`` and ``quarter_end`` are all present, blank grades are
    set to NaN, rows are sorted by fund and quarter, and the grade is
    forward-filled within each fund into both ``Grade_Current`` and ``Grade``.
    With neither available the copy is returned unchanged.

    Args:
        df: Input frame; not modified.
        context: Optional label; when non-empty a one-line message naming the
            path taken is printed.

    Returns:
        The adjusted copy (row order changes only on the forward-fill path).
    """
    out = df.copy()

    if "Grade_Current" in out.columns:
        out["Grade"] = out["Grade_Current"]
        if context:
            print(f"Using Grade_Current for {context}.")
        return out

    needed = ("Grade", "FundID", "quarter_end")
    if any(col not in out.columns for col in needed):
        return out

    # Text forms that a missing grade takes after the string conversion below.
    blank_tokens = ["", "nan", "None", "NaN", "<NA>"]
    out["Grade"] = out["Grade"].astype(str).str.strip()
    is_blank = out["Grade"].isin(blank_tokens)
    out.loc[is_blank, "Grade"] = np.nan

    out = out.sort_values(["FundID", "quarter_end"])
    carried = out.groupby("FundID")["Grade"].ffill()
    out["Grade_Current"] = carried
    out["Grade"] = out["Grade_Current"]
    if context:
        print(f"Computed Grade_Current (forward fill) for {context}.")
    return out


def load_msci_quarterly(msci_xlsx_path: str) -> pd.DataFrame:
    """Load the MSCI index workbook and return quarterly levels and returns.

    Args:
        msci_xlsx_path: Path to an Excel file with ``Date`` and ``SCXP Index``
            columns.

    Returns:
        One row per quarter with columns ``quarter_end``, ``index_level`` (the
        last valid observation in the quarter by date) and ``msci_ret_q`` (the
        percentage change of ``index_level`` from the previous row). The first
        quarter is dropped because it has no return.

    Raises:
        ValueError: If either required column is missing.
    """
    raw = pd.read_excel(msci_xlsx_path)
    required = ("Date", "SCXP Index")
    if any(col not in raw.columns for col in required):
        raise ValueError("MSCI file must contain columns: 'Date' and 'SCXP Index'")

    # Coerce both columns; rows that fail to parse are discarded.
    levels = raw[["Date", "SCXP Index"]].copy()
    levels["Date"] = pd.to_datetime(levels["Date"], errors="coerce")
    levels["SCXP Index"] = pd.to_numeric(levels["SCXP Index"], errors="coerce")
    levels = levels.dropna(subset=["Date", "SCXP Index"]).sort_values("Date")
    levels["quarter_end"] = levels["Date"].dt.to_period("Q").dt.to_timestamp("Q")

    # Rows are date-sorted, so last() picks the latest observation per quarter.
    per_quarter = levels.groupby("quarter_end", as_index=False)["SCXP Index"].last()
    per_quarter = per_quarter.rename(columns={"SCXP Index": "index_level"})
    per_quarter = per_quarter.sort_values("quarter_end").reset_index(drop=True)

    per_quarter["msci_ret_q"] = per_quarter["index_level"].pct_change()
    return per_quarter.dropna(subset=["msci_ret_q"]).reset_index(drop=True)


def fit_ols_beta(y: np.ndarray, x: np.ndarray):
    """Fit ``y = a + b0 * x[:, 0] + b1 * x[:, 1]`` by ordinary least squares.

    Args:
        y: Response values, length ``n``.
        x: Regressors of shape ``(n, 2)``.

    Returns:
        ``(a, b0, b1)`` as Python floats. If the least-squares solver raises
        ``LinAlgError``, all three are 0.0.
    """
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)
    # Prepend a column of ones so the first coefficient is the intercept.
    X = np.column_stack([np.ones(len(y)), x])
    try:
        beta = np.linalg.lstsq(X, y, rcond=None)[0]
    except np.linalg.LinAlgError:
        # Only a solver failure falls back to a zero model; any other
        # exception is a programming or data error and should surface.
        beta = np.zeros(3, dtype=float)
    return float(beta[0]), float(beta[1]), float(beta[2])

In [None]:
# --- Load data + compute omega ---

df = pd.read_csv(INPUT_PATH, engine="python")
df = normalize_columns(df)
df = add_quarter_end(df)
df = apply_current_grade(df, context="omega")

# Age buckets
if "Fund_Age_Quarters" in df.columns:
    df["AgeBucket"] = pd.cut(pd.to_numeric(df["Fund_Age_Quarters"], errors="coerce"),
                             bins=AGE_BINS_Q, labels=AGE_LABELS)
else:
    df["AgeBucket"] = "ALL"

# omega from NAV and flows:
#   omega = (NAV_t - NAV_{t-1} - net_flow) / NAV_{t-1}

df = df.sort_values(["FundID", "quarter_end"])

# Coerce NAV the same way as the flows so that text-typed cells become NaN
# instead of breaking the arithmetic below.
nav = pd.to_numeric(df["NAV Adjusted EUR"], errors="coerce")
df["nav_prev"] = nav.groupby(df["FundID"]).shift(1)

# Net flow = drawdowns minus repayments; missing flows count as zero.
df["flow_net"] = (
    pd.to_numeric(df["Adj Drawdown EUR"], errors="coerce").fillna(0.0)
    - pd.to_numeric(df["Adj Repayment EUR"], errors="coerce").fillna(0.0)
)

# shift(1) returns the previous ROW of the fund, which is NAV_{t-1} only if
# that row belongs to the immediately preceding quarter. Count quarters on a
# linear index and keep only consecutive pairs, so a reporting gap does not
# become a multi-quarter "quarterly" omega. Rows with a missing quarter_end
# compare as False and are excluded.
qe = pd.to_datetime(df["quarter_end"], errors="coerce")
q_index = qe.dt.year * 4 + qe.dt.quarter
prev_q_index = q_index.groupby(df["FundID"]).shift(1)
consecutive = (q_index - prev_q_index) == 1

# Require a previous NAV above 1 EUR in absolute value to avoid dividing by
# (near-)zero balances.
nav_ok = df["nav_prev"].abs() > 1.0
n_gap = int((nav_ok & ~consecutive).sum())
if n_gap:
    print(f"omega: skipped {n_gap} rows whose previous row is not the preceding quarter.")

m = nav_ok & consecutive

df["omega"] = np.nan
df.loc[m, "omega"] = ((nav[m] - df.loc[m, "nav_prev"]) - df.loc[m, "flow_net"]) / df.loc[m, "nav_prev"]

df = df.dropna(subset=["omega", "Adj Strategy", "Grade", "AgeBucket"])

In [None]:
# --- Merge MSCI ---

# Quarterly MSCI returns plus a one-row lag of the same series.
msci_q = load_msci_quarterly(MSCI_PATH)
msci_q["msci_ret_q_lag1"] = msci_q["msci_ret_q"].shift(1)

# Lookup table indexed by quarter_end, holding only the two regressors.
msci_map = msci_q.set_index("quarter_end").loc[:, ["msci_ret_q", "msci_ret_q_lag1"]]

# Attach the regressors to each fund-quarter and keep rows where both exist.
df = df.join(msci_map, on="quarter_end").dropna(subset=["msci_ret_q", "msci_ret_q_lag1"])
print(f"calibration rows: {len(df)}")

In [None]:
# --- Fit omega model (NAV Logic style) ---

cal = df.copy()

# Regress omega on the current and lagged MSCI return at three levels:
# global, per strategy, and per (strategy, grade). Coarser fits act as
# fallbacks for the finer ones.
betas_sg = {}
betas_s = {}

# Global fit over all calibration rows
y = cal["omega"].to_numpy(dtype=float)
x = cal[["msci_ret_q", "msci_ret_q_lag1"]].to_numpy(dtype=float)
betas_g = fit_ols_beta(y, x)
a_g, b0_g, b1_g = betas_g

# One fit per strategy (no minimum sample size)
for s, grp in cal.groupby("Adj Strategy"):
    y = grp["omega"].to_numpy(dtype=float)
    x = grp[["msci_ret_q", "msci_ret_q_lag1"]].to_numpy(dtype=float)
    betas_s[s] = fit_ols_beta(y, x)

# One fit per (strategy, grade), kept only with at least 20 observations
for (s, g), grp in cal.groupby(["Adj Strategy", "Grade"]):
    if len(grp) >= 20:
        y = grp["omega"].to_numpy(dtype=float)
        x = grp[["msci_ret_q", "msci_ret_q_lag1"]].to_numpy(dtype=float)
        betas_sg[(s, g)] = fit_ols_beta(y, x)

# Choose best available betas

def get_betas(strategy: str, grade: str):
    """Return the most specific ``(a, b0, b1)`` fit available.

    Lookup order: (strategy, grade) fit, then the strategy fit, then the
    global fit.
    """
    key = (strategy, grade)
    if key in betas_sg:
        return betas_sg[key]
    return betas_s.get(strategy, betas_g)

cal2 = cal.copy()

# Resolve the betas applied to each row. Iterating the two key columns avoids
# building a Series per row as iterrows() does.
betas_used = [get_betas(s, g) for s, g in zip(cal2["Adj Strategy"], cal2["Grade"])]
b0_list = [b[1] for b in betas_used]
b1_list = [b[2] for b in betas_used]
cal2["b0_used"] = b0_list
cal2["b1_used"] = b1_list

# Market-adjusted omega: remove the fitted MSCI exposure. The regression
# intercept is deliberately not subtracted; it is captured by alpha below.
cal2["omega_adj"] = cal2["omega"] - cal2["b0_used"] * cal2["msci_ret_q"] - cal2["b1_used"] * cal2["msci_ret_q_lag1"]

# Alpha by (strategy, grade, age), then (strategy, grade), then strategy
alpha_sga = {}
alpha_sg = {}
alpha_s = {}

# observed=True: AgeBucket may be categorical, so only label combinations that
# actually occur are visited (unobserved ones would hold no rows anyway).
for (s, g, a), grp in cal2.groupby(["Adj Strategy", "Grade", "AgeBucket"], observed=True):
    if len(grp) < 10:
        continue
    alpha_sga[(s, g, a)] = float(grp["omega_adj"].mean())

for (s, g), grp in cal2.groupby(["Adj Strategy", "Grade"]):
    if len(grp) < 10:
        continue
    alpha_sg[(s, g)] = float(grp["omega_adj"].mean())

# Group by the bare column name, not a one-element list: with a list, pandas
# >= 2.0 yields 1-tuple keys such as ("Buyout",), and the lookup
# `strategy in alpha_s` would then never match.
for s, grp in cal2.groupby("Adj Strategy"):
    alpha_s[s] = float(grp["omega_adj"].mean())

alpha_g = float(cal2["omega_adj"].mean())

# Sigma by (strategy, grade), then strategy, then global

sigma_sg = {}
sigma_s = {}

# Residual around the (strategy, grade) mean of the adjusted omega.
cal2["omega_resid"] = cal2["omega_adj"] - cal2.groupby(["Adj Strategy", "Grade"])["omega_adj"].transform("mean")

for (s, g), grp in cal2.groupby(["Adj Strategy", "Grade"]):
    if len(grp) < 20:
        continue
    sig = float(grp["omega_resid"].std(ddof=1))
    if np.isfinite(sig) and sig > 0:
        sigma_sg[(s, g)] = sig

# Scalar key for the same reason as alpha_s above.
for s, grp in cal2.groupby("Adj Strategy"):
    sig = float(grp["omega_adj"].std(ddof=1))
    if np.isfinite(sig) and sig > 0:
        sigma_s[s] = sig

sigma_g = float(cal2["omega_adj"].std(ddof=1))
if not np.isfinite(sigma_g) or sigma_g <= 0:
    # Last-resort default when no usable global dispersion can be computed.
    sigma_g = 0.05


def get_alpha(strategy: str, grade: str, age_bucket: str):
    """Return ``(alpha, source)`` from the most specific level available.

    ``source`` is ``"sga"`` for (strategy, grade, age bucket), ``"sg"`` for
    (strategy, grade), ``"s"`` for strategy and ``"g"`` for the global value.
    """
    levels = (
        (alpha_sga, (strategy, grade, age_bucket), "sga"),
        (alpha_sg, (strategy, grade), "sg"),
        (alpha_s, strategy, "s"),
    )
    for table, key, source in levels:
        if key in table:
            return table[key], source
    return alpha_g, "g"


def get_sigma(strategy: str, grade: str):
    """Return ``(sigma, source)`` from the most specific level available.

    ``source`` is ``"sg"`` for (strategy, grade), ``"s"`` for strategy and
    ``"g"`` for the global value.
    """
    levels = (
        (sigma_sg, (strategy, grade), "sg"),
        (sigma_s, strategy, "s"),
    )
    for table, key, source in levels:
        if key in table:
            return table[key], source
    return sigma_g, "g"

In [None]:
# --- Build selected omega table ---

# One output row per (strategy, grade, age bucket) present in the calibration
# data, carrying the parameters chosen by the fallback lookups.
unique_groups = cal2[["Adj Strategy", "Grade", "AgeBucket"]].drop_duplicates()

rows = []
for s, g, a in zip(unique_groups["Adj Strategy"], unique_groups["Grade"], unique_groups["AgeBucket"]):
    a0, b0, b1 = get_betas(s, g)
    alpha, alpha_src = get_alpha(s, g, a)
    sigma, sigma_src = get_sigma(s, g)
    record = {
        "Adj Strategy": s,
        "Grade": g,
        "AgeBucket": a,
        "a_intercept": a0,
        "b0": b0,
        "b1": b1,
        "alpha": alpha,
        "sigma": sigma,
        "alpha_src": alpha_src,  # level the alpha came from
        "sigma_src": sigma_src,  # level the sigma came from
    }
    rows.append(record)

omega_sel = pd.DataFrame(rows)

out_path = Path(OUT_DIR) / "omega_selected.csv"
omega_sel.to_csv(out_path, index=False)
print("Wrote:", out_path)