In [1]:
# Parameters
# NOTE(review): this looks like a notebook "parameters" cell whose values are
# supplied per run — confirm how they are injected before hand-editing.
# Within the visible part of this stage only SWAN_YEAR is touched again, and
# it is reassigned further down from $SWAN_YEAR / the YAML `events:` block,
# so the value set here is not the one the rest of the stage ends up using.
INPUT_CSV = "C:/Users/Jason Pohl/OneDrive - Bond University/PhD/rff/NEW_DATA.csv"
OUTPUT_ROOT = "C:/Users/Jason Pohl/OneDrive - Bond University/PhD/rff/outputs_rff"
# Empty string — presumably "no explicit Stage 1 config"; TODO confirm.
STAGE1_CFG = ""
SWAN_YEAR = 2008
# WIN_START / WIN_END bracket SWAN_YEAR by four years either side; presumably
# the analysis window — verify against the stage that consumes them.
WIN_START = 2004
WIN_END = 2012
RUN_TAG = "myUniqueRunId"


In [2]:
"""
STAGE 03 · EXTENDED RATIO FACTORY
────────────────────────────────────────────────────────────────────────
• Zero manual input:
      – SWAN_YEAR / RUN_DATE logic identical to Stages 01-02.
      – All paths and tweaks come from pipeline_config.yaml.
• Re-uses `data_stage_2` in-memory if the previous stage ran in the same
  session; otherwise loads the latest Stage 02 CSV.
• Hundreds of ratios (see `ratio_funcs` & `derived_ratio_funcs`) are
  calculated, winsorised, audited and saved.

Outputs
  <OUTPUT_ROOT>/event=<SWAN_YEAR>/<RUN_DATE>/stage03/
      ├─ Stage3_Data_WithRatios.csv        (raw + winsor columns)
      ├─ Stage3_RatioCoverage.csv          (% available / winsor caps)
      └─ Stage3_SkippedRatios.csv          (errors & skips)
The full DataFrame is also returned as `data_stage_3` when run in-session.
"""
from __future__ import annotations
from pathlib import Path
import os, sys, logging, yaml, io
from typing import Dict, List

import numpy as np
import pandas as pd
from scipy.stats import skew as _scipy_skew          # for helper
# ──────────────────────────────────────────────────────────────────────
# 0-3 · UNIVERSAL BOOTSTRAP (cfg, run-resolver, params, logger)
#       → replaces the old ad-hoc path logic
# ──────────────────────────────────────────────────────────────────────
from pipeline_utils import load_cfg, resolve_run_dir      # NEW shared helper
from pathlib import Path
import os, sys, logging, io
from typing import Dict, List

# 0 · read YAML once ---------------------------------------------------
# A YAML key that is present but has no value (e.g. a bare `stage3:`) parses
# to None, so `.get(key, {})` would hand back None and the later `.items()` /
# `.get()` calls would fail with AttributeError.  Normalise every section
# with `or {}` instead.
# NOTE(review): assumes load_cfg() returns a mapping (or None for an empty
# file) — confirm against pipeline_utils.
CFG: Dict      = load_cfg() or {}
DEFAULTS: Dict = CFG.get("defaults") or {}
# Event keys are stringified so they compare equal to the env-var form.
EVENTS:   Dict = {str(k): v for k, v in (CFG.get("events") or {}).items()}
ST3_OVR:  Dict = CFG.get("stage3") or {}         # optional per-stage overrides

# 1 · runtime params ---------------------------------------------------
# Event year: a non-empty $SWAN_YEAR wins, otherwise the first key of the
# YAML `events:` block.  `next(..., None)` turns an empty block into an
# explicit KeyError instead of a bare StopIteration.
SWAN_YEAR = os.getenv("SWAN_YEAR") or next(iter(EVENTS), None)
if SWAN_YEAR is None:
    raise KeyError("SWAN_YEAR is not set and the YAML `events:` block is empty")
SWAN_YEAR = str(SWAN_YEAR)
if SWAN_YEAR not in EVENTS:
    raise KeyError(f"SWAN_YEAR={SWAN_YEAR} not in YAML `events:` block")

# `or` rather than a getenv default: an empty $WINSOR_PCT then falls back to
# the YAML / built-in value instead of crashing in float("").
WINSOR_PCT = float(os.getenv("WINSOR_PCT") or ST3_OVR.get("winsor_pct", 0.01))
# Column names are overridable per stage; defaults shown on the right.
DATE_COL   = ST3_OVR.get("date_col",  "ReportDate")
ID_COL     = ST3_OVR.get("id_col",    "Symbol")

# 2 · resolve run folder & paths --------------------------------------
#     – the shared resolver is asked for a run folder that already holds the
#       Stage-2 CSV; per the original author's note it honours $RUN_DIR or
#       $RUN_DATE when set and otherwise picks the latest qualifying run
RUN_DIR   = resolve_run_dir(must_have="stage02/Stage2_Data_WithMetrics.csv")
RUN_DATE  = RUN_DIR.name          # the run folder's name is used as the run date

# Stage-3 output folder and Stage-2 input file both hang off the run folder.
OUT_DIR     = RUN_DIR.joinpath("stage03")
STAGE2_FILE = RUN_DIR.joinpath("stage02", "Stage2_Data_WithMetrics.csv")
OUT_DIR.mkdir(exist_ok=True, parents=True)

# 3 · logger -----------------------------------------------------------
# Two entry modes:
#   • in-session  – a `logger` object already exists in globals (an earlier
#                   stage ran in this session); attach a file handler for
#                   THIS run's stage03.log unless one is already attached.
#   • stand-alone – configure root logging with a file handler plus stdout.
_LOG_FILE   = OUT_DIR / "stage03.log"
_LOG_FORMAT = "%(asctime)s | %(levelname)-7s | %(message)s"
# FileHandler.baseFilename is an absolute path, so compare absolute paths
# (case-normalised, since the pipeline runs on Windows paths).
_LOG_FILE_KEY = os.path.normcase(os.path.abspath(_LOG_FILE))

if "logger" in globals() and isinstance(globals()["logger"], logging.Logger):
    logger = logging.getLogger(__name__)

    # A handler that points at some OTHER run's stage03.log is stale: matching
    # on the file name alone used to treat it as "already configured", so the
    # current run got no log file and kept writing into the old one.
    for _h in list(logger.handlers):
        if (isinstance(_h, logging.FileHandler)
                and _h.baseFilename.endswith("stage03.log")
                and os.path.normcase(os.path.abspath(_h.baseFilename)) != _LOG_FILE_KEY):
            logger.removeHandler(_h)
            _h.close()

    # Records propagate to the root logger by default, so a handler for this
    # file on either level counts.  Checking the root too avoids opening a
    # second mode="w" handler on the same file when this cell is re-run after
    # a stand-alone run (which installs its handlers on the root).
    _attached = any(
        isinstance(_h, logging.FileHandler)
        and os.path.normcase(os.path.abspath(_h.baseFilename)) == _LOG_FILE_KEY
        for _h in (*logger.handlers, *logging.getLogger().handlers)
    )
    if not _attached:
        _file_handler = logging.FileHandler(_LOG_FILE, mode="w", encoding="utf-8")
        # Same line layout as the stand-alone branch.
        _file_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
        logger.addHandler(_file_handler)
else:  # stand-alone execution
    # NOTE: basicConfig is a no-op when the root logger already has handlers.
    logging.basicConfig(
        level=logging.INFO,
        format=_LOG_FORMAT,
        handlers=[
            logging.FileHandler(_LOG_FILE, mode="w", encoding="utf-8"),
            logging.StreamHandler(sys.stdout),
        ],
    )
    logger = logging.getLogger(__name__)

logger.info("==========  STAGE 03: EXTENDED RATIO FACTORY ==========")
logger.info("RUN_DIR        : %s", RUN_DIR)
logger.info("SWAN_YEAR=%s  RUN_DATE=%s  WINSOR_PCT=%s", SWAN_YEAR, RUN_DATE, WINSOR_PCT)
logger.info("DATE_COL / ID_COL = %s / %s", DATE_COL, ID_COL)
# ──────────────────────────────────────────────────────────────────────
# ──────────────────────────────────────────────────────────────────────
# 4 · LOAD WORK DATAFRAME  (memory → disk fallback)
# ──────────────────────────────────────────────────────────────────────
if "data_stage_2" not in globals():
    # Fresh session: fall back to the CSV that Stage 02 wrote into this run.
    if not STAGE2_FILE.is_file():
        raise FileNotFoundError(f"Stage 02 CSV not found at {STAGE2_FILE}")
    df = pd.read_csv(STAGE2_FILE, parse_dates=[DATE_COL], low_memory=False)
    logger.info("Stage 02 CSV loaded: %d rows", len(df))
else:
    # Stage 02 ran in this session: work on a copy so its frame stays untouched.
    # NOTE(review): only the CSV path parses DATE_COL as dates — confirm the
    # in-memory frame already carries datetimes, or the sort below is lexical.
    df = globals()["data_stage_2"].copy()
    logger.info("Stage 02 data reused from memory (%d rows).", len(df))

# order rows by entity, then by date within each entity (index labels are kept)
df = df.sort_values(by=[ID_COL, DATE_COL])
# ──────────────────────────────────────────────────────────────────────
# 5 · NUMERIC HELPERS
# ──────────────────────────────────────────────────────────────────────
def safe_div(a, b):
    """Divide *a* by *b* element-wise, mapping every non-finite result to NaN.

    Division by zero and NaN operands never raise: any ±inf or NaN in the
    quotient comes back as NaN.  A finite numerator over a ±inf denominator
    is a finite 0 and is returned unchanged.

    Parameters
    ----------
    a, b : pandas.Series, numpy.ndarray or scalar
        Numerator and denominator; ordinary pandas / NumPy alignment and
        broadcasting rules apply.

    Returns
    -------
    pandas.Series
        When the quotient is a Series (its index is preserved).
    numpy.ndarray
        Otherwise (0-d for scalar inputs).
    """
    def _as_float(x):
        # Anything array-like exposes .astype; plain Python numbers do not.
        return x.astype("float64") if hasattr(x, "astype") else np.float64(x)

    with np.errstate(divide="ignore", invalid="ignore"):
        try:
            out = a / b
        except ZeroDivisionError:
            # np.errstate only governs NumPy float arithmetic.  Plain Python
            # numbers (and object-dtype data that falls back to Python's `/`)
            # raise on x / 0, so redo the division in float64 where it yields
            # inf / nan instead.
            out = _as_float(a) / _as_float(b)
    if isinstance(out, pd.Series):
        return out.replace([np.inf, -np.inf], np.nan)
    return np.where(np.isfinite(out), out, np.nan)

def winsor(s: pd.Series, pct: float = 0.01) -> pd.Series:
    """Clip *s* to its own ``[pct, 1 - pct]`` quantile band.

    Values below the ``pct`` quantile are raised to it and values above the
    ``1 - pct`` quantile are lowered to it; everything else is unchanged.

    Parameters
    ----------
    s : Series to winsorise.
    pct : Tail fraction clipped on each side; must satisfy ``0 < pct < 0.5``.

    Raises
    ------
    ValueError
        If *pct* lies outside the open interval (0, 0.5).
    """
    pct_in_range = 0 < pct < 0.5
    if not pct_in_range:
        raise ValueError("pct must be between 0 and 0.5")
    lower_cap = s.quantile(pct)
    upper_cap = s.quantile(1 - pct)
    return s.clip(lower=lower_cap, upper=upper_cap)

def slope(series: pd.Series, window: int) -> float:
    """Least-squares slope of the last *window* non-NaN values of *series*.

    NaNs are dropped first, then the trailing *window* observations are
    regressed (degree-1 ``np.polyfit``) on the positions ``0 .. k-1``.

    Returns
    -------
    float
        The fitted slope, or NaN when fewer than two observations remain or
        when the fit itself fails for any reason (best-effort by design).
    """
    recent = series.dropna().tail(window)
    n_obs = len(recent)
    if n_obs < 2:
        return np.nan
    try:
        coeffs = np.polyfit(
            np.arange(n_obs, dtype=float),
            recent.values.astype(float),
            1,
        )
    except Exception:
        # e.g. non-numeric values or a failed least-squares solve
        return np.nan
    else:
        # coeffs is (slope, intercept) for a degree-1 fit
        return coeffs[0]

def skew(series: pd.Series, window: int) -> pd.Series:
    """Rolling bias-corrected sample skewness of *series*.

    Each output value is the skewness of the non-NaN observations inside the
    trailing *window* rows.  A window with fewer than three valid
    observations yields NaN.

    The NaNs are dropped before calling SciPy: ``scipy.stats.skew``
    propagates NaN by default, so a single gap inside an otherwise
    well-populated window used to blank the result even though the
    ">= 3 valid values" guard had passed.

    Parameters
    ----------
    series : Values to scan (expected to be one entity's time-ordered data).
    window : Number of trailing rows per window; must be at least 3 because
        ``min_periods`` is fixed at 3.

    Returns
    -------
    pandas.Series
        Same index as *series*.
    """
    def _window_skew(x: pd.Series) -> float:
        valid = x.dropna()
        if len(valid) < 3:
            return np.nan
        return _scipy_skew(valid.to_numpy(dtype=float), bias=False)

    return series.rolling(window, min_periods=3).apply(_window_skew, raw=False)

def _to_series(x, index):
    """Coerce a ratio function's output into a float64 Series on *index*.

    Accepted inputs
    ---------------
    Series     – re-indexed onto *index* (missing labels become NaN).
    DataFrame  – reduced to one column: the first numeric column when it is
                 the only one or when all numeric columns are identical,
                 otherwise the row-wise mean of the numeric columns.
    scalar     – broadcast to every label of *index*.

    Raises
    ------
    ValueError
        For a DataFrame without numeric columns or any other input type.
    """
    def _aligned(values):
        return values.reindex(index).astype("float64")

    if isinstance(x, pd.Series):
        return _aligned(x)

    if isinstance(x, pd.DataFrame):
        numeric = [col for col in x if pd.api.types.is_numeric_dtype(x[col])]
        if not numeric:
            raise ValueError("DataFrame output has no numeric columns")
        head = x[numeric[0]]
        # `all` over an empty tail is True, which covers the single-column case.
        if all(head.equals(x[col]) for col in numeric[1:]):
            return _aligned(head)
        return _aligned(x[numeric].mean(axis=1))

    if np.isscalar(x):
        return pd.Series(float(x), index=index, dtype="float64")

    raise ValueError(f"Unsupported output type: {type(x)}")

# ──────────────────────────────────────────────────────────────────────
# 6 · RATIO DEFINITIONS  (verbatim from user’s library)
#     – we keep them local for a self-contained file
# ──────────────────────────────────────────────────────────────────────
ratio_funcs = {

    #### PREPARE STAGES
         # ───────────────────────────────────── PHYSICAL · PREPARE
    "Cash_to_Total_Assets":            lambda C: safe_div(C["Cash"].fillna(0), C["TotalAssets"]),
    "Cash_to_Total_Assets_Alt":        lambda C: safe_div(C["CashAndCashEquivalents"].fillna(0), C["TotalAssets"]),
    "Current_Ratio":                   lambda C: safe_div(C["CurrentAssets"], C["CurrentLiabilities"]),
    "Quick_Ratio":                     lambda C: safe_div(C["CurrentAssets"] - C["Inventory"], C["CurrentLiabilities"]),
    "Cash_Ratio":                      lambda C: safe_div(C["CashAndCashEquivalents"], C["CurrentLiabilities"]),
    "Operating_Cash_Flow_Ratio":       lambda C: safe_div(C["OperatingCashFlow"], C["CurrentLiabilities"]),
    "NetWorkingCapital_to_Assets":     lambda C: safe_div(C["CurrentAssets"] - C["CurrentLiabilities"], C["TotalAssets"]),
    "Cash_Conversion_Cycle":           lambda C: C["AccountsReceivableDays"] + C["InventoryDays"] - C["AccountsPayableDays"],
    "Operating_CF_to_Debt":            lambda C: safe_div(C["OperatingCashFlow"], C["TotalDebt"]),
    "NetDebt_to_OCF":                  lambda C: safe_div(C["TotalDebt"] - C["CashAndCashEquivalents"], C["OperatingCashFlow"]),
    "DaysPayablesOutstanding":         lambda C: safe_div(C["AccountsPayable"], safe_div(C["CostOfRevenue"], 365)),
    "WorkingCapital_to_Sales":         lambda C: safe_div(C["CurrentAssets"] - C["CurrentLiabilities"], C["TotalRevenue"]),
    "CashEquivalents_to_CurrentLiab":  lambda C: safe_div(C["CashAndCashEquivalents"], C["CurrentLiabilities"]),
    "AccountsReceivable_Turnover":     lambda C: safe_div(C["TotalRevenue"], C["AccountsReceivable"]),
    "Inventory_Turnover":              lambda C: safe_div(C["CostOfRevenue"], C["Inventory"]),
    "Net_Operating_WC_to_Assets":      lambda C: safe_div(C["AccountsReceivable"] + C["Inventory"] - C["AccountsPayable"], C["TotalAssets"]),
    "CashFlow_to_Debt":                lambda C: safe_div(C["OperatingCashFlow"], C["TotalDebt"]),
    "CashFlow_to_Assets":              lambda C: safe_div(C["OperatingCashFlow"], C["TotalAssets"]),
    "FCF_to_Assets":                   lambda C: safe_div(C["FreeCashFlow"], C["TotalAssets"]),
    "CashFlow_Coverage_of_Interest":   lambda C: safe_div(C["OperatingCashFlow"], C["InterestExpense"]),
    "OCF_to_NetIncome":                lambda C: safe_div(C["OperatingCashFlow"], C["NetIncome"]),
    "DaysSalesOutstanding":            lambda C: safe_div(C["AccountsReceivable"], safe_div(C["TotalRevenue"], 365)),
    "DaysInventoryOutstanding":        lambda C: safe_div(C["Inventory"], safe_div(C["CostOfRevenue"], 365)),
    "OperatingCycle":                  lambda C: safe_div(C["AccountsReceivable"], safe_div(C["TotalRevenue"], 365)) + safe_div(C["Inventory"], safe_div(C["CostOfRevenue"], 365)),
    "BeginningCash_to_Sales":          lambda C: safe_div(C["BeginningCashPosition"], C["TotalRevenue"]),
    "RetainedEarnings_to_Assets":      lambda C: safe_div(C["RetainedEarnings"], C["TotalAssets"]),
    "Deposits_to_Assets":              lambda C: safe_div(C.get("TotalDeposits", 0), C["TotalAssets"]),
    "Cash_Burn_Duration":              lambda C: safe_div(C["CashAndCashEquivalents"], safe_div(C["CostOfRevenue"] + C["SellingGeneralAndAdministration"], 365)),
    "WorkingCapital_Days_Trend":       lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope(g["DaysSalesOutstanding"] + g["DaysInventoryOutstanding"] - g["DaysPayablesOutstanding"], 3)).reset_index(level=0, drop=True),
    "Cash_Return_on_Assets":           lambda C: safe_div(C["OperatingCashFlow"], C["TotalAssets"]),
    "CashConversion_Ratio_3yrAvg":     lambda C: C.groupby(C["Symbol"]).apply(lambda g: safe_div((g["OperatingCashFlow"]/g["NetIncome"]).rolling(3,1).mean(), 1)).reset_index(level=0, drop=True),
    "FCF_Margin_3yrAvg":               lambda C: C.groupby(C["Symbol"])["FreeCashFlow"].transform(lambda x: safe_div(x.rolling(3,1).mean(), C["TotalRevenue"].rolling(3,1).mean())),
    "FCF_Yield_on_Assets":             lambda C: safe_div(C["FreeCashFlow"], C["TotalAssets"]),
    "Operating_Efficiency_Ratio":      lambda C: safe_div(C["OperatingExpense"], C["TotalRevenue"]),
    "WorkingCapital_Turnover":         lambda C: safe_div(C["TotalRevenue"], C["CurrentAssets"] - C["CurrentLiabilities"]),

    # ─────────────────────────────────── INFORMATION · PREPARE
    "Accrual_Ratio":                   lambda C: safe_div(C["NetIncome"] - C["OperatingCashFlow"], C["TotalAssets"]),
    "Sloan_Accrual_Measure":           lambda C: safe_div(C["NetIncome"] - C["OperatingCashFlow"], C["TotalAssets"]),
    "Cash_Earnings_Ratio":             lambda C: safe_div(C["OperatingCashFlow"], C["NetIncome"]),
    "Net_Operating_Accruals":          lambda C: safe_div(C["NetIncome"] - C["OperatingCashFlow"] - C["Depreciation"], C["TotalAssets"]),
    "Dechow_Dichev_AQ":                lambda C: safe_div(ratio_funcs["Accrual_Ratio"](C), np.abs(C["OperatingCashFlow"])),
    "Percent_Accruals":                lambda C: safe_div(C["NetIncome"] - C["OperatingCashFlow"], C["TotalRevenue"]),
    "Revenue_Quality":                 lambda C: safe_div(C["AccountsReceivable"].diff(), C["TotalRevenue"].diff()),
    "Revenue_Quality_Delta_AR":        lambda C: safe_div(C["AccountsReceivable"].diff(), C["TotalRevenue"].diff()),
    "WC_Accrual_Ratio":                lambda C: safe_div((C["AccountsReceivable"].diff() + C["Inventory"].diff() - C["AccountsPayable"].diff()), C["TotalRevenue"]),
    "DSO_Trend_3yr": lambda C: C.groupby("Symbol")["DaysSalesOutstanding"].transform(lambda x: slope(x, 3)),
    "Inventory_Inflation_3yr": lambda C: C.groupby("Symbol")["DaysInventoryOutstanding"].transform(lambda x: slope(x, 3)),
    "ROA_3yrAvg":                      lambda C: safe_div(C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: x.rolling(3, 1).mean()), C.groupby(C["Symbol"])["TotalAssets"].transform(lambda x: x.rolling(3, 1).mean())),
    "ROE_3yrAvg":                      lambda C: safe_div(C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: x.rolling(3, 1).mean()), C.groupby(C["Symbol"])["TotalEquity"].transform(lambda x: x.rolling(3, 1).mean())),
    "GrossMargin_3yrAvg":              lambda C: safe_div(C.groupby(C["Symbol"])["GrossProfit"].transform(lambda x: x.rolling(3, 1).mean()), C.groupby(C["Symbol"])["TotalRevenue"].transform(lambda x: x.rolling(3, 1).mean())),
    "EBITDA_Margin_3yrAvg":            lambda C: safe_div(C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: x.rolling(3, 1).mean()), C.groupby(C["Symbol"])["TotalRevenue"].transform(lambda x: x.rolling(3, 1).mean())),
    "OCF_Margin_3yrAvg":               lambda C: safe_div(C.groupby(C["Symbol"])["OperatingCashFlow"].transform(lambda x: x.rolling(3, 1).mean()), C.groupby(C["Symbol"])["TotalRevenue"].transform(lambda x: x.rolling(3, 1).mean())),
    "Accruals_to_Sales":               lambda C: safe_div(C["NetIncome"] - C["OperatingCashFlow"], C["TotalRevenue"]),

    # ───────────────────────────────────── COGNITIVE · PREPARE
    "R_D_Growth_Rate":                 lambda C: C["ResearchAndDevelopment"].pct_change(),
    "R_D_to_Opex":                     lambda C: safe_div(C["ResearchAndDevelopment"], C["OperatingExpense"]),
    "RnD_Plus_CapEx_Intensity":        lambda C: safe_div(C["ResearchAndDevelopment"] + C["CapitalExpenditure"], C["TotalRevenue"]),
    "R&D_Intensity":                   lambda C: safe_div(C["ResearchAndDevelopment"], C["TotalRevenue"]),
    "R_D_3yr_CAGR":                    lambda C: C.groupby(C["Symbol"])["ResearchAndDevelopment"].pct_change(periods=3),
    "Innovation_Ratio":                lambda C: safe_div(C["ResearchAndDevelopment"], C["GrossProfit"]),
    "R_D_Growth":                      lambda C: C.groupby(C["Symbol"])["ResearchAndDevelopment"].pct_change(),

    # ─────────────────────────────────────── SOCIAL · PREPARE
    "Interest_Coverage_Ratio":         lambda C: safe_div(C["EarningBeforeInterestAndTax"], C["InterestExpense"]),
    "Cash_Interest_Coverage_Ratio":    lambda C: safe_div(C["OperatingCashFlow"], C["InterestExpense"]),
    "EBITDA_Interest_Coverage":        lambda C: safe_div(C["EBITDA"], C["InterestExpense"]),
    "DSCR":                            lambda C: safe_div(C["EBITDA"] - C["CapitalExpenditure"], C["InterestExpense"] + C.get("DebtRepayment", 0)),
    "Debt_to_Assets":                  lambda C: safe_div(C["TotalDebt"], C["TotalAssets"]),
    "EquityRatio":                     lambda C: safe_div(C["TotalEquity"], C["TotalAssets"]),
    "LongTerm_Debt_to_Equity":         lambda C: safe_div(C["LongTermDebt"], C["TotalEquity"]),
    "Net_Debt_to_Equity":              lambda C: safe_div(C["TotalDebt"] - C["Cash"], C["TotalEquity"]),
    "Financial_Leverage":              lambda C: safe_div(C["TotalAssets"], C["TotalEquity"]),
    "Times_Interest_Earned":           lambda C: safe_div(C["EarningBeforeInterestAndTax"], C["InterestExpense"]),
    "Debt_Maturity_Split":             lambda C: safe_div(C["LongTermDebt"], C["TotalDebt"]),
    "InterestCoverage_Cushion": lambda C: safe_div(
    C["EBITDA"].fillna(C.groupby("SectorName")["EBITDA"].transform("median"))
      - C["CapitalExpenditure"].fillna(
            C.groupby("SectorName")["CapitalExpenditure"].transform("median")),
    C["InterestExpense"].fillna(
            C.groupby("SectorName")["InterestExpense"].transform("median"))
),


    #### ABSORB STAGES

    # ───────────────────────────────────── PHYSICAL · ABSORB
    "Asset_Turnover_Ratio":            lambda C: safe_div(C["TotalRevenue"], C["TotalAssets"]),
    "Gross_Profit_Margin":             lambda C: safe_div(C["GrossProfit"], C["TotalRevenue"]),
    "ROA":                             lambda C: safe_div(C["NetIncome"], C["TotalAssets"]),
    "Operating_Margin":                lambda C: safe_div(C["OperatingIncome"], C["TotalRevenue"]),
    "EBITDA_Margin":                   lambda C: safe_div(C["EBITDA"], C["TotalRevenue"]),
    "Net_Income_Margin":               lambda C: safe_div(C["NetIncome"], C["TotalRevenue"]),
    "OperatingIncome_Margin":          lambda C: safe_div(C["OperatingIncome"], C["TotalRevenue"]),
    "Cost_of_Revenue_Ratio":           lambda C: safe_div(C["CostOfRevenue"], C["TotalRevenue"]),
    "Operating_Expense_Ratio":         lambda C: safe_div(C["OperatingExpense"], C["TotalRevenue"]),
    "SG_A_to_Revenue":                 lambda C: safe_div(C["SellingGeneralAndAdministration"], C["TotalRevenue"]),
    "EBT_Margin":                      lambda C: safe_div(C["PretaxIncome"], C["TotalRevenue"]),
    "ROCE":                            lambda C: safe_div(C["EarningBeforeInterestAndTax"], C["TotalAssets"] - C["CurrentLiabilities"]),
    "GrossMargin_Stability":           lambda C: (safe_div(C["GrossProfit"], C["TotalRevenue"]).groupby(C["Symbol"]).transform(lambda x: x.rolling(5, 3).std())),
    "Fixed_Asset_Turnover":            lambda C: safe_div(C["TotalRevenue"], C["TotalAssets"] - C["CurrentAssets"]),
    "Inventory_to_Assets":             lambda C: safe_div(C["Inventory"], C["TotalAssets"]),
    "Receivables_to_Assets":           lambda C: safe_div(C["AccountsReceivable"], C["TotalAssets"]),
    "Payables_to_Assets":              lambda C: safe_div(C["AccountsPayable"], C["TotalAssets"]),
    "Operating_Return_on_Opex":        lambda C: safe_div(C["OperatingIncome"], C["OperatingExpense"]),
    "CashFlow_Margin":                 lambda C: safe_div(C["OperatingCashFlow"], C["TotalRevenue"]),
    "CashConversionEfficiency":        lambda C: safe_div(C["OperatingCashFlow"], C["TotalRevenue"]),
    "Operating_Leverage":              lambda C: safe_div(C["OperatingIncome"].pct_change(5), C["TotalRevenue"].pct_change(5)),
    "InvestedCapital_Turnover":        lambda C: safe_div(C["TotalRevenue"], C["InvestedCapital"]),
    "FCF_Margin":                      lambda C: safe_div(C["FreeCashFlow"], C["TotalRevenue"]),

    # ─────────────────────────────────── INFORMATION · ABSORB
    "Earnings_Volatility":             lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "EBITDA_Volatility":               lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "Earnings_Volatility_AbsMean":     lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).apply(lambda y: np.abs(y.mean())))),
    "EBITDA_CV":                       lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "NetIncome_StDev_3yr":             lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: x.rolling(3, 2).std()),
    "EBITDA_StDev_3yr":                lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: x.rolling(3, 2).std()),
    "Revenue_StDev_3yr":               lambda C: C.groupby(C["Symbol"])["TotalRevenue"].transform(lambda x: x.rolling(3, 2).std()),
    "OCF_StDev_3yr":                   lambda C: C.groupby(C["Symbol"])["OperatingCashFlow"].transform(lambda x: x.rolling(3, 2).std()),
    "ROA_StDev_5yr":                   lambda C: C.groupby(C["Symbol"]).apply(lambda g: safe_div(g["NetIncome"], g["TotalAssets"]).rolling(5, 2).std()).reset_index(level=0, drop=True),
    "EBITDA_Volatility_5yr":           lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "Earnings_Volatility_5yr":         lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "OperatingCF_Volatility_5yr":      lambda C: C.groupby(C["Symbol"])["OperatingCashFlow"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "Sales_Volatility_5yr":            lambda C: C.groupby(C["Symbol"])["TotalRevenue"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "FCF_Volatility_3yr":              lambda C: C.groupby(C["Symbol"])["FreeCashFlow"].transform(lambda x: x.rolling(3, 2).std()),
    "GrossMargin_Volatility":          lambda C: C.groupby(C["Symbol"])["GrossProfit"].transform(lambda x: safe_div((x/C["TotalRevenue"]).rolling(5,2).std(), (x/C["TotalRevenue"]).rolling(5,2).mean())),

    # ───────────────────────────────────── COGNITIVE · ABSORB
    "Gross_Profitability_Alt":         lambda C: safe_div(C["GrossProfit"], C["NetAssets"]),
    "GrossProfit_to_Equity":           lambda C: safe_div(C["GrossProfit"], C["TotalEquity"]),
    "Price_Premium_Index":             lambda C: safe_div(ratio_funcs["Gross_Profit_Margin"](C), safe_div(C["GrossProfit"], C["TotalRevenue"]).groupby(C["SectorName"]).transform("median")),

    # ─────────────────────────────────────── SOCIAL · ABSORB
    "LT_Debt_to_EBITDA":               lambda C: safe_div(C["LongTermDebt"], C["EBITDA"]),
    "InterestBearingDebt_to_EBITDA":   lambda C: safe_div(C["TotalDebt"], C["EBITDA"]),
    "Net_Debt_to_EBITDA":              lambda C: safe_div(C["TotalDebt"] - C["CashAndCashEquivalents"], C["EBITDA"]),
    "LT_Debt_to_TotalDebt":            lambda C: safe_div(C["LongTermDebt"], C["TotalDebt"]),
    "Effective_Tax_Rate":              lambda C: safe_div(C["IncomeTaxExpense"], C["PretaxIncome"]),
    "InterestExpense_to_Sales":        lambda C: safe_div(C["InterestExpense"], C["TotalRevenue"]),
    "InterestCoverage_Volatility_5yr": lambda C: C.groupby(C["Symbol"]).apply(lambda g: safe_div((g["EarningBeforeInterestAndTax"]/g["InterestExpense"]).rolling(5, 2).std(), (g["EarningBeforeInterestAndTax"]/g["InterestExpense"]).rolling(5, 2).mean())).reset_index(level=0, drop=True),
    "InterestCoverage_Volatility_3yr": lambda C: C.groupby(C["Symbol"]).apply(lambda g: safe_div((g["EarningBeforeInterestAndTax"]/g["InterestExpense"]).rolling(3, 2).std(), (g["EarningBeforeInterestAndTax"]/g["InterestExpense"]).rolling(3, 2).mean())).reset_index(level=0, drop=True),


    #### RECOVER STAGES

    # ───────────────────────────────────── PHYSICAL · RECOVER
    "ROE":                             lambda C: safe_div(C["NetIncome"], C["TotalEquity"]),
    "Debt_to_Equity":                  lambda C: safe_div(C["TotalDebt"], C["TotalEquity"]),
    "Debt_to_Equity_Liab":             lambda C: safe_div(C["TotalLiabilitiesAsReported"], C["TotalEquity"]),
    "EPS":                             lambda C: safe_div(C["NetIncome"], C["BasicAverageShares"]),
    "Cash_Dividends_to_Net_Income":    lambda C: safe_div(C["CashDividendsPaid"], C["NetIncome"]),
    "Return_on_Tangible_Equity":       lambda C: safe_div(C["NetIncome"], C["TotalEquity"] - C["Goodwill"]),
    "Total_Payout_Ratio":              lambda C: safe_div(C.get("CashDividendsPaid", 0) + C.get("CommonStockRepurchased", 0), C["NetIncome"]),
    "NetDebt_PayDown_Rate":            lambda C: safe_div(-(C["TotalDebt"] - C["CashAndCashEquivalents"]).diff(), C["TotalDebt"]),
    "Incremental_ROIC":                lambda C: safe_div(C["NetIncome"].diff() + C["InterestExpense"].diff() * (1 - 0.30), (C["TotalDebt"] + C["TotalEquity"] - C["Cash"]).diff()),
    "ROIC_Slope_5yr":                  lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: slope(x, 5)),
    "ROIC_Slope_3yr":                  lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: slope(x, 3)),
    "ROIC_Trend_5yr_Slope":            lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: slope(x, 5)),
    "ROIC_Trend_3yr_Slope":            lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: slope(x, 3)),
    "ROIC_3yr_Avg":                    lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: x.rolling(3, 1).mean()),
    "ROIC_5yr_Median":                 lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: x.rolling(5, 1).median()),
    "ROIC_3yr_Median":                 lambda C: C.groupby(C["Symbol"])["ROIC"].transform(lambda x: x.rolling(3, 1).median()),
    "EBITDA_DropThrough":              lambda C: safe_div(C["EBITDA"].diff(), C["TotalRevenue"].diff()),
    "OperatingLeverage_Slope":         lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope((g["OperatingIncome"].pct_change()) / (g["TotalRevenue"].pct_change()), 5)).reset_index(level=0, drop=True),
    "Revenue_Recovery_Rate":           lambda C: safe_div(C.groupby(C["Symbol"])["TotalRevenue"].shift(-1), C["TotalRevenue"]),
    "Retention_to_Growth":             lambda C: safe_div(C["RetainedEarnings"], C["TotalRevenue"].diff()),

    # ─────────────────────────────────── INFORMATION · RECOVER
    "Advertising_to_Sales":            lambda C: safe_div(C["SellingAndMarketingExpense"].fillna(0), C["TotalRevenue"]),
    "Marketing_Efficiency_Ratio":      lambda C: safe_div(C["TotalRevenue"] - C["CostOfRevenue"] - C["SellingAndMarketingExpense"], C["SellingAndMarketingExpense"]),
    "Combined_SellingExpense_to_Sales":lambda C: safe_div(C["SellingAndMarketingExpense"], C["TotalRevenue"]),
    "SGA_to_Sales":                    lambda C: safe_div(C["SellingGeneralAndAdministration"], C["TotalRevenue"]),
    "Advertising_to_Sales_3yrCAGR":    lambda C: ratio_funcs["Advertising_to_Sales"](C).groupby(C["Symbol"]).pct_change(3),
    "OperatingMargin_Delta":           lambda C: (C["OperatingIncome"]/C["TotalRevenue"]) - (C["OperatingIncome"].shift(1)/C["TotalRevenue"].shift(1)),
    "OperatingMargin_Slope_5yr":       lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope(g["OperatingIncome"]/g["TotalRevenue"], 5)).reset_index(level=0, drop=True),
    "OperatingMargin_Slope_3yr":       lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope(g["OperatingIncome"]/g["TotalRevenue"], 3)).reset_index(level=0, drop=True),
    "EBITDA_Margin_StdDev_5yr":        lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: (x/C["TotalRevenue"]).rolling(5,2).std()),
    "EBITDA_Margin_StdDev_3yr":        lambda C: C.groupby(C["Symbol"])["EBITDA"].transform(lambda x: (x/C["TotalRevenue"]).rolling(3,2).std()),
    "EPS_Volatility_5yr":              lambda C: C.groupby(C["Symbol"])["BasicEPS"].transform(lambda x: safe_div(x.rolling(5,2).std(), x.rolling(5,2).mean().abs())),
    "EPS_Volatility_3yr":              lambda C: C.groupby(C["Symbol"])["BasicEPS"].transform(lambda x: safe_div(x.rolling(3,2).std(), x.rolling(3,2).mean().abs())),
    "EPS_Growth":                      lambda C: safe_div(C["BasicEPS"].diff(), C["BasicEPS"].shift(1)),
    "EPS_Stability":                   lambda C: C.groupby(C["Symbol"])["BasicEPS"].transform(lambda x: safe_div(x.rolling(5,2).std(), x.rolling(5,2).mean().abs())),
    "ROE_StdDev_5yr":                  lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: safe_div(x.rolling(5,2).std(), C["TotalEquity"].rolling(5,2).mean())),
    "ROE_StdDev_3yr":                  lambda C: C.groupby(C["Symbol"])["NetIncome"].transform(lambda x: safe_div(x.rolling(3,2).std(), C["TotalEquity"].rolling(3,2).mean())),

    # ───────────────────────────────────── COGNITIVE · RECOVER
    "NOPAT_Alt":                       lambda C: safe_div(C["NetIncome"] + C["InterestExpense"] * (1 - C.get("EffectiveTaxRateAsReported", 0.30)), C["TotalRevenue"]),
    "NOPAT_to_Avg_InvestedCapital":    lambda C: safe_div(C["NetIncome"] + C["InterestExpense"] * (1 - 0.30), (C["TotalDebt"] + C["TotalEquity"] - C["Cash"]).rolling(2).mean()),
    "ROIC":                            lambda C: safe_div(C["EarningBeforeInterestAndTax"], C["TotalDebt"] + C["TotalEquity"] - C["Cash"]),

    # ─────────────────────────────────────── SOCIAL · RECOVER
    "Equity_Issuance_Rate":            lambda C: safe_div(C["IssuanceOfCapitalStock"], C["TotalEquity"]),
    "Share_Issuance_Rate":             lambda C: safe_div(C["BasicAverageShares"].diff(), C["BasicAverageShares"].shift(1)),
    "Share_Dilution_3yrChg":           lambda C: safe_div(C["BasicAverageShares"], C.groupby(C["Symbol"])["BasicAverageShares"].shift(3)) - 1,
    
    #### ADAPT STAGES

     # ───────────────────────────────────── PHYSICAL · ADAPT
    "CapEx_to_Sales":                  lambda C: safe_div(C["CapitalExpenditure"], C["TotalRevenue"]),
    "CapEx_to_Depreciation":           lambda C: safe_div(C["CapitalExpenditure"], C["Depreciation"]),
    "CapEx_plus_RnD_to_Sales":         lambda C: safe_div(C["CapitalExpenditure"] + C["ResearchAndDevelopment"], C["TotalRevenue"]),
    "CapEx_GrowthRate":                lambda C: C.groupby(C["Symbol"])["CapitalExpenditure"].pct_change(),
    "CapEx_Dep_Growth":                lambda C: safe_div(C["CapitalExpenditure"], C["Depreciation"]).pct_change(),
    "Maintenance_CapEx_Ratio":         lambda C: safe_div(C["CapitalExpenditure"], C["Depreciation"]),
    "FreeCashFlow_to_InvestedCapital": lambda C: safe_div(C["FreeCashFlow"], C["InvestedCapital"]),
    "CapitalisedSoftware_to_Assets":   lambda C: safe_div(C.get("SoftwareIntangibles", 0), C["TotalAssets"]),
    "Acquisitions_to_OCF":             lambda C: safe_div(C.get("AcquisitionsCashOutflow", 0), C["OperatingCashFlow"]),
    "Reinvestment_Rate":               lambda C: safe_div(C["CapitalExpenditure"] - C["Depreciation"], C["OperatingCashFlow"]),
    "CapEx_Variability_5yr":           lambda C: C.groupby(C["Symbol"])["CapitalExpenditure"].transform(lambda x: safe_div(x.rolling(5, 2).std(), x.rolling(5, 2).mean())),
    "CapEx_Variability_3yr":           lambda C: C.groupby(C["Symbol"])["CapitalExpenditure"].transform(lambda x: safe_div(x.rolling(3, 2).std(), x.rolling(3, 2).mean())),
    "OCF_to_CapEx":                    lambda C: safe_div(C["OperatingCashFlow"], C["CapitalExpenditure"]),
    "OCF_FreeCash_Cushion":            lambda C: safe_div(C["OperatingCashFlow"] - C["CapitalExpenditure"] - C["CashDividendsPaid"], C["TotalRevenue"]),

    # ─────────────────────────────────── INFORMATION · ADAPT
    "Retention_Ratio":                 lambda C: 1 - safe_div(C["CashDividendsPaid"], C["NetIncome"]),
    "Gross_Profitability":             lambda C: safe_div(C["GrossProfit"], C["TotalAssets"]),
    "NOPAT_Margin":                    lambda C: safe_div(C["NetIncome"] + C["InterestExpense"] * (1 - 0.30), C["TotalRevenue"]),
    "Sustainable_Growth_Rate":         lambda C: safe_div(C["NetIncome"], C["TotalEquity"]) * (1 - safe_div(C["CashDividendsPaid"], C["NetIncome"])),
    "Revenue_CAGR_5yr":                lambda C: (safe_div(C["TotalRevenue"], C.groupby(C["Symbol"])["TotalRevenue"].shift(5)))**0.2 - 1,
    "Revenue_CAGR_3yr":                lambda C: (safe_div(C["TotalRevenue"], C.groupby(C["Symbol"])["TotalRevenue"].shift(3)))**(1/3) - 1,
    "Revenue_Growth":                  lambda C: C.groupby(C["Symbol"])["TotalRevenue"].pct_change(),
    "NetIncome_Growth":                lambda C: C.groupby(C["Symbol"])["NetIncome"].pct_change(),
    "EBITDA_Growth":                   lambda C: C.groupby(C["Symbol"])["EBITDA"].pct_change(),
    "Assets_Growth":                   lambda C: C.groupby(C["Symbol"])["TotalAssets"].pct_change(),
    "Equity_Growth":                   lambda C: C.groupby(C["Symbol"])["TotalEquity"].pct_change(),
    "OCF_Growth":                      lambda C: C.groupby(C["Symbol"])["OperatingCashFlow"].pct_change(),
    "FCF_Growth":                      lambda C: safe_div(C["FreeCashFlow"].diff(), C["FreeCashFlow"].shift(1)),
    "FCF_Growth_3yrCAGR":              lambda C: (safe_div(C["FreeCashFlow"], C.groupby(C["Symbol"])["FreeCashFlow"].shift(3)))**(1/3) - 1,
    "OCF_CAGR_5yr":                    lambda C: safe_div(C["OperatingCashFlow"], C.groupby(C["Symbol"])["OperatingCashFlow"].shift(5))**0.2 - 1,
    "OCF_CAGR_3yr":                    lambda C: safe_div(C["OperatingCashFlow"], C.groupby(C["Symbol"])["OperatingCashFlow"].shift(3))**(1/3) - 1,
    "Dividend_Growth":                 lambda C: C.groupby(C["Symbol"])["DividendPerShare"].pct_change(),
    "Dividend_Growth_Alt":             lambda C: C.groupby(C["Symbol"])["CashDividendsPaid"].pct_change(),
    "CapEx_Growth":                    lambda C: C.groupby(C["Symbol"])["CapitalExpenditure"].pct_change(),
    "GrossProfit_Growth":              lambda C: C.groupby(C["Symbol"])["GrossProfit"].pct_change(),
    "OCF_Volatility_3yr":              lambda C: C.groupby(C["Symbol"])["OperatingCashFlow"].transform(lambda x: safe_div(x.rolling(3,2).std(), x.rolling(3,2).mean())),
    "CashFlow_Skewness":               lambda C: C.groupby(C["Symbol"])["OperatingCashFlow"].transform(lambda x: skew(x, 5)),
    "FCF_Volatility_5yr":              lambda C: C.groupby(C["Symbol"])["FreeCashFlow"].transform(lambda x: safe_div(x.rolling(5,2).std(), x.rolling(5,2).mean())),
    "OCF_Margin":                      lambda C: safe_div(C["OperatingCashFlow"], C["TotalRevenue"]),

    # ───────────────────────────────────── COGNITIVE · ADAPT
    "Market_Share_Ratio":              lambda C: safe_div(C["TotalRevenue"], C.groupby(C["SectorName"])["TotalRevenue"].transform("sum")),
    "Relative_Revenue_Growth_vs_Sector": lambda C: C.groupby(C["SectorName"])["TotalRevenue"].pct_change() - C.groupby(C["Symbol"])["TotalRevenue"].pct_change(),
    "Market_Share_Revenue_Change":     lambda C: safe_div(C["TotalRevenue"], C.groupby(C["SectorName"])["TotalRevenue"].transform("sum")),
    "Market_Share_EBITDA_Change":      lambda C: safe_div(C["EBITDA"], C.groupby(C["SectorName"])["EBITDA"].transform("sum")),
    "Relative_EBITDA_Growth_vs_Sector": lambda C: C.groupby(C["SectorName"])["EBITDA"].pct_change() - C.groupby(C["Symbol"])["EBITDA"].pct_change(),
    "Relative_OperatingIncome_Growth_vs_Sector": lambda C: C.groupby(C["SectorName"])["OperatingIncome"].pct_change() - C.groupby(C["Symbol"])["OperatingIncome"].pct_change(),
    "Market_Share_OperatingIncome_Change": lambda C: safe_div(C["OperatingIncome"], C.groupby(C["SectorName"])["OperatingIncome"].transform("sum")),
    "Rev_CAGR_vs_Sector":              lambda C: C.groupby(C["SectorName"])["TotalRevenue"].transform(lambda x: x.pct_change(periods=3)),
    "Relative_Revenue_Growth_Sector":  lambda C: C.groupby(C["SectorName"])["TotalRevenue"].pct_change(),
    "Relative_EBITDA_Growth_Sector":   lambda C: C.groupby(C["SectorName"])["EBITDA"].pct_change(),
    "Market_Share_of_Revenue":         lambda C: safe_div(C["TotalRevenue"], C.groupby(C["SectorName"])["TotalRevenue"].transform("sum")),
    "Revenue_Sector_Share_Growth":     lambda C: ratio_funcs["Market_Share_of_Revenue"](C).groupby(C["Symbol"]).pct_change(),
    "EBITDA_Sector_Share":             lambda C: safe_div(C["EBITDA"], C.groupby(C["SectorName"])["EBITDA"].transform("sum")),
    "Relative_OperatingIncome_Growth_Sector": lambda C: C.groupby(C["SectorName"])["OperatingIncome"].pct_change(),
    "Relative_NetIncome_Growth_Sector":       lambda C: C.groupby(C["SectorName"])["NetIncome"].pct_change(),
    "NetIncome_Sector_Share":                 lambda C: safe_div(C["NetIncome"], C.groupby(C["SectorName"])["NetIncome"].transform("sum")),
    "Sales_to_TotalAssets":            lambda C: safe_div(C["TotalRevenue"], C["TotalAssets"]),
    "Sales_to_Marketing_Leverage":     lambda C: safe_div(C["TotalRevenue"].pct_change(), C["SellingAndMarketingExpense"].pct_change()),
    "GrossMargin_Slope_5yr":           lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope(g["GrossProfit"]/g["TotalRevenue"], 5)).reset_index(level=0, drop=True),
    "GrossMargin_Slope_3yr":           lambda C: C.groupby(C["Symbol"]).apply(lambda g: slope(g["GrossProfit"]/g["TotalRevenue"], 3)).reset_index(level=0, drop=True),
    "Price_Realisation_Index":         lambda C: (C["GrossProfit"]/C["TotalRevenue"]).diff() - safe_div(C["CostOfRevenue"].diff(), C["TotalRevenue"].shift(1)),

    # ─────────────────────────────────────── SOCIAL · ADAPT
    "DPS_to_EPS":                      lambda C: safe_div(C["DividendPerShare"], C.get("BasicEPS", pd.Series(np.nan))),
    "Dividend_Payout_Ratio":           lambda C: safe_div(C["CashDividendsPaid"], C["NetIncome"]),
    "FCF_Payout_Ratio":                lambda C: safe_div(C["CashDividendsPaid"], C["FreeCashFlow"]),
    "Dividend_Stability_Index":        lambda C: C.groupby(C["Symbol"])["CashDividendsPaid"].transform(lambda x: x.notna().rolling(10, 1).sum() / 10),
    "Dividend_Yield_on_FCF":           lambda C: safe_div(C["CashDividendsPaid"], C["FreeCashFlow"]),
    "Dividend_Coverage":               lambda C: safe_div(C["OperatingCashFlow"], C["CashDividendsPaid"]),
    "Dividend_Coverage_FCF":           lambda C: safe_div(C["OperatingCashFlow"] - C["CapitalExpenditure"], C["CashDividendsPaid"]),
    "Dividend_Payout_CV":              lambda C: C.groupby(C["Symbol"]).apply(lambda g: safe_div((g["CashDividendsPaid"]/g["NetIncome"]).rolling(5,2).std(), (g["CashDividendsPaid"]/g["NetIncome"]).rolling(5,2).mean())).reset_index(level=0, drop=True),
    "Share_Count_Reduction_YoY":       lambda C: -safe_div(C["BasicAverageShares"].diff(), C["BasicAverageShares"].shift(1)),
    "Share_Dilution_3yrChg":           lambda C: safe_div(C["BasicAverageShares"], C.groupby(C["Symbol"])["BasicAverageShares"].shift(3)) - 1,
    "Net_Buyback_to_FCF":              lambda C: safe_div(C.get("CommonStockRepurchased", 0) - C.get("IssuanceOfCapitalStock", 0), C["FreeCashFlow"]),
}

# Second-pass ratios.  Every name listed here is *moved* out of `ratio_funcs`
# (via pop), so it is no longer part of the base pass; a missing name raises
# KeyError.  Order of the tuple is the order of evaluation in the derived pass.
derived_ratio_funcs = {
    ratio_name: ratio_funcs.pop(ratio_name)
    for ratio_name in (
        # ── ratios that need ROIC ──
        "ROIC_Slope_5yr",
        "ROIC_Slope_3yr",
        "ROIC_Trend_5yr_Slope",
        "ROIC_Trend_3yr_Slope",
        "ROIC_3yr_Avg",
        "ROIC_5yr_Median",
        "ROIC_3yr_Median",
        # ── ratios that need Days-based helpers ──
        "WorkingCapital_Days_Trend",
        "DSO_Trend_3yr",
        "Inventory_Inflation_3yr",
        # ── others that reference earlier ratios ──
        "CashConversion_Ratio_3yrAvg",
        "Revenue_Sector_Share_Growth",
    )
}

# ------------------------------------------------------------------------------
# 1 ░░  DOMAIN-STAGE MAP  ░░  (231 keys)
# ------------------------------------------------------------------------------

# Maps each ratio name to its "<Domain>-<Stage>" tag.
#   • Domain prefixes used below: Phys, Info, Cog, Soc.
#   • Stage suffixes used below:  Prepare, Absorb, Recover, Adapt.
#   • A value is normally a single tag string; the "dual-tag" entries hold a
#     list of two tags instead, so consumers must handle both str and list.
ratio_domain_stage_map = {

# ░░░░░░░░░░░░░░░  PHYSICAL · PREPARE  ░░░░░░░░░░░░░░░
"Cash_to_Total_Assets":"Phys-Prepare","Cash_to_Total_Assets_Alt":"Phys-Prepare",
"Current_Ratio":"Phys-Prepare","Quick_Ratio":"Phys-Prepare",
"Cash_Ratio":"Phys-Prepare","Operating_Cash_Flow_Ratio":"Phys-Prepare",
"NetWorkingCapital_to_Assets":"Phys-Prepare","Cash_Conversion_Cycle":"Phys-Prepare",
"Operating_CF_to_Debt":"Phys-Prepare","NetDebt_to_OCF":"Phys-Prepare",
"DaysPayablesOutstanding":"Phys-Prepare","WorkingCapital_to_Sales":"Phys-Prepare",
"CashEquivalents_to_CurrentLiab":"Phys-Prepare","AccountsReceivable_Turnover":"Phys-Prepare",
"Inventory_Turnover":"Phys-Prepare","Net_Operating_WC_to_Assets":"Phys-Prepare",
"CashFlow_to_Debt":"Phys-Prepare","CashFlow_to_Assets":"Phys-Prepare","FCF_to_Assets":"Phys-Prepare",
"DaysSalesOutstanding":"Phys-Prepare","DaysInventoryOutstanding":"Phys-Prepare",
"OperatingCycle":"Phys-Prepare","BeginningCash_to_Sales":"Phys-Prepare",
"RetainedEarnings_to_Assets":"Phys-Prepare","Deposits_to_Assets":"Phys-Prepare",
"Cash_Burn_Duration":"Phys-Prepare","WorkingCapital_Days_Trend":"Phys-Prepare",
"Cash_Return_on_Assets":"Phys-Prepare","CashConversion_Ratio_3yrAvg":"Phys-Prepare",
"FCF_Margin_3yrAvg":"Phys-Prepare","FCF_Yield_on_Assets":"Phys-Prepare",
"Operating_Efficiency_Ratio":"Phys-Prepare","WorkingCapital_Turnover":"Phys-Prepare",

# dual-tag ratios (value is a list of tags rather than a single string)
"CashFlow_Coverage_of_Interest":["Phys-Prepare","Soc-Prepare"],
"OCF_to_NetIncome":["Phys-Prepare","Info-Prepare"],

# ░░░░░░░░░░░░░░░  INFORMATION · PREPARE  ░░░░░░░░░░░░░░░
"Accrual_Ratio":"Info-Prepare","Sloan_Accrual_Measure":"Info-Prepare",
"Cash_Earnings_Ratio":"Info-Prepare","Net_Operating_Accruals":"Info-Prepare",
"Dechow_Dichev_AQ":"Info-Prepare","Percent_Accruals":"Info-Prepare",
"Revenue_Quality":"Info-Prepare","Revenue_Quality_Delta_AR":"Info-Prepare",
"WC_Accrual_Ratio":"Info-Prepare","DSO_Trend_3yr":"Info-Prepare",
"Inventory_Inflation_3yr":"Info-Prepare","ROA_3yrAvg":"Info-Prepare",
"ROE_3yrAvg":"Info-Prepare","GrossMargin_3yrAvg":"Info-Prepare",
"EBITDA_Margin_3yrAvg":"Info-Prepare","OCF_Margin_3yrAvg":"Info-Prepare",
"Accruals_to_Sales":"Info-Prepare",

# ░░░░░░░░░░░░░░░  COGNITIVE · PREPARE  ░░░░░░░░░░░░░░░
"R_D_Growth_Rate":"Cog-Prepare","R_D_to_Opex":"Cog-Prepare",
"RnD_Plus_CapEx_Intensity":"Cog-Prepare","R_D_3yr_CAGR":"Cog-Prepare",
"Innovation_Ratio":"Cog-Prepare","R_D_Growth":"Cog-Prepare",

# ░░░░░░░░░░░░░░░  SOCIAL · PREPARE  ░░░░░░░░░░░░░░░
"Interest_Coverage_Ratio":"Soc-Prepare","Cash_Interest_Coverage_Ratio":"Soc-Prepare",
"EBITDA_Interest_Coverage":"Soc-Prepare","DSCR":"Soc-Prepare",
"Debt_to_Assets":"Soc-Prepare","EquityRatio":"Soc-Prepare",
"LongTerm_Debt_to_Equity":"Soc-Prepare","Net_Debt_to_Equity":"Soc-Prepare",
"Financial_Leverage":"Soc-Prepare","Times_Interest_Earned":"Soc-Prepare",
"Debt_Maturity_Split":"Soc-Prepare","InterestCoverage_Cushion":"Soc-Prepare",

# ░░░░░░░░░░░░░░░  PHYSICAL · ABSORB  ░░░░░░░░░░░░░░░
"Asset_Turnover_Ratio":"Phys-Absorb","Gross_Profit_Margin":"Phys-Absorb",
"ROA":"Phys-Absorb","Operating_Margin":"Phys-Absorb","EBITDA_Margin":"Phys-Absorb",
"Net_Income_Margin":"Phys-Absorb","OperatingIncome_Margin":"Phys-Absorb",
"Cost_of_Revenue_Ratio":"Phys-Absorb","Operating_Expense_Ratio":"Phys-Absorb",
"SG_A_to_Revenue":"Phys-Absorb","EBT_Margin":"Phys-Absorb","ROCE":"Phys-Absorb",
"GrossMargin_Stability":"Phys-Absorb","Fixed_Asset_Turnover":"Phys-Absorb",
"Inventory_to_Assets":"Phys-Absorb","Receivables_to_Assets":"Phys-Absorb",
"Payables_to_Assets":"Phys-Absorb","Operating_Return_on_Opex":"Phys-Absorb",
"CashFlow_Margin":"Phys-Absorb","CashConversionEfficiency":"Phys-Absorb",
"Operating_Leverage":"Phys-Absorb","InvestedCapital_Turnover":"Phys-Absorb",
"FCF_Margin":"Phys-Absorb",

# ░░░░░░░░░░░░░░░  INFORMATION · ABSORB  ░░░░░░░░░░░░░░░
"Earnings_Volatility":"Info-Absorb","EBITDA_Volatility":"Info-Absorb",
"Earnings_Volatility_AbsMean":"Info-Absorb","EBITDA_CV":"Info-Absorb",
"NetIncome_StDev_3yr":"Info-Absorb","EBITDA_StDev_3yr":"Info-Absorb",
"Revenue_StDev_3yr":"Info-Absorb","OCF_StDev_3yr":"Info-Absorb",
"ROA_StDev_5yr":"Info-Absorb","EBITDA_Volatility_5yr":"Info-Absorb",
"Earnings_Volatility_5yr":"Info-Absorb","OperatingCF_Volatility_5yr":"Info-Absorb",
"Sales_Volatility_5yr":"Info-Absorb","FCF_Volatility_3yr":"Info-Absorb",
"GrossMargin_Volatility":"Info-Absorb",

# ░░░░░░░░░░░░░░░  COGNITIVE · ABSORB  ░░░░░░░░░░░░░░░
"Gross_Profitability_Alt":"Cog-Absorb","GrossProfit_to_Equity":"Cog-Absorb",
"Price_Premium_Index":"Cog-Absorb",

# ░░░░░░░░░░░░░░░  SOCIAL · ABSORB  ░░░░░░░░░░░░░░░
"LT_Debt_to_EBITDA":"Soc-Absorb","InterestBearingDebt_to_EBITDA":"Soc-Absorb",
"Net_Debt_to_EBITDA":"Soc-Absorb","LT_Debt_to_TotalDebt":"Soc-Absorb",
"Effective_Tax_Rate":"Soc-Absorb","InterestExpense_to_Sales":"Soc-Absorb",
"InterestCoverage_Volatility_5yr":"Soc-Absorb","InterestCoverage_Volatility_3yr":"Soc-Absorb",

# ░░░░░░░░░░░░░░░  PHYSICAL · RECOVER  ░░░░░░░░░░░░░░░
"ROE":"Phys-Recover","Debt_to_Equity":"Phys-Recover","Debt_to_Equity_Liab":"Phys-Recover",
"EPS":"Phys-Recover","Cash_Dividends_to_Net_Income":"Phys-Recover",
"Return_on_Tangible_Equity":"Phys-Recover","Total_Payout_Ratio":"Phys-Recover",
"NetDebt_PayDown_Rate":"Phys-Recover","Incremental_ROIC":"Phys-Recover",
"ROIC_Slope_5yr":"Phys-Recover","ROIC_Slope_3yr":"Phys-Recover",
"ROIC_Trend_5yr_Slope":"Phys-Recover","ROIC_Trend_3yr_Slope":"Phys-Recover",
"ROIC_3yr_Avg":"Phys-Recover","ROIC_5yr_Median":"Phys-Recover",
"ROIC_3yr_Median":"Phys-Recover","EBITDA_DropThrough":"Phys-Recover",
"OperatingLeverage_Slope":"Phys-Recover","Revenue_Recovery_Rate":"Phys-Recover",
"Retention_to_Growth":"Phys-Recover",

# ░░░░░░░░░░░░░░░  INFORMATION · RECOVER  ░░░░░░░░░░░░░░░
"Advertising_to_Sales":"Info-Recover","Marketing_Efficiency_Ratio":"Info-Recover",
"Combined_SellingExpense_to_Sales":"Info-Recover","SGA_to_Sales":"Info-Recover",
"Advertising_to_Sales_3yrCAGR":"Info-Recover","OperatingMargin_Delta":"Info-Recover",
"OperatingMargin_Slope_5yr":"Info-Recover","OperatingMargin_Slope_3yr":"Info-Recover",
"EBITDA_Margin_StdDev_5yr":"Info-Recover","EBITDA_Margin_StdDev_3yr":"Info-Recover",
"EPS_Volatility_5yr":"Info-Recover","EPS_Volatility_3yr":"Info-Recover",
"EPS_Growth":"Info-Recover","EPS_Stability":"Info-Recover",
"ROE_StdDev_5yr":"Info-Recover","ROE_StdDev_3yr":"Info-Recover",

# ░░░░░░░░░░░░░░░  COGNITIVE · RECOVER  ░░░░░░░░░░░░░░░
"NOPAT_Alt":"Cog-Recover","NOPAT_to_Avg_InvestedCapital":"Cog-Recover",
"ROIC":"Cog-Recover",

# ░░░░░░░░░░░░░░░  SOCIAL · RECOVER  ░░░░░░░░░░░░░░░
# NOTE(review): Share_Dilution_3yrChg is defined under the SOCIAL · ADAPT
# section of `ratio_funcs` but tagged Soc-Recover here — confirm which is intended.
"Equity_Issuance_Rate":"Soc-Recover","Share_Issuance_Rate":"Soc-Recover",
"Share_Dilution_3yrChg":"Soc-Recover",

# ░░░░░░░░░░░░░░░  PHYSICAL · ADAPT  ░░░░░░░░░░░░░░░
"CapEx_to_Sales":"Phys-Adapt","CapEx_to_Depreciation":"Phys-Adapt",
"CapEx_plus_RnD_to_Sales":"Phys-Adapt","CapEx_GrowthRate":"Phys-Adapt",
"CapEx_Dep_Growth":"Phys-Adapt","Maintenance_CapEx_Ratio":"Phys-Adapt",
"FreeCashFlow_to_InvestedCapital":"Phys-Adapt","CapitalisedSoftware_to_Assets":"Phys-Adapt",
"Acquisitions_to_OCF":"Phys-Adapt","Reinvestment_Rate":"Phys-Adapt",
"CapEx_Variability_5yr":"Phys-Adapt","CapEx_Variability_3yr":"Phys-Adapt",
"OCF_to_CapEx":"Phys-Adapt","OCF_FreeCash_Cushion":"Phys-Adapt",

# ░░░░░░░░░░░░░░░  INFORMATION · ADAPT  ░░░░░░░░░░░░░░░
# NOTE(review): "R&D_Intensity" is the only key spelled with "&" (other R&D
# keys use "R_D_" / "RnD_") — verify it matches the corresponding ratio name.
"R&D_Intensity":"Info-Adapt","Retention_Ratio":"Info-Adapt",
"Gross_Profitability":"Info-Adapt","NOPAT_Margin":"Info-Adapt",
"Sustainable_Growth_Rate":"Info-Adapt","Revenue_CAGR_5yr":"Info-Adapt",
"Revenue_CAGR_3yr":"Info-Adapt","Revenue_Growth":"Info-Adapt",
"NetIncome_Growth":"Info-Adapt","EBITDA_Growth":"Info-Adapt",
"Assets_Growth":"Info-Adapt","Equity_Growth":"Info-Adapt",
"OCF_Growth":"Info-Adapt","FCF_Growth":"Info-Adapt",
"FCF_Growth_3yrCAGR":"Info-Adapt","OCF_CAGR_5yr":"Info-Adapt",
"OCF_CAGR_3yr":"Info-Adapt","Dividend_Growth":"Info-Adapt",
"Dividend_Growth_Alt":"Info-Adapt","CapEx_Growth":"Info-Adapt",
"GrossProfit_Growth":"Info-Adapt","OCF_Volatility_3yr":"Info-Adapt",
"CashFlow_Skewness":"Info-Adapt","FCF_Volatility_5yr":"Info-Adapt",
"OCF_Margin":"Info-Adapt",

# ░░░░░░░░░░░░░░░  COGNITIVE · ADAPT  ░░░░░░░░░░░░░░░
"Market_Share_Ratio":"Cog-Adapt","Relative_Revenue_Growth_vs_Sector":"Cog-Adapt",
"Market_Share_Revenue_Change":"Cog-Adapt","Market_Share_EBITDA_Change":"Cog-Adapt",
"Relative_EBITDA_Growth_vs_Sector":"Cog-Adapt",
"Relative_OperatingIncome_Growth_vs_Sector":"Cog-Adapt",
"Market_Share_OperatingIncome_Change":"Cog-Adapt","Rev_CAGR_vs_Sector":"Cog-Adapt",
"Relative_Revenue_Growth_Sector":"Cog-Adapt","Relative_EBITDA_Growth_Sector":"Cog-Adapt",
"Market_Share_of_Revenue":"Cog-Adapt","Revenue_Sector_Share_Growth":"Cog-Adapt",
"EBITDA_Sector_Share":"Cog-Adapt","Relative_OperatingIncome_Growth_Sector":"Cog-Adapt",
"Relative_NetIncome_Growth_Sector":"Cog-Adapt","NetIncome_Sector_Share":"Cog-Adapt",
"Sales_to_TotalAssets":"Cog-Adapt","Sales_to_Marketing_Leverage":"Cog-Adapt",
"GrossMargin_Slope_5yr":"Cog-Adapt","GrossMargin_Slope_3yr":"Cog-Adapt",
"Price_Realisation_Index":"Cog-Adapt",

# ░░░░░░░░░░░░░░░  SOCIAL · ADAPT  ░░░░░░░░░░░░░░░
"DPS_to_EPS":"Soc-Adapt","Dividend_Payout_Ratio":"Soc-Adapt",
"FCF_Payout_Ratio":"Soc-Adapt","Dividend_Stability_Index":"Soc-Adapt",
"Dividend_Yield_on_FCF":"Soc-Adapt","Dividend_Coverage":"Soc-Adapt",
"Dividend_Coverage_FCF":"Soc-Adapt","Dividend_Payout_CV":"Soc-Adapt",
"Share_Count_Reduction_YoY":"Soc-Adapt","Net_Buyback_to_FCF":"Soc-Adapt",
}

# ──────────────────────────────────────────────────────────────────────
# 7 · PRE-DERIVED “DAY” COLUMNS   (avoid errors inside ratio lambdas)
# ──────────────────────────────────────────────────────────────────────
# target column -> (numerator column, denominator column)
DAY_METRIC_SRC = {
    "DaysSalesOutstanding":     ("AccountsReceivable", "TotalRevenue"),
    "DaysInventoryOutstanding": ("Inventory",          "CostOfRevenue"),
    "DaysPayablesOutstanding":  ("AccountsPayable",    "CostOfRevenue"),
}
# Same numerator/denominator pairs, exposed under alternative column names.
DAY_RATIO_SRC = {
    "AccountsReceivableDays": ("AccountsReceivable", "TotalRevenue"),
    "InventoryDays":          ("Inventory",          "CostOfRevenue"),
    "AccountsPayableDays":    ("AccountsPayable",    "CostOfRevenue"),
}


def _add_missing_day_columns(spec):
    """Add ``365 * safe_div(num, den)`` to ``df`` for every target in *spec*
    that is not already a column; existing columns are left untouched."""
    for target, (numer_col, denom_col) in spec.items():
        if target in df.columns:
            continue
        df[target] = 365 * safe_div(df[numer_col], df[denom_col])


_add_missing_day_columns(DAY_METRIC_SRC)

# NOTE(review): this subtracts payables days, which is the usual formula for
# the cash conversion cycle; the operating cycle is commonly DSO + DIO only.
# Confirm the intended definition before relying on this column.
if "OperatingCycle" not in df.columns:
    df["OperatingCycle"] = (
        df["DaysSalesOutstanding"]
        .add(df["DaysInventoryOutstanding"])
        .sub(df["DaysPayablesOutstanding"])
    )

_add_missing_day_columns(DAY_RATIO_SRC)

# CommonStockRepurchased: prefer an existing column, then the
# CommonStockPayments alias, and finally a numeric 0.0 fallback.
if "CommonStockRepurchased" not in df.columns:
    if "CommonStockPayments" in df.columns:
        df["CommonStockRepurchased"] = df["CommonStockPayments"]
    else:
        df["CommonStockRepurchased"] = 0.0   # numeric fallback

# ──────────────────────────────────────────────────────────────────────
# 8 · UNIVERSAL RATIO RUNNER
# ──────────────────────────────────────────────────────────────────────
def _run_pass(func_dict: Dict[str, callable], label: str):
    """Evaluate every ratio callable in *func_dict* against the module-level ``df``.

    Each callable is invoked as ``fn(df)``; the result is coerced onto
    ``df.index`` with ``_to_series`` and winsorised with ``winsor`` at
    ``WINSOR_PCT``.  A callable that raises is logged as a warning, recorded
    in the skip list, and does not abort the pass.

    Parameters
    ----------
    func_dict : mapping of ratio name → callable taking the DataFrame.
    label     : short tag shown in the progress log lines.

    Returns
    -------
    (raw, wins, audit_rows, skips)
        raw        – dict ``"<name>_raw"`` → un-winsorised series
        wins       – dict ``name`` → winsorised series
        audit_rows – list of ``[name, % non-null, % rows where raw < winsorised,
                     % rows where raw > winsorised]``
        skips      – list of ``{"Ratio": name, "Error": message}``
    """
    raw, wins, audit_rows, skips = {}, {}, [], []
    total = len(func_dict)
    for i, (name, fn) in enumerate(func_dict.items(), 1):
        try:
            raw_series = _to_series(fn(df), df.index)
        except Exception as err:
            # Best-effort boundary: one broken ratio must not stop the pass.
            logger.warning("⏭️  %s skipped – %s", name, err)
            skips.append({"Ratio": name, "Error": str(err)})
        else:
            capped = winsor(raw_series, WINSOR_PCT)
            present = raw_series.notna()
            raw[f"{name}_raw"] = raw_series
            wins[name] = capped
            audit_rows.append([
                name,
                present.mean() * 100,
                ((raw_series < capped) & present).mean() * 100,
                ((raw_series > capped) & present).mean() * 100,
            ])
        # Report progress for skipped ratios too; previously a failure at a
        # reporting index (every 25th, or the last) suppressed the log line.
        if i % 25 == 0 or i == total:
            logger.info("   (%s) computed %3d / %d ratios", label, i, total)
    return raw, wins, audit_rows, skips

# ──────────────────────────────────────────────────────────────────────
# 9 · EXECUTE BASE & DERIVED PASSES
# ──────────────────────────────────────────────────────────────────────
# Base pass: compute, then append the raw and winsorised columns to `df`
# so they are present when the derived pass runs.
raw_base, wins_base, audit_base, skip_base = _run_pass(ratio_funcs, "base")
base_raw_frame = pd.DataFrame(raw_base, index=df.index)
base_wins_frame = pd.DataFrame(wins_base, index=df.index)
df = pd.concat([df, base_raw_frame, base_wins_frame], axis=1)

# Derived pass: same procedure for the ratios moved into `derived_ratio_funcs`.
raw_der, wins_der, audit_der, skip_der = _run_pass(derived_ratio_funcs, "derived")
derived_raw_frame = pd.DataFrame(raw_der, index=df.index)
derived_wins_frame = pd.DataFrame(wins_der, index=df.index)
df = pd.concat([df, derived_raw_frame, derived_wins_frame], axis=1)

# Combined audit rows / skip records, base entries first.
audit = [*audit_base, *audit_der]
skipped = [*skip_base, *skip_der]

# ──────────────────────────────────────────────────────────────────────
# 10 · EXPORT ARTEFACTS
# ──────────────────────────────────────────────────────────────────────
# Coverage audit: one row per computed ratio.
coverage_columns = ["ratio", "pct_available", "pct_capped_low", "pct_capped_high"]
pd.DataFrame(audit, columns=coverage_columns).to_csv(
    OUT_DIR / "Stage3_RatioCoverage.csv", index=False
)

# Explicit columns so the file always has a header row, even when nothing was
# skipped (an empty list alone would produce a column-less, header-less CSV).
pd.DataFrame(skipped, columns=["Ratio", "Error"]).to_csv(
    OUT_DIR / "Stage3_SkippedRatios.csv", index=False
)

# Full data set: input columns plus the raw and winsorised ratio columns.
out_csv = OUT_DIR / "Stage3_Data_WithRatios.csv"
df.to_csv(out_csv, index=False)

# df.info() prints to a buffer so its summary can go through the logger.
buf = io.StringIO()
df.info(buf=buf)
logger.info("Final DataFrame info:\n%s", buf.getvalue())
logger.info("Saved ratios CSV → %s", out_csv)

# Expose the result for later stages run in the same session.
data_stage_3 = df
logger.info("✅  STAGE 03 complete — `data_stage_3` ready")





2025-06-11 14:40:23,944 | INFO    | RUN_DIR        : outputs_rff\daily\2025-06-11


2025-06-11 14:40:23,946 | INFO    | SWAN_YEAR=2008  RUN_DATE=2025-06-11  WINSOR_PCT=0.01


2025-06-11 14:40:23,946 | INFO    | DATE_COL / ID_COL = ReportDate / Symbol


2025-06-11 14:40:24,997 | INFO    | Stage 02 CSV loaded: 34862 rows


2025-06-11 14:40:25,162 | INFO    |    (base) computed  25 / 219 ratios


2025-06-11 14:40:40,542 | INFO    |    (base) computed  50 / 219 ratios


2025-06-11 14:40:40,683 | INFO    |    (base) computed  75 / 219 ratios


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


2025-06-11 14:40:54,133 | INFO    |    (base) computed 100 / 219 ratios


2025-06-11 14:41:24,216 | INFO    |    (base) computed 125 / 219 ratios


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


2025-06-11 14:42:14,426 | INFO    |    (base) computed 150 / 219 ratios


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


2025-06-11 14:42:16,796 | INFO    |    (base) computed 175 / 219 ratios


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


2025-06-11 14:42:32,485 | INFO    |    (base) computed 200 / 219 ratios


  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


2025-06-11 14:42:35,958 | INFO    |    (base) computed 219 / 219 ratios






2025-06-11 14:42:49,298 | INFO    |    (derived) computed  12 / 12 ratios


  diff_b_a = subtract(b, a)


2025-06-11 14:43:07,956 | INFO    | Final DataFrame info:
<class 'pandas.core.frame.DataFrame'>
Index: 34862 entries, 36 to 35
Columns: 662 entries, Symbol to Revenue_Sector_Share_Growth
dtypes: datetime64[ns](1), float64(646), int64(5), object(10)
memory usage: 177.3+ MB



2025-06-11 14:43:07,958 | INFO    | Saved ratios CSV → outputs_rff\daily\2025-06-11\stage03\Stage3_Data_WithRatios.csv


2025-06-11 14:43:07,960 | INFO    | ✅  STAGE 03 complete — `data_stage_3` ready
