# 90_results_analysis

Aggregates results across policies: loads **selected** (or **latest**) runs, summarizes **training quality**, and prints/saves SSS outputs for Table 2-style reporting. Figures are reproduced in separate notebooks (91/92/93/94/96/99/100).


In [None]:
import os, sys, json, numpy as np, pandas as pd
import torch

import pathlib

def _find_project_root():
    """Locate the project root, i.e. the first directory that contains ``src/``.

    Candidates are probed in this order: the resolved current working
    directory, each of its ancestors (nearest first), and finally the fixed
    Google Colab clone location ``/content/econml``.

    Returns:
        pathlib.Path: the first candidate with a ``src`` sub-directory.

    Raises:
        RuntimeError: if no candidate contains ``src/``.
    """
    start = pathlib.Path.cwd().resolve()
    # The Colab location goes last so a local checkout always wins.
    candidates = (start, *start.parents, pathlib.Path("/content/econml"))
    root = next((d for d in candidates if (d / "src").is_dir()), None)
    if root is None:
        raise RuntimeError("Could not find project root containing src/. If on Colab, clone repo to /content/econml.")
    return root

# Resolve the project root once and, if it is not already listed, put it at the
# front of sys.path so the `src.*` imports that follow can be resolved.
PROJECT_ROOT = _find_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.io_utils import load_selected_run, find_latest_run_dir, load_json
from src.table2_builder import build_table2, save_table2_csv

# Artifact directory: the ARTIFACTS_ROOT environment variable wins; otherwise
# fall back to <project root>/artifacts.
ARTIFACTS_ROOT = os.getenv("ARTIFACTS_ROOT", str(PROJECT_ROOT / "artifacts"))
print("ARTIFACTS_ROOT:", ARTIFACTS_ROOT)
# Policy identifiers iterated by the per-policy cells of this notebook.
POLICIES = [
    'taylor',
    'mod_taylor',
    'discretion',
    'commitment',
]

def get_run(policy: str) -> str | None:
    """Return the run directory to analyse for ``policy``.

    The explicitly selected run (``load_selected_run``) takes precedence. Only
    when that lookup yields ``None`` is the result of ``find_latest_run_dir``
    returned instead. Both lookups are made under ``ARTIFACTS_ROOT``.
    """
    selected = load_selected_run(ARTIFACTS_ROOT, policy)
    if selected is not None:
        return selected
    return find_latest_run_dir(ARTIFACTS_ROOT, policy)

# Resolve one run directory per policy via get_run (selected run first, latest
# run as fallback).
RUNS = {p: get_run(p) for p in POLICIES}
# NOTE(review): a notebook echoes a bare expression only when it is the last
# statement of its cell; more statements follow in this cell, so this line
# likely displays nothing -- confirm whether an explicit print/display was meant.
RUNS


# --- paper reporting helpers ---
def ann(x):
    """Convert a quarterly rate to annualized percent: return ``400.0 * x``.

    Works for any operand that supports multiplication by a float (plain
    numbers as well as array-like objects that implement ``*``).
    """
    return 400.0 * x


In [None]:
def load_artifacts(run_dir: str | None):
    if run_dir is None:
        return {'run_dir': None, 'sss': None, 'sss_path': None, 'train_quality': None, 'sim_paths_path': None}
    out = {'run_dir': run_dir}

    # Prefer policy fixed-point SSS (used by train notebooks), fall back to legacy sss.json.
    sss_candidates = ['sss_policy_fixed_point.json', 'sss.json']
    sss_path = None
    for name in sss_candidates:
        cand = os.path.join(run_dir, name)
        if os.path.exists(cand):
            sss_path = cand
            break
    out['sss'] = load_json(sss_path) if sss_path is not None else None
    out['sss_path'] = sss_path

    tq_path = os.path.join(run_dir, 'train_quality.json')
    out['train_quality'] = load_json(tq_path) if os.path.exists(tq_path) else None
    sp_path = os.path.join(run_dir, 'sim_paths.npz')
    out['sim_paths_path'] = sp_path if os.path.exists(sp_path) else None
    return out

# Load the artifact dict (see load_artifacts) for every policy's run directory,
# keyed by policy name.
ARTS = {p: load_artifacts(RUNS[p]) for p in POLICIES}
ARTS  # bare expression ending the cell, so the notebook echoes the dict


## Training quality summary

Quality is computed from residuals on a fresh validation batch sampled from the trainer's simulated validation distribution (RMS / max abs / share of states where all equations are below tolerance).

In [None]:
# One summary row per policy. A missing train_quality payload is replaced by an
# empty dict, so every metric of that policy shows up as None.
rows = []
for p in POLICIES:
    tq = ARTS[p]['train_quality'] or {}
    rows.append({
        'policy': p,
        'run_dir': ARTS[p]['run_dir'],
        **{
            metric: tq.get(metric)
            for metric in ('rms', 'max_abs', 'share_all_lt_tol', 'tol', 'val_size')
        },
    })
pd.DataFrame(rows)


In [None]:
from src.io_utils import load_json

def _load_sanity(run_dir: str | None):
    if run_dir is None:
        return None
    p = os.path.join(run_dir, 'sanity_checks.json')
    return load_json(p) if os.path.exists(p) else None

def worst(d):
    """Return the worst (largest) sanity metric held by ``d`` as a float.

    ``d`` is either a mapping of per-regime values, in which case the maximum
    over its values is taken, or a bare number (no regimes), which is used
    as-is. ``None`` is returned when no usable number is available: an empty
    mapping, a non-numeric scalar (including bools), or values that cannot be
    compared / converted to float.
    """
    if isinstance(d, dict):
        candidates = list(d.values())
    elif isinstance(d, (int, float)) and not isinstance(d, bool):
        # Metric stored without a per-regime breakdown.
        candidates = [d]
    else:
        return None
    if not candidates:
        return None
    try:
        return float(max(candidates))
    except (TypeError, ValueError, OverflowError):
        # Malformed entries: report the metric as unavailable instead of crashing.
        return None


# One row per policy comparing the recorded sanity metrics with tolerances.
rows = []
for p in POLICIES:
    run_dir = ARTS[p]['run_dir']
    sc = _load_sanity(run_dir)
    tq = ARTS[p]['train_quality'] or {}
    tol = tq.get('tol', None)
    # Residual tolerance: use training tol if available, else a conservative default
    tol_res = float(tol) if tol is not None else 1e-4
    # Fixed-point tolerance: typically tighter; scale with residual tol
    tol_fp = max(1e-8, tol_res * 1e-2)
    if sc is None:
        # No sanity file: keep the row, but leave metrics and verdicts undefined.
        rows.append({'policy': p, 'run_dir': run_dir, 'sanity_file': False,
                     'fp_max_abs': None, 'res_max_abs': None,
                     'pass_fp': None, 'pass_res': None, 'tol_fp': tol_fp, 'tol_res': tol_res})
        continue
    # expected schema from training notebooks; the second key of each pair is
    # only consulted when the first one is absent
    fp = sc.get('fixed_point_max_abs_state_diff', sc.get('fixed_point', {}))
    rm = sc.get('residual_max_abs', sc.get('residual_max', {}))
    # take worst over regimes if present, otherwise the scalar itself
    fp_max = worst(fp)
    res_max = worst(rm)
    rows.append({
        'policy': p,
        'run_dir': run_dir,
        'sanity_file': True,
        'fp_max_abs': fp_max,
        'res_max_abs': res_max,
        'tol_fp': tol_fp,
        'tol_res': tol_res,
        # A metric that could not be read counts as a failed check.
        'pass_fp': (fp_max is not None and fp_max <= tol_fp),
        'pass_res': (res_max is not None and res_max <= tol_res),
    })

# Show the full sanity table, then a verdict. The verdict is only computed when
# every run actually has a sanity_checks.json.
df_sanity = pd.DataFrame(rows)
display(df_sanity)
if not df_sanity['sanity_file'].all():
    print('Some runs are missing sanity_checks.json (see table above).')
else:
    # Element-wise comparison against False selects rows that failed either check.
    bad = df_sanity[df_sanity['pass_fp'].eq(False) | df_sanity['pass_res'].eq(False)]
    if len(bad) == 0:
        print('All sanity-checks PASS (fixed point + residuals).')
    else:
        print('\nSanity-check FAIL for:')
        display(bad[['policy','fp_max_abs','tol_fp','res_max_abs','tol_res','run_dir']])


## Steady-state results (SSS)

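Loads the steady-state file produced by each training notebook: `sss_policy_fixed_point.json` is preferred, with the legacy `sss.json` used as a fallback. This is the input for Table 2 construction.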

In [None]:

# Dump the loaded SSS payload of every policy in a compact, human-readable form.
for p in POLICIES:
    print("\n==============================")
    print("POLICY:", p)
    sss = ARTS[p]["sss"]
    if sss is None:
        print("Missing sss.json")
        continue
    if "by_regime" not in sss:
        # No per-regime section: print the payload as-is.
        print(sss)
        continue
    # Probe both the string and the integer spelling of the regime ids.
    for s in ("0", "1", 0, 1):
        if s not in sss["by_regime"]:
            continue
        reg = sss["by_regime"][s]
        # Only the first ten keys are listed to keep the line short.
        print(f"Regime {s}: keys={list(reg.keys())[:10]} ...")
    print("by_regime:", sss["by_regime"])


## Ergodic moments (optional)

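If `sim_paths.npz` exists in each run, simple ergodic moments by regime can be computed from it. The cell below delegates to `build_table2` (Table 2 plus Taylor-rule variants), displays the result, and saves it as `table2_reproduced.csv`.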

In [None]:
# ---- Table 2 (paper) + Taylor variants ----
# Build the table with the project helper from src.table2_builder.
# NOTE(review): the meaning of `device` and `include_rules` is defined in that
# module and is not visible here -- confirm there before changing them.
df_table2 = build_table2(ARTIFACTS_ROOT, device='cpu', include_rules=True)
display(df_table2)

# Save the table as CSV; the helper's return value is printed below
# (presumably the path that was written -- confirm in src.table2_builder).
csv_path = save_table2_csv(df_table2, ARTIFACTS_ROOT, filename='table2_reproduced.csv')
print('Saved:', csv_path)


# Notes:
# - Rates and inflation are annualized percent (400x quarterly net).
# - Output gap is in percent (100 * log(c/c_hat)).
# - Real rate uses realized next inflation: r_t = (1+i_t)/(1+pi_{t+1}) - 1.
