In [None]:
import json
import os
import sys

import numpy as np
import pandas as pd
import torch
sys.path.append('..')

from src.config import ModelParams, TrainConfig
from src.deqn import PolicyNetwork, Trainer, simulate_paths
from src.io_utils import make_run_dir, save_run_metadata, save_selected_run, pack_config, save_torch, save_csv, save_json, save_npz, ensure_dir
from src.metrics import residual_quality

# ---------- config ----------
# Artifacts live in an "artifacts" folder one level up from the working directory.
ARTIFACTS_ROOT = os.path.join("..", "artifacts")

# Model parameters on CPU in single precision; training config from the "mid" preset.
params = ModelParams(dtype=torch.float32, device="cpu")
cfg = TrainConfig.mid(seed=0)

# run_dir is the path every later cell writes its outputs under.
run_dir = make_run_dir(ARTIFACTS_ROOT, "commitment", tag=cfg.mode, seed=cfg.seed)
run_metadata = pack_config(params, cfg, extra={"policy":"commitment"})
save_run_metadata(run_dir, run_metadata)
print("Run dir:", run_dir)


In [None]:
# The commitment policy does not use rbar_by_regime, so no values are defined here.
rbar = None


In [None]:
# Timeless commitment (paper): a precomputed SSS is NOT required to initialize the multipliers.
# A short transient (burn-in) is used only in post-training simulations/diagnostics,
# so that they focus on the ergodic region.
commit_init = None


In [None]:

# ---------- model ----------
# Policy network with 7 inputs and 13 outputs; hidden layout and activation
# come from the training config.
d_in, d_out = 7, 13
net = PolicyNetwork(d_in, d_out, hidden=cfg.hidden_layers, activation=cfg.activation)

# The commitment policy does not use rbar_by_regime, so it is passed as None.
trainer = Trainer(
    params=params,
    cfg=cfg,
    policy="commitment",
    net=net,
    rbar_by_regime=None,
)



In [None]:
# No SSS-based initialization for timeless commitment.
# NOTE(review): the training call below passes commitment_sss=None directly rather
# than reading commit_init — confirm whether this variable is still needed.
commit_init = None


In [None]:

# ---------- train ----------
# Fit the policy network; no commitment SSS is supplied to the trainer.
losses = trainer.train(
    commitment_sss=None,
    n_path=cfg.n_path,
    n_paths_per_step=cfg.n_paths_per_step,
)

# Save the trained weights and the per-iteration loss log.
# (pandas is imported in the notebook's top import cell so a missing dependency
# is reported before the expensive training call rather than after it.)
save_torch(os.path.join(run_dir, "weights.pt"), trainer.net.state_dict())
df = pd.DataFrame({"iter": np.arange(len(losses)), "loss": losses})
save_csv(os.path.join(run_dir, "train_log.csv"), df)

# Quality on a fresh validation batch sampled from the model's simulated state distribution.
# Everything runs under inference_mode, so no autograd state is recorded.
with torch.inference_mode():
    x_val = trainer.simulate_initial_state(int(cfg.val_size), commitment_sss=None)
    # Optional short burn-in for validation states (kept small; training itself is path-based).
    # Falls back to 200 steps when the config has no `val_burn_in` attribute.
    val_burn = int(getattr(cfg, "val_burn_in", 200))
    for _ in range(val_burn):
        x_val = trainer._step_state(x_val)
    resid = trainer._residuals(x_val).detach().cpu().numpy()

# Tolerance falls back to 1e-3 when the config has no `report_tol` attribute.
q = residual_quality(resid, tol=getattr(cfg, "report_tol", 1e-3))
save_json(os.path.join(run_dir, "train_quality.json"), q)
print("Train quality:", q)

# Optional: mark this run as selected for the results notebook.
save_selected_run(ARTIFACTS_ROOT, trainer.policy, run_dir)

# Commitment SSS and timeless simulations are computed in the next cells.


In [None]:
# ---------- SSS from trained policy (paper-faithful, timeless perspective) ----------
from src.steady_states import solve_commitment_sss_from_policy

# Solve for the SSS implied by the trained network and store it with the run.
comm_sss = solve_commitment_sss_from_policy(params, trainer.net)
sss_payload = {'policy':'commitment','by_regime': comm_sss.by_regime}
save_json(os.path.join(run_dir, 'sss_policy_fixed_point.json'), sss_payload)

# Print every entry of each regime, regimes in sorted order.
print('=== COMMITMENT SSS (switching-consistent, includes lagged multipliers; timeless perspective) ===')
for _s in sorted(comm_sss.by_regime.keys()):
    print(f'Regime {_s}:')
    _regime_values = comm_sss.by_regime[_s]
    for _k, _v in _regime_values.items():
        print(f'{_k:>20}: {_v}')

# ---------- Sanity checks (fixed-regime mapping + switching-consistent residuals) ----------
from src.sanity_checks import fixed_point_check, residuals_check_switching_consistent

# Both checks are evaluated at the SSS computed above and return one result per regime.
fp = fixed_point_check(
    params,
    trainer.net,
    policy='commitment',
    sss_by_regime=comm_sss.by_regime,
)
rc = residuals_check_switching_consistent(
    params,
    trainer.net,
    policy='commitment',
    sss_by_regime=comm_sss.by_regime,
)
print(
    'Fixed-regime one-step check max |x_next-x| by regime (NOT Table-2 SSS):',
    {k:v.max_abs_state_diff for k,v in fp.items()},
)
print(
    'Switching-consistent residual check max |res| by regime:',
    {k:v.max_abs_residual for k,v in rc.items()},
)
# The residual names are read off the first regime's result.
print('Residual keys:', list(next(iter(rc.values())).residuals.keys()))


In [None]:
# ---------- Save sanity checks ----------
# Regime keys are cast to int and every metric to float before the payload is written.
fp_max_by_regime = {int(reg): float(chk.max_abs_state_diff) for reg, chk in fp.items()}
rc_max_by_regime = {int(reg): float(chk.max_abs_residual) for reg, chk in rc.items()}
rc_detail_by_regime = {
    int(reg): {name: float(val) for name, val in chk.residuals.items()}
    for reg, chk in rc.items()
}

sanity_payload = {
    'policy': 'commitment',
    'fixed_regime_one_step_max_abs_state_diff': fp_max_by_regime,
    'residual_max_abs': rc_max_by_regime,
    'residuals_by_regime': rc_detail_by_regime,
}
save_json(os.path.join(run_dir, 'sanity_checks.json'), sanity_payload)

In [None]:
# ---------- Simulate (timeless commitment: start from SSS incl. lagged multipliers) ----------
# This produces sim_paths.npz used by Table 2 / figures.
# NOTE(review): B_sim is presumably the number of simulated paths, T_sim the horizon and
# burn_in_sim the discarded transient — confirm against simulate_paths.
B_sim = 2048
T_sim = 6000
burn_in_sim = 1000

# Initial states are drawn from the commitment SSS computed from the trained policy.
x0_sim = trainer.simulate_initial_state(B_sim, commitment_sss=comm_sss.by_regime)

sim_kwargs = dict(
    params=params,
    policy="commitment",
    net=trainer.net,
    T=T_sim,
    burn_in=burn_in_sim,
    x0=x0_sim,
    compute_implied_i=True,
    gh_n=7,
    thin=1,
    store_states=False,
)
sim = simulate_paths(**sim_kwargs)

# Each entry of the returned mapping is written to the archive as a keyword argument.
sim_paths_file = os.path.join(run_dir, "sim_paths.npz")
save_npz(sim_paths_file, **sim)
print("Saved sim_paths:", sim_paths_file)
