# Baseline: ARIMA (Price-Only)

## 01. Imports and Configuration

What this block does. Declares the script header, imports all dependencies, defines fixed splits and invariants, sets random seeds, configures Ljung–Box gates and AIC search policy, and initialises output directories and warnings.
Why it is needed. Centralising configuration guarantees reproducibility and keeps the ARIMA baseline aligned with the dissertation protocol.
Method choices. Fixed splits: Train = 2021-02-03 → 2022-12-30; Validation = 2023-01-03 → 2023-05-31; Test = 2023-06-01 → 2023-12-28 (n = 146). Target = Close.shift(-1) for policy comparison (naïve last-close used for U2). America/New_York with 16:00 cut-off assumed via upstream data creation. No scaling; univariate Close only; selection by train-only AIC with Ljung–Box double pass. Early stopping on Validation: not applicable. Test cadence: expanding-origin; monthly refit: not applicable here.

In [None]:
import os, sys, json, time, hashlib, random, platform, warnings, math, shutil, zipfile
from pathlib import Path
from typing import Tuple, Dict, Any, List, Optional
import numpy as np
import pandas as pd
from datetime import datetime, timezone
import json, shutil, sys


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import het_arch, acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera
from statsmodels.tsa.stattools import kpss
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# ---------------- Configuration (proposal-aligned) ----------------
# Identifier used in output folder and artefact file names.
MODEL_ID = "ARIMA"
# Each ticker is loaded from "<TICKER>_input.csv" by load_input_csv.
TICKERS = ["AAPL", "AMZN", "MSFT", "TSLA", "AMD"]

# Fixed splits and invariants
# Window bounds are inclusive on both ends (see split_data).
TRAIN_START = "2021-02-03"
TRAIN_END   = "2022-12-30"
VAL_START   = "2023-01-03"
VAL_END     = "2023-05-31"
TEST_START  = "2023-06-01"
TEST_END    = "2023-12-28"
# Expected number of Test rows; split_data and run_ticker both check it.
TEST_LEN    = 146
# TIMEZONE / MKT_CLOSE are only recorded in the manifests and run_config in this
# file; no timestamp conversion is performed here.
TIMEZONE    = "America/New_York"
MKT_CLOSE   = "16:00"

# Dead-band on |actual return|: observations at or below it are excluded from DA.
DA_EPS = 0.0010

# AIC grids and adaptive expansion policy
# Successive upper bounds applied to both p and q when the grid is widened.
P_MAX_STEPS   = [4, 5, 6, 7]
D_GRID        = [0, 1, 2]
# "none" maps to no trend term, anything else to a constant (see fit_once).
TRENDS_RAW    = ["none", "constant"]
# DELTA_AIC_SEQ and TOP_K_SEQ are consumed pairwise (zip) as progressively
# wider candidate bands around the best AIC.
DELTA_AIC_SEQ = [4.0, 5.0, 6.0]
TOP_K_SEQ     = [12, 16, 20]

# Ljung-Box whiteness double gate
LB_LAGS   = (10, 20)
# A candidate passes a lag when its p-value is strictly greater than LB_ALPHA.
LB_ALPHA  = 0.05
# True: must pass on Train AND Train+Val; False: Train only.
LB_DOUBLE_PASS_REQUIRED = True

# Seeds Python's `random` and NumPy's global RNG.
SEED = 42
random.seed(SEED); np.random.seed(SEED)

# Output tree: ARIMA_FINAL/ (run root) and ARIMA_FINAL/ARIMA/ (per-ticker folders).
# Directories are created at import time.
OUT_ROOT   = Path("ARIMA_FINAL")
MODEL_ROOT = OUT_ROOT / MODEL_ID
OUT_ROOT.mkdir(parents=True, exist_ok=True); MODEL_ROOT.mkdir(parents=True, exist_ok=True)

# Silence optimiser / parameter warnings emitted while fitting many candidates.
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", message="Non-invertible")
warnings.filterwarnings("ignore", message="Non-stationary")
warnings.filterwarnings("ignore", message="Maximum Likelihood optimization failed")

# Silence the KPSS warning about p-values outside the lookup table range.
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.filterwarnings("ignore", category=InterpolationWarning)

## Outputs and Artefacts — Provenance Helpers

What this block does. Defines utilities to hash files and capture the runtime environment.
Why it is needed. Provenance manifests (environment and file hashes) are required for dissertation-grade reproducibility and the finalisation gate.
Method choices. Writes env_manifest.txt and file_hashes.json at the run root after artefacts are created.

In [None]:
# ---------------- Utility: provenance ----------------
def sha256_of_file(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``p``.

    The file is streamed in 1 MiB pieces so large artefacts are never held in
    memory in full.
    """
    digest = hashlib.sha256()
    piece_size = 1 << 20
    with p.open("rb") as handle:
        while True:
            piece = handle.read(piece_size)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()

def write_env_manifest(root: Path):
    """Write ``env_manifest.txt`` under ``root`` as ``key=value`` lines.

    Records the interpreter version, platform string, current epoch seconds,
    the run constants (model id, seed, timezone, market close) and, when the
    packages import cleanly, the numpy / pandas / statsmodels versions.
    """
    entries = {
        "python_version": sys.version.split()[0],
        "platform": platform.platform(),
        "timestamp_utc": int(time.time()),
        "model_id": MODEL_ID,
        "seed": SEED,
        "timezone": TIMEZONE,
        "market_close": MKT_CLOSE,
    }
    manifest_lines = [f"{key}={value}" for key, value in entries.items()]
    try:
        import numpy, pandas, statsmodels
        for label, module in (("numpy", numpy), ("pandas", pandas), ("statsmodels", statsmodels)):
            manifest_lines.append(f"{label}={module.__version__}")
    except Exception:
        # Best effort: library versions are simply omitted if unavailable.
        pass
    text = "\n".join(manifest_lines) + "\n"
    (root / "env_manifest.txt").write_text(text, encoding="utf-8")

def write_file_hashes(root: Path):
    """Write ``file_hashes.json`` under ``root`` listing every file's SHA-256.

    Each entry is ``{"path": <POSIX-style path relative to root>, "sha256": <hex>}``.
    The manifest itself is excluded: when the manifest is rebuilt, the copy on
    disk is the stale one about to be overwritten, so its hash would be wrong
    the moment the new manifest is written. Entries are sorted by path because
    ``os.walk`` yields names in filesystem order, which would otherwise make
    the manifest differ between otherwise identical runs.
    """
    manifest = root / "file_hashes.json"
    rows = []
    for r, _, files in os.walk(root):
        for fn in files:
            p = Path(r) / fn
            if p == manifest:
                continue
            rel = p.relative_to(root)
            rows.append({"path": str(rel).replace("\\", "/"), "sha256": sha256_of_file(p)})
    rows.sort(key=lambda row: row["path"])
    manifest.write_text(json.dumps({"files": rows}, indent=2), encoding="utf-8")

## Data Loading

What this block does. Loads per-ticker CSVs and splits them into Train, Validation, and Test windows.
Why it is needed. Enforces the fixed temporal protocol and guards the Test length (n = 146) and boundaries.
Method choices. Expects columns date and Close; sorts by date; asserts Test window equals 2023-06-01 → 2023-12-28. America/New_York 16:00 cut-off assumed to be handled in data preparation.

In [None]:
# ---------------- Data IO ----------------
def load_input_csv(ticker: str) -> pd.DataFrame:
    """Load ``<ticker>_input.csv`` from the working directory.

    Returns a fresh two-column frame (``date`` parsed to datetime, ``Close``)
    sorted by date with a 0..n-1 index.

    Raises:
        FileNotFoundError: the CSV does not exist.
        AssertionError: the CSV lacks a ``date`` or ``Close`` column.
    """
    csv_path = Path(f"{ticker}_input.csv")
    if not csv_path.exists():
        raise FileNotFoundError(f"Missing {csv_path.name} next to the script.")

    frame = pd.read_csv(csv_path)
    required = {"date", "Close"}
    if required - set(frame.columns):
        raise AssertionError(f"{csv_path.name} missing required columns: 'date', 'Close'.")

    frame["date"] = pd.to_datetime(frame["date"])
    ordered = frame.sort_values("date").reset_index(drop=True)
    return ordered[["date", "Close"]].copy()

def split_data(df: pd.DataFrame):
    """Slice ``df`` into the fixed Train / Validation / Test windows.

    Window bounds are inclusive on both ends and each slice is returned as an
    independent copy with a 0..n-1 index.

    Returns:
        ``(train, validation, test)`` data frames.

    Raises:
        AssertionError: the Test slice does not have ``TEST_LEN`` rows, or its
            first/last dates differ from ``TEST_START`` / ``TEST_END``. The
            checks are raised explicitly (not via ``assert``) so they still
            run under ``python -O``.
    """
    def _window(start: str, end: str) -> pd.DataFrame:
        # Inclusive date window, detached from the source frame.
        mask = (df["date"] >= pd.to_datetime(start)) & (df["date"] <= pd.to_datetime(end))
        return df[mask].copy().reset_index(drop=True)

    tr = _window(TRAIN_START, TRAIN_END)
    va = _window(VAL_START, VAL_END)
    te = _window(TEST_START, TEST_END)

    if len(te) != TEST_LEN:
        raise AssertionError(f"Test must be {TEST_LEN} rows, got {len(te)}")
    first_day = str(te["date"].iloc[0].date())
    last_day = str(te["date"].iloc[-1].date())
    if first_day != TEST_START or last_day != TEST_END:
        raise AssertionError("Test window mismatch")
    return tr, va, te

## Preprocessing

What this block does. Provides metric utilities and trading-diagnostic helpers used later.
Why it is needed. Ensures consistent evaluation and trading-style summaries across model families.
Method choices. No scaling in ARIMA; zero-preservation for Tw_/Rd_/Nw_SP500_* not applicable (no exogenous features). DA_ε computed on returns with ε = 0.0010. Target concept: next-day close for comparison metrics; ARIMA itself predicts level. ES on Validation: not applicable.

In [None]:
# ---------------- Metrics ----------------
def rmse(a, b):
    """Root-mean-square error between two array-likes, returned as a float."""
    err = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return float(np.sqrt(np.mean(err ** 2)))
def mae(a, b):
    """Mean absolute error between two array-likes, returned as a float."""
    err = np.asarray(a, dtype=np.float64) - np.asarray(b, dtype=np.float64)
    return float(np.mean(np.abs(err)))
def theils_u2(pred, actual, naive):
    """Ratio of the model's RMSE to the naive forecast's RMSE.

    The denominator is floored at 1e-12 so a perfect naive forecast does not
    cause a division by zero.
    """
    model_err = rmse(pred, actual)
    naive_err = rmse(naive, actual)
    return model_err / max(naive_err, 1e-12)

def directional_accuracy_eps_from_levels(pred_levels, actual_levels, eps=DA_EPS):
    """Share of observations where predicted and actual return signs agree.

    Both returns are measured against the previous *actual* level. The first
    observation uses itself as its previous level, so its actual return is 0
    and it is always excluded by the dead-band. Only observations with
    ``|actual return| > eps`` are scored; ``nan`` is returned when none remain.
    """
    forecast = np.asarray(pred_levels, dtype=np.float64)
    realised = np.asarray(actual_levels, dtype=np.float64)
    lagged = np.concatenate([[realised[0]], realised[:-1]])
    # Guard against division by zero: a zero previous level divides by 1 instead.
    denom = np.where(lagged == 0.0, 1.0, lagged)
    pred_ret = (forecast - lagged) / denom
    real_ret = (realised - lagged) / denom
    scored = np.abs(real_ret) > eps
    if not scored.any():
        return float("nan")
    agree = np.sign(pred_ret[scored]) == np.sign(real_ret[scored])
    return float(np.mean(agree))

def sharpe_maxdd_turnover_from_levels(pred_levels, actual_levels, cost_bps=0):
    """Trading diagnostics for the pure-sign rule on one-step level forecasts.

    For each step t >= 1 the position is ``sign(pred[t] - actual[t-1])``, i.e.
    the sign of the predicted return relative to the last observed level, and
    it earns the realised return ``(actual[t] - actual[t-1]) / actual[t-1]``.
    This is the same convention the directional-accuracy metric and the
    packaging-time turnover recomputation use; the previous implementation
    took ``sign(pred[t] - pred[t-1])`` instead, so Sharpe/MaxDD were computed
    under a different rule than the reported Turnover.

    Args:
        pred_levels: forecast levels, aligned one-to-one with ``actual_levels``.
        actual_levels: realised levels.
        cost_bps: cost in basis points charged on every position change
            (including the initial entry).

    Returns:
        ``(sharpe, maxdd, turnover)`` where ``sharpe`` is the per-period
        (not annualised) mean/std of equity-curve increments, ``maxdd`` is the
        largest fractional drop from a running equity peak, and ``turnover``
        is the number of position changes. With fewer than two observations
        the result is ``(nan, 0.0, 0)``.
    """
    pred = np.asarray(pred_levels, dtype=np.float64)
    act = np.asarray(actual_levels, dtype=np.float64)
    if act.size == 0:
        # Nothing to trade on; previously this raised IndexError.
        return float("nan"), 0.0, 0

    prev = act[:-1]
    # A zero previous level divides by 1 instead of 0.
    ret_act = (act[1:] - prev) / np.where(prev == 0.0, 1.0, prev)

    edge = pred[1:] - prev                                          # predicted move vs last actual
    sig = np.where(edge > 0, 1, np.where(edge < 0, -1, 0))          # length n-1
    if len(sig):
        # First entry counts as a change only if a position is actually opened.
        change = np.concatenate([[int(sig[0] != 0)], (sig[1:] != sig[:-1]).astype(int)])
    else:
        change = np.zeros(0, dtype=int)

    strat = sig * ret_act - (cost_bps / 10000.0) * change
    eq = np.concatenate([[1.0], (1.0 + strat).cumprod()])
    r = np.diff(eq)
    sharpe = float(np.mean(r) / (np.std(r, ddof=1) + 1e-12)) if len(r) > 1 else float("nan")
    peak = np.maximum.accumulate(eq)
    maxdd = float(np.max(1.0 - eq / peak)) if len(eq) > 1 else 0.0
    turnover = int(np.sum(change))
    return sharpe, maxdd, turnover

## Model Definition

What this block does. Defines ARIMA fitting, train-only AIC grid search, Ljung–Box screen, and adaptive selection (p-max expansion and δAIC/Top-K bands).
Why it is needed. Separates model specification from evaluation and enforces a principled train-only selection policy.
Method choices. AIC grid over p,d,q and trend ∈ {none, constant}; adaptive expansion; mandatory Ljung–Box double pass on Train and Train+Val.

In [None]:
# ---------------- Fitting and selection ----------------
def fit_once(y: np.ndarray, order: Tuple[int,int,int], trend_raw: str):
    """Fit one SARIMAX model of the given (p, d, q) order to ``y``.

    ``trend_raw == "none"`` fits without a trend term; any other value fits a
    constant. Stationarity and invertibility are not enforced. Returns the
    fitted results object.
    """
    trend_arg = "c" if trend_raw != "none" else None
    model = SARIMAX(
        endog=y,
        order=order,
        trend=trend_arg,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    return model.fit(disp=False)

def aic_grid(y_train: np.ndarray, p_max: int) -> pd.DataFrame:
    """Fit every (p, d, q, trend) combination and rank the results by AIC.

    p and q each range over 0..p_max, d over ``D_GRID`` and trend over
    ``TRENDS_RAW``. A specification whose fit raises is kept with AIC = inf.
    Returns a frame with columns p, d, q, trend, AIC sorted ascending by AIC.
    """
    specs = [
        (p, d, q, tr)
        for p in range(p_max + 1)
        for d in D_GRID
        for q in range(p_max + 1)
        for tr in TRENDS_RAW
    ]
    records = []
    for p, d, q, tr in specs:
        try:
            aic = float(fit_once(y_train, (p, d, q), tr).aic)
        except Exception:
            aic = np.inf
        records.append({"p": p, "d": d, "q": q, "trend": tr, "AIC": aic})
    return pd.DataFrame(records).sort_values("AIC").reset_index(drop=True)

def lb_pvalues_from_resid(resid: np.ndarray, lags=(10,20)) -> Dict[str, float]:
    """Ljung-Box p-value of ``resid`` at each requested lag.

    Returns a dict keyed ``"p<lag>"`` (e.g. ``"p10"``), one entry per lag.
    """
    return {
        f"p{k}": float(acorr_ljungbox(resid, lags=[k], return_df=True)["lb_pvalue"].iloc[0])
        for k in lags
    }

def _residuals_after_burn_in(res, order) -> np.ndarray:
    """Residuals of a fitted model with the state-initialisation burn-in removed.

    The models here are fitted with ``enforce_stationarity=False``, so the
    filter starts from an (approximately) diffuse state and the first
    residuals are on the scale of the price level rather than of the
    innovations. Left in place they dominate the sample autocorrelations and
    make a Ljung-Box test pass almost regardless of the specification.
    The burn-in length is taken from the results object
    (``loglikelihood_burn`` / ``nobs_diffuse``, the same quantities
    statsmodels' own residual tests skip), with the differencing order ``d``
    as a floor in case those attributes are unavailable.
    """
    resid = np.asarray(res.resid, dtype=np.float64)
    burn = max(
        int(getattr(res, "loglikelihood_burn", 0) or 0),
        int(getattr(res, "nobs_diffuse", 0) or 0),
        int(order[1]),
    )
    return resid[burn:]

def candidate_passes_lb(y_train: np.ndarray, y_trainval: np.ndarray, order, trend) -> Dict[str, Any]:
    """Run the Ljung-Box whiteness gate for one candidate specification.

    The candidate is fitted on Train and on Train+Val; each fit's post-burn-in
    residuals are tested at every lag in ``LB_LAGS``. A fit passes when all of
    its p-values are strictly greater than ``LB_ALPHA``.

    Returns:
        ``{"train": {p-values}, "trainval": {p-values}, "double_pass": bool}``.
        ``double_pass`` requires both fits to pass when
        ``LB_DOUBLE_PASS_REQUIRED`` is true, otherwise only the Train fit.

    Raises:
        Whatever the fit or the test raises; callers treat that as a rejection.
    """
    out = {"train": {}, "trainval": {}, "double_pass": False}

    res_tr = fit_once(y_train, order, trend)
    p_tr = lb_pvalues_from_resid(_residuals_after_burn_in(res_tr, order), lags=LB_LAGS)
    out["train"] = p_tr

    res_tv = fit_once(y_trainval, order, trend)
    p_tv = lb_pvalues_from_resid(_residuals_after_burn_in(res_tv, order), lags=LB_LAGS)
    out["trainval"] = p_tv

    pass_tr = all(p > LB_ALPHA for p in p_tr.values())
    pass_tv = all(p > LB_ALPHA for p in p_tv.values())
    out["double_pass"] = (pass_tr and pass_tv) if LB_DOUBLE_PASS_REQUIRED else pass_tr
    return out

def select_order_train_aic_lb_adaptive(y_train: np.ndarray, y_trainval: np.ndarray) -> Tuple[Tuple[int,int,int], str, float, Dict[str, Any], pd.DataFrame, pd.DataFrame]:
    """Pick an ARIMA specification by Train AIC subject to the Ljung-Box gate.

    For each p/q bound in ``P_MAX_STEPS`` the Train AIC grid is built, and for
    each (delta_aic, top_k) pair a band of the best ``top_k`` candidates within
    ``delta_aic`` of the best AIC is screened with ``candidate_passes_lb``.
    The first band containing a passing candidate ends the search; among the
    passing candidates the lowest AIC wins, ties broken towards smaller
    p+d+q, then p+q, d, p, q. If nothing ever passes, the best-AIC
    specification of the last grid is returned with the gate flags false.

    Returns:
        ``(order, trend, train_aic, sel_meta, grid, top5)`` where ``grid`` and
        ``top5`` are the full AIC grid and its first five rows for the p/q
        bound at which the search stopped.
    """
    final_grid = None
    top5_final = None
    last_best  = ((0,1,0), "none", math.inf)

    # Bands overlap and grids nest, so the same (order, trend) is offered for
    # screening repeatedly; the inputs never change, so screen each one once.
    # None marks a candidate whose screening raised.
    lb_cache: Dict[Tuple[Tuple[int,int,int], str], Optional[Dict[str, Any]]] = {}

    def _screen(order: Tuple[int,int,int], trend: str) -> Optional[Dict[str, Any]]:
        key = (order, trend)
        if key not in lb_cache:
            try:
                lb_cache[key] = candidate_passes_lb(y_train, y_trainval, order, trend)
            except Exception:
                lb_cache[key] = None
        return lb_cache[key]

    for pmax in P_MAX_STEPS:
        grid = aic_grid(y_train, pmax)
        # Always report the grid actually searched at this step. Previously a
        # success after the first step returned the previous step's grid,
        # which need not even contain the chosen order.
        final_grid = grid.copy()
        top5_final = grid.head(5).copy()
        best_row = grid.iloc[0]
        last_best = ((int(best_row.p), int(best_row.d), int(best_row.q)), str(best_row.trend), float(best_row.AIC))
        for delta_aic, top_k in zip(DELTA_AIC_SEQ, TOP_K_SEQ):
            band = grid[grid["AIC"] <= best_row.AIC + delta_aic].head(top_k).to_dict(orient="records")
            passing = []
            for row in band:
                order = (int(row["p"]), int(row["d"]), int(row["q"]))
                lb_info = _screen(order, str(row["trend"]))
                if lb_info is not None and lb_info["double_pass"]:
                    passing.append((row, lb_info))
            if passing:
                passing.sort(key=lambda tup: (tup[0]["AIC"], tup[0]["p"]+tup[0]["q"]+tup[0]["d"], tup[0]["p"]+tup[0]["q"], tup[0]["d"], tup[0]["p"], tup[0]["q"]))
                chosen_row, lb_info = passing[0]
                order = (int(chosen_row["p"]), int(chosen_row["d"]), int(chosen_row["q"]))
                trend = str(chosen_row["trend"])
                sel_meta = {
                    "lb_screen_pass": True,
                    "lb_double_pass": True,
                    "lb_train":  {"p10": lb_info["train"].get("p10"),  "p20": lb_info["train"].get("p20")},
                    "lb_trainval": {"p10": lb_info["trainval"].get("p10"), "p20": lb_info["trainval"].get("p20")},
                    "grid_pmax": pmax,
                    "delta_aic_used": float(delta_aic),
                    "top_k_used": int(top_k)
                }
                # Return TRAIN AIC of chosen spec
                return order, trend, float(chosen_row["AIC"]), sel_meta, final_grid, top5_final

    # Nothing passed the gate at any step: fall back to the last grid's best AIC.
    order, trend, aic_best = last_best
    sel_meta = {
        "lb_screen_pass": False,
        "lb_double_pass": False,
        "grid_pmax": P_MAX_STEPS[-1],
        "delta_aic_used": float(DELTA_AIC_SEQ[-1]),
        "top_k_used": int(TOP_K_SEQ[-1])
    }
    return order, trend, float(aic_best), sel_meta, final_grid, top5_final

## Training

What this block does. Implements one-step-ahead forecasting used in walk-forward evaluation.
Why it is needed. Re-estimates parameters on each expanding history during Test to mimic operational deployment.
Method choices. Expanding-origin; 95% and 80% bands from model CIs when available, otherwise Gaussian fallback from model scale. Monthly refit: not applicable here.

In [None]:
# ---------------- Forecasting ----------------
# Two-sided standard-normal quantiles for 95% and 80% intervals.
Z95 = 1.959963984540054
Z80 = 1.2815515655446004

def _interval_or_none(fc, alpha: float):
    """Return ``(lower, upper)`` for the first forecast row, or None if unusable.

    Unusable means ``conf_int`` raised or produced a non-finite bound.
    """
    try:
        ci = np.asarray(fc.conf_int(alpha=alpha), dtype=np.float64)
        lo, hi = float(ci[0, 0]), float(ci[0, 1])
    except Exception:
        return None
    if not (np.isfinite(lo) and np.isfinite(hi)):
        return None
    return lo, hi

def forecast_with_bands(res):
    """One-step-ahead forecast with 95% and 80% interval bounds.

    Args:
        res: fitted results object exposing ``get_forecast(steps=1)``.

    Returns:
        ``(mean, lower_95, upper_95, lower_80, upper_80)`` as floats. The
        bounds come from the model's ``conf_int`` when both levels are
        available and finite. Otherwise both bands fall back to a Gaussian
        interval ``mean ± z * se`` with ``se = sqrt(res.scale)`` (``scale``
        defaults to 1.0 when absent). Previously NaN/inf bounds from
        ``conf_int`` were returned as-is, which silently counted as
        non-coverage downstream.
    """
    fc = res.get_forecast(steps=1)
    mean = float(np.asarray(fc.predicted_mean)[0])

    band95 = _interval_or_none(fc, 0.05)
    band80 = _interval_or_none(fc, 0.20)
    if band95 is not None and band80 is not None:
        return mean, band95[0], band95[1], band80[0], band80[1]

    se = float(np.sqrt(getattr(res, "scale", 1.0)))
    # Last resort when the scale itself is unusable: a tiny positive width.
    if not np.isfinite(se) or se <= 0: se = max(1e-6, 0.001*abs(mean))
    return mean, mean - Z95*se, mean + Z95*se, mean - Z80*se, mean + Z80*se

def walk_forward(y_all: np.ndarray, idx_val_end: int, order: Tuple[int,int,int], trend_raw: str):
    """Expanding-origin one-step forecasts for indices ``idx_val_end .. len(y_all)-1``.

    For each target index t the model is refitted from scratch on
    ``y_all[:t]`` and asked for a single step ahead.

    Returns:
        Five arrays of equal length: forecast mean, lower/upper 95% bounds,
        lower/upper 80% bounds.
    """
    # One list per output column: mean, lo95, hi95, lo80, hi80.
    columns: List[list] = [[] for _ in range(5)]
    for t in range(idx_val_end, len(y_all)):
        fitted = fit_once(y_all[:t], order, trend_raw)
        for column, value in zip(columns, forecast_with_bands(fitted)):
            column.append(value)
    means, lo95, hi95, lo80, hi80 = (np.array(column) for column in columns)
    return means, lo95, hi95, lo80, hi80

## Evaluation

What this block does. Runs the per-ticker pipeline: selection, diagnostics on Train+Val, expanding-origin Test forecasts, metrics (RMSE, MAE, U2, DA_ε, Coverage, Sharpe/MaxDD at 0/10 bps, Turnover), and residual summary.
Why it is needed. Produces dissertation-ready measurements and diagnostics.
Method choices. Naïve baseline for U2 = last Validation close plus Test lag; DA_ε computed on returns with ε = 0.0010; Coverage from 95% bands; trading diagnostics via pure-sign rule on percentage returns; n must equal 146. No scaling; zero-preservation not applicable.

In [None]:
# ---------------- Per-ticker run ----------------
def run_ticker(ticker: str) -> Dict[str, Any]:
    """Run the full ARIMA pipeline for one ticker and write its artefacts.

    Steps: load and split the CSV, select (order, trend) on Train, compute
    residual diagnostics on Train+Val, produce one-step walk-forward Test
    forecasts, compute metrics, and write the predictions, metrics,
    run_config, diagnostics and AIC appendix files under
    ``MODEL_ROOT / ticker``.

    Returns:
        Summary dict (order, trend, Train AIC, Ljung-Box p-values from the
        selection gate, Train+Val whiteness flag, mean Test residual).

    Raises:
        FileNotFoundError, AssertionError: propagated from loading/splitting,
            or raised here when the prediction table is not ``TEST_LEN`` rows.
    """
    t_dir = (MODEL_ROOT / ticker); t_dir.mkdir(parents=True, exist_ok=True)

    # Load and split
    df = load_input_csv(ticker)
    tr, va, te = split_data(df)
    y_tr = tr["Close"].to_numpy(np.float64)
    y_va = va["Close"].to_numpy(np.float64)
    y_te = te["Close"].to_numpy(np.float64)
    y_trva = np.concatenate([y_tr, y_va], axis=0)
    y_all  = np.concatenate([y_tr, y_va, y_te], axis=0)
    idx_val_end = len(y_tr) + len(y_va)
    dates_test = te["date"].dt.strftime("%Y-%m-%d").to_numpy()

    # Selection (returns TRAIN AIC of chosen spec)
    order, trend_raw, aic_train, sel_meta, aic_grid_df, aic_top5 = select_order_train_aic_lb_adaptive(y_tr, y_trva)

    # Diagnostics on Train+Val for chosen spec
    res_in = fit_once(y_all[:idx_val_end], order, trend_raw)
    # Drop the state-initialisation burn-in before testing. With a diffuse
    # start the first residuals are on the scale of the price level, which
    # swamps the autocorrelations and makes Ljung-Box p-values sit near 1
    # (and distorts the normality / ARCH / KPSS tests). The burn-in length
    # comes from the results object, with the differencing order as a floor.
    burn = max(
        int(getattr(res_in, "loglikelihood_burn", 0) or 0),
        int(getattr(res_in, "nobs_diffuse", 0) or 0),
        int(order[1]),
    )
    resid_tv = np.asarray(res_in.resid, dtype=np.float64)[burn:]
    diagnostics = {"residual_burn_in": int(burn)}

    # Ljung-Box on Train+Val
    try:
        lb10 = acorr_ljungbox(resid_tv, lags=[LB_LAGS[0]], return_df=True)
        lb20 = acorr_ljungbox(resid_tv, lags=[LB_LAGS[1]], return_df=True)
        diagnostics["ljung_box_trainval"] = {
            f"lag{LB_LAGS[0]}": {"stat": float(lb10["lb_stat"].iloc[0]), "p": float(lb10["lb_pvalue"].iloc[0])},
            f"lag{LB_LAGS[1]}": {"stat": float(lb20["lb_stat"].iloc[0]), "p": float(lb20["lb_pvalue"].iloc[0])},
        }
    except Exception as e:
        diagnostics["ljung_box_trainval"] = {"error": str(e)}

    # Jarque-Bera on Train+Val
    try:
        jb_stat, jb_p, _, _ = jarque_bera(resid_tv)
        diagnostics["jarque_bera_trainval"] = {"stat": float(jb_stat), "p": float(jb_p)}
    except Exception as e:
        diagnostics["jarque_bera_trainval"] = {"error": str(e)}

    # ARCH-LM on Train+Val
    try:
        lm_stat, lm_p, f_stat, f_p = het_arch(resid_tv, nlags=10)
        diagnostics["arch_lm_trainval"] = {"lm_stat": float(lm_stat), "lm_p": float(lm_p), "f_stat": float(f_stat), "f_p": float(f_p)}
    except Exception as e:
        diagnostics["arch_lm_trainval"] = {"error": str(e)}

    # KPSS on Train+Val
    try:
        kpss_stat, kpss_p, kpss_lags, kpss_crit = kpss(resid_tv, regression="c", nlags="auto")
        diagnostics["kpss_trainval"] = {
            "stat": float(kpss_stat),
            "p": float(kpss_p),
            "lag": int(kpss_lags),
            "crit": {str(k): float(v) for k, v in kpss_crit.items()}
        }
    except Exception as e:
        diagnostics["kpss_trainval"] = {"error": str(e)}

    # Walk-forward on Test
    y_hat, lo95, hi95, lo80, hi80 = walk_forward(y_all, idx_val_end, order, trend_raw)

    # Metrics
    # Naive benchmark: previous close, seeded with the last Validation close.
    naive = np.concatenate([[y_va[-1]], y_te[:-1]])
    RMSE = rmse(y_hat, y_te)
    MAE  = mae(y_hat, y_te)
    U2   = theils_u2(y_hat, y_te, naive)
    DA_epsilon = directional_accuracy_eps_from_levels(y_hat, y_te, eps=DA_EPS)
    Coverage95 = float(np.mean((y_te >= lo95) & (y_te <= hi95)))
    nobs = int(len(y_hat))
    sh0, mdd0, turn0 = sharpe_maxdd_turnover_from_levels(y_hat, y_te, cost_bps=0)
    sh10, mdd10, turn10 = sharpe_maxdd_turnover_from_levels(y_hat, y_te, cost_bps=10)

    # Residual summary on Test
    resid_test = y_te - y_hat
    resid_summary = {
        "mean": float(np.mean(resid_test)),
        "std": float(np.std(resid_test, ddof=1)) if nobs > 1 else float("nan"),
        "min": float(np.min(resid_test)) if nobs > 0 else float("nan"),
        "max": float(np.max(resid_test)) if nobs > 0 else float("nan")
    }
    diagnostics["residuals_test_summary"] = resid_summary

    # Predictions CSV (with 95% and 80%)
    pred_df = pd.DataFrame({
        "date": dates_test,
        "y_true": y_te,
        "y_hat": y_hat,
        "residual": resid_test,
        "in_sample_flag": np.zeros_like(y_te, dtype=int),
        "lower_95": lo95, "upper_95": hi95,
        "lower_80": lo80, "upper_80": hi80
    })
    # Explicit raise (not `assert`) so the check survives `python -O`.
    if len(pred_df) != TEST_LEN:
        raise AssertionError(f"Predictions must be {TEST_LEN} rows, got {len(pred_df)}")
    pred_df.to_csv(t_dir / f"predictions_{MODEL_ID}_{ticker}.csv", index=False)

    # Appendix 80 percent intervals
    appendix_80 = pd.DataFrame({
        "date": dates_test,
        "y_hat": y_hat,
        "lower_80": lo80,
        "upper_80": hi80
    })
    appendix_80.to_csv(t_dir / f"appendix_intervals80_{MODEL_ID}_{ticker}.csv", index=False)

    # Metrics JSON
    metrics = {
        "RMSE": RMSE, "MAE": MAE, "U2": U2, "DA_epsilon": DA_epsilon,
        "Coverage": Coverage95, "n": nobs,
        "Sharpe_0bps": sh0, "Sharpe_10bps": sh10,
        "MaxDD_0bps": mdd0, "MaxDD_10bps": mdd10,
        "Turnover": int(turn0)  # parity field, cost-agnostic count
    }
    (t_dir / f"metrics_{MODEL_ID}_{ticker}.json").write_text(json.dumps(metrics, indent=2), encoding="utf-8")

    # run_config JSON (with AIC_train recorded explicitly)
    trend_short = "n" if trend_raw == "none" else "c"
    run_cfg = {
        "model_id": MODEL_ID,
        "ticker": ticker,
        "order": [int(order[0]), int(order[1]), int(order[2])],
        "trend": trend_short,
        "AIC_train": float(aic_train),           # <-- added per checklist
        "AIC": float(aic_train),                 # retained for backward-compat
        "features_used": ["Close"],
        "seed": SEED,
        "seeds": {"global": SEED},
        "timezone": TIMEZONE,
        "market_close": MKT_CLOSE,
        "refit_policy": "expanding-origin",
        "refit_protocol": "expanding_origin_one_step",
        "cadence": "expanding_origin",
        "expanding_origin": True,
        "target_definition": "Close.shift(-1) created after all features",
        "target_col": "Close",
        "policy": {"y_scaled": False, "exog_used": False},
        "splits": {
            "train_start": TRAIN_START, "train_end": TRAIN_END,
            "val_start": VAL_START,     "val_end": VAL_END,
            "test_start": TEST_START,   "test_end": TEST_END
        },
        # sel_meta is spread last, so its lb_double_pass overrides the policy flag.
        "selection": {"train_only_aic": True, "lb_double_pass": LB_DOUBLE_PASS_REQUIRED, **sel_meta}
    }
    (t_dir / f"run_config_{MODEL_ID}_{ticker}.json").write_text(json.dumps(run_cfg, indent=2), encoding="utf-8")

    # Diagnostics JSON
    (t_dir / f"diagnostics_{MODEL_ID}_{ticker}.json").write_text(json.dumps(diagnostics, indent=2), encoding="utf-8")

    # Appendix AIC grids
    aic_grid_df.to_csv(t_dir / f"appendix_aic_grid_{ticker}.csv", index=False)
    aic_top5.to_csv(t_dir / f"appendix_aic_top5_{ticker}.csv", index=False)

    # Whiteness flag: both Train+Val Ljung-Box p-values must exist and exceed alpha.
    lb_tv = diagnostics.get("ljung_box_trainval", {})
    tv_p10 = lb_tv.get(f"lag{LB_LAGS[0]}", {}).get("p", None)
    tv_p20 = lb_tv.get(f"lag{LB_LAGS[1]}", {}).get("p", None)
    whiteness_pass = (tv_p10 is not None and tv_p20 is not None and tv_p10 > LB_ALPHA and tv_p20 > LB_ALPHA)
    return {
        "ticker": ticker,
        "order": list(order),
        "trend": trend_short,
        "AIC_train": float(aic_train),
        "AIC": float(aic_train),
        "LB_train_p10": run_cfg["selection"].get("lb_train", {}).get("p10"),
        "LB_train_p20": run_cfg["selection"].get("lb_train", {}).get("p20"),
        "LB_trainval_p10": run_cfg["selection"].get("lb_trainval", {}).get("p10"),
        "LB_trainval_p20": run_cfg["selection"].get("lb_trainval", {}).get("p20"),
        "LB_tv_whiteness_pass": bool(whiteness_pass),
        "residual_mean_test": diagnostics["residuals_test_summary"]["mean"]
    }

## Outputs and Artefacts

What this block does. Orchestrates runs across tickers, writes provenance manifests and family summary, produces a finalisation gate note, and packages a portable zip after normalising metrics and run_config fields.
Why it is needed. Consolidates outcomes and ensures the bundle is submission-ready and reproducible.
Method choices. Cadence recorded as expanding-origin with explicit refit_protocol. Packaging fixer recomputes Coverage and Turnover, fills seeds, normalises fields, preserves AIC_train, rebuilds file hashes, and zips ARIMA_FINAL_bundle.zip.

In [None]:
# ============================ Outputs and Artefacts (one-cell orchestration) ============================

# ---------- Orchestration ----------
def main():
    """Run every ticker, then write the family summary, gate note and manifests.

    Writes ``family_summary_ARIMA.json`` and ``finalisation_gate.txt`` under
    ``OUT_ROOT`` and prints the gate note. The provenance manifests are
    written last so the hash manifest also covers the summary and gate files;
    a failure there is reported as a warning instead of being silently
    dropped, and does not abort the run.
    """
    per_ticker: List[Dict[str, Any]] = [run_ticker(t) for t in TICKERS]

    family = {
        "model_id": MODEL_ID,
        "tickers": per_ticker,
        # The family is finalisable only if every ticker's Train+Val residuals pass.
        "family_finalisable": all(x.get("LB_tv_whiteness_pass", False) for x in per_ticker),
        "gate_policy": {
            "lb_double_pass_required": LB_DOUBLE_PASS_REQUIRED,
            "lb_lags": list(LB_LAGS),
            "alpha": LB_ALPHA,
            "train_only_aic": True,
            "adaptive_grid_pmax": P_MAX_STEPS,
            "delta_aic_seq": DELTA_AIC_SEQ,
            "top_k_seq": TOP_K_SEQ
        }
    }
    (OUT_ROOT / "family_summary_ARIMA.json").write_text(json.dumps(family, indent=2), encoding="utf-8")

    lines = ["ARIMA family finalisation:",
             f"- Family finalisable: {family['family_finalisable']}",
             "Per-ticker whiteness on Train+Val and residual mean on Test:"]
    for x in per_ticker:
        lines.append(
            f"  {x['ticker']}: pass={x.get('LB_tv_whiteness_pass')} "
            f"(Train p10={x.get('LB_train_p10')}, p20={x.get('LB_train_p20')}; "
            f"Train+Val p10={x.get('LB_trainval_p10')}, p20={x.get('LB_trainval_p20')}); "
            f"AIC_train={x.get('AIC_train', float('nan')):.3f}; residual_mean_test={x.get('residual_mean_test', float('nan')):.6f}"
        )
    gate_text = "\n".join(lines) + "\n"
    (OUT_ROOT / "finalisation_gate.txt").write_text(gate_text, encoding="utf-8")
    print(gate_text, end="")

    # Provenance manifests (best effort; the helpers may be missing if the
    # provenance cell was not run, hence the name lookups stay inside the try).
    try:
        write_env_manifest(OUT_ROOT)
    except Exception as exc:
        warnings.warn(f"env_manifest.txt not written: {exc!r}")
    try:
        write_file_hashes(OUT_ROOT)
    except Exception as exc:
        warnings.warn(f"file_hashes.json not written: {exc!r}")

# ---------- Packaging helpers ----------
def _iter_json_files(root, pattern) -> List[Path]:
    """Recursively list files under ``root`` matching the glob ``pattern``.

    ``root`` may be a ``Path`` or anything ``Path`` accepts. A missing root
    yields an empty list.
    """
    base = Path(root)
    return list(base.rglob(pattern)) if base.exists() else []

def _iter_csv_files(root, pattern) -> List[Path]:
    """Recursively list files under ``root`` matching the glob ``pattern``.

    ``root`` may be a ``Path`` or anything ``Path`` accepts. A missing root
    yields an empty list.
    """
    base = Path(root)
    if base.exists():
        return [match for match in base.rglob(pattern)]
    return []

def _infer_ticker_from_path(p: Path) -> Optional[str]:
    """Guess the ticker from the name of the folder containing ``p``.

    The upper-cased folder name is returned when it is purely alphabetic and
    3 to 5 characters long; otherwise ``None``.
    """
    folder = p.parent.name.upper()
    looks_like_ticker = folder.isalpha() and 3 <= len(folder) <= 5
    return folder if looks_like_ticker else None

def _safe_read_json(path: Path) -> Optional[Dict[str, Any]]:
    """Parse the UTF-8 JSON file at ``path``; return ``None`` on any failure."""
    try:
        raw = path.read_text(encoding="utf-8")
        parsed = json.loads(raw)
    except Exception:
        return None
    return parsed

def _safe_write_json(path: Path, obj: Dict[str, Any]):
    """Serialise ``obj`` as 2-space-indented JSON and write it to ``path`` (UTF-8)."""
    payload = json.dumps(obj, indent=2)
    path.write_text(payload, encoding="utf-8")

def _recompute_coverage_and_turnover(pred_csv: Path) -> Tuple[Optional[float], Optional[int]]:
    """Recompute 95% coverage and turnover from a predictions CSV.

    Rows with ``in_sample_flag`` equal to 0/False are evaluated when that
    column exists and selects at least one row; otherwise all rows are used.

    Returns:
        ``(coverage_95, turnover)``:
        * coverage is the share of rows with ``lower_95 <= y_true <= upper_95``,
          or ``None`` if any of those columns is missing;
        * turnover is the number of position changes under the pure-sign rule
          ``position_t = sign(y_hat_t - y_{t-1})``, or ``None`` if ``y_true``
          or ``y_hat`` is missing. ``y_{t-1}`` is taken from a ``y_prev``
          column when present, else from ``y_true`` shifted by one row (the
          first row then has no previous value and holds position 0). The
          first row never counts as a change because nothing precedes it.
        ``(None, None)`` is returned when the file cannot be read or has no
        rows (a header-only file previously raised ``IndexError``).
    """
    try:
        df = pd.read_csv(pred_csv)
    except Exception:
        return (None, None)

    # Select evaluation slice (prefer Test)
    eval_df = df
    if "in_sample_flag" in df.columns:
        # `== False` on purpose: the flag is stored as 0/1 integers in the CSV.
        test_rows = df[df["in_sample_flag"] == False]
        if not test_rows.empty:
            eval_df = test_rows
    if eval_df.empty:
        return (None, None)

    # Coverage(95) if bands present
    cov = None
    if {"y_true", "lower_95", "upper_95"}.issubset(eval_df.columns):
        within = (eval_df["y_true"] >= eval_df["lower_95"]) & (eval_df["y_true"] <= eval_df["upper_95"])
        cov = float(within.mean())

    # Turnover from the pure-sign rule
    turn = None
    if {"y_true", "y_hat"}.issubset(eval_df.columns):
        if "y_prev" in eval_df.columns:
            y_prev = eval_df["y_prev"]
        else:
            y_prev = eval_df["y_true"].shift(1)
        # Missing differences (e.g. the first row) mean "no position".
        pos = np.sign(eval_df["y_hat"] - y_prev).fillna(0.0)
        switches = pos != pos.shift(1)
        # Skip the first row: it has no predecessor to switch from.
        turn = int(switches.iloc[1:].sum())
    return (cov, turn)

def _update_metrics_from_predictions(root: Path) -> Dict[str, int]:
    """Refresh 'Coverage' and 'Turnover' in every metrics_*.json under ``root``.

    Each metrics file is paired with a predictions CSV by ticker (the file's
    own "ticker" field, else its folder name); when several predictions files
    map to one ticker the first one found is used. Only values that could be
    recomputed and that differ from the stored ones are written back.

    Returns:
        ``{"metrics_recomputed": <number of metrics files rewritten>}``.
    """
    # First predictions file seen per ticker wins.
    preds_by_ticker: Dict[str, Path] = {}
    for pred_file in _iter_csv_files(root, "predictions_*.csv"):
        owner = _infer_ticker_from_path(pred_file)
        if owner:
            preds_by_ticker.setdefault(owner, pred_file)

    rewritten = 0
    for metrics_file in _iter_json_files(root, "metrics_*.json"):
        payload = _safe_read_json(metrics_file)
        if payload is None:
            continue
        owner = payload.get("ticker") or _infer_ticker_from_path(metrics_file)
        if not owner:
            continue
        pred_path = preds_by_ticker.get(owner)
        if not (pred_path and pred_path.exists()):
            continue

        cov, turn = _recompute_coverage_and_turnover(pred_path)
        dirty = False
        # Metrics key names per spec: 'Coverage' is the 95% coverage.
        for field, fresh in (("Coverage", cov), ("Turnover", turn)):
            if fresh is not None and payload.get(field) != fresh:
                payload[field] = fresh
                dirty = True
        if dirty:
            _safe_write_json(metrics_file, payload)
            rewritten += 1
    return {"metrics_recomputed": rewritten}

def _normalise_metadata(root: Path) -> Dict[str, int]:
    """Fill in missing "model_id" / "ticker" fields in run_config and metrics JSON.

    Existing values are never overwritten. The ticker is inferred from the
    file's folder name and left absent when it cannot be inferred. A file is
    rewritten only if something was added.

    Returns:
        ``{"metrics": <files rewritten>, "run_config": <files rewritten>}``.
    """
    def _fill_identity(pattern: str) -> int:
        # Patch every readable JSON file matching `pattern`; return how many changed.
        touched = 0
        for json_file in _iter_json_files(root, pattern):
            payload = _safe_read_json(json_file)
            if payload is None:
                continue
            added_model = "model_id" not in payload
            if added_model:
                payload["model_id"] = MODEL_ID
            added_ticker = False
            if "ticker" not in payload:
                inferred = _infer_ticker_from_path(json_file)
                if inferred:
                    payload["ticker"] = inferred
                    added_ticker = True
            if added_model or added_ticker:
                _safe_write_json(json_file, payload)
                touched += 1
        return touched

    # run_config files first, then metrics files.
    touched_runconfig = _fill_identity("run_config_*.json")
    touched_metrics = _fill_identity("metrics_*.json")
    return {"metrics": touched_metrics, "run_config": touched_runconfig}

def _rebuild_file_hashes_root():
    """Regenerate the hash manifest for ``OUT_ROOT``; any failure is ignored."""
    try:
        target = OUT_ROOT
        if not isinstance(target, Path):
            target = Path(target)
        write_file_hashes(target)
    except Exception:
        # Best effort: packaging continues without a refreshed manifest.
        return

def _build_bundle(zip_name: str = "ARIMA_FINAL_bundle.zip", show_link: bool = True) -> Path:
    """Zip the contents of ``OUT_ROOT`` and report what was bundled.

    Args:
        zip_name: target archive path. A trailing ``.zip`` is optional; the
            archive is always created at ``<zip_name without .zip>.zip``,
            including any directory part. (Previously only the file stem was
            passed to ``make_archive``, so a name with a directory or without
            a ``.zip`` suffix produced the archive at a different path than
            the one removed beforehand, linked and returned.)
        show_link: when true, try to display notebook download links.

    Returns:
        Path of the archive that was written.

    Raises:
        FileNotFoundError: ``OUT_ROOT`` does not exist.
    """
    root = OUT_ROOT if isinstance(OUT_ROOT, Path) else Path(OUT_ROOT)
    if not root.exists():
        raise FileNotFoundError(f"OUT_ROOT not found: {root}")

    requested = Path(zip_name)
    # make_archive appends ".zip" to the base name itself.
    base = requested.with_suffix("") if requested.suffix.lower() == ".zip" else requested
    zpath = base.with_name(base.name + ".zip")
    if zpath.exists():
        zpath.unlink()
    shutil.make_archive(str(base), "zip", root_dir=str(root))

    preds = len(list(root.rglob("predictions_*.csv")))
    mets  = len(list(root.rglob("metrics_*.json")))
    cfgs  = len(list(root.rglob("run_config_*.json")))
    # Permissive: matches diagnostics_ARIMA_*.json, diagnostics_test_ARIMA_*.json, etc.
    diags = len(list(root.rglob("diagnostics*ARIMA*.json")))

    ts = datetime.now(timezone.utc).isoformat(timespec="seconds")
    print(
        f"Bundle: {zpath} | {ts}\n"
        f"Counts -> predictions={preds}, metrics={mets}, run_config={cfgs}, diagnostics={diags}"
    )

    if show_link:
        # Only meaningful inside a notebook; silently skipped elsewhere.
        try:
            from IPython.display import FileLink, display, HTML
            display(FileLink(str(zpath)))
            display(HTML(f'<a href="{zpath}" download>{zpath.name} (click to download)</a>'))
        except Exception:
            pass

    try:
        print("Bundle absolute path:", Path(zpath).resolve())
    except Exception:
        pass

    # Optional forced download for Colab
    try:
        from google.colab import files
        files.download(str(zpath))
    except Exception:
        pass

    return zpath

# ---------- Single tail ----------
if __name__ == "__main__":
    # 1) Fit, evaluate and write artefacts for every ticker, plus the family summary.
    main()
    # Normalise metadata, recompute Coverage/Turnover from predictions (non-destructive), rebuild hashes, zip
    # 2) Add any missing model_id / ticker fields to run_config and metrics JSON.
    changes_meta = _normalise_metadata(OUT_ROOT)
    # 3) Refresh Coverage / Turnover in metrics JSON from the predictions CSVs.
    changes_cov_turn = _update_metrics_from_predictions(OUT_ROOT)
    # 4) Hashes are rebuilt after the JSON patches above so they describe the final files.
    _rebuild_file_hashes_root()
    # 5) Package the output tree.
    _build_bundle("ARIMA_FINAL_bundle.zip", show_link=True)
    print(
        f"Patched files -> metrics_meta={changes_meta['metrics']}, "
        f"run_config_meta={changes_meta['run_config']}, "
        f"metrics_recomputed={changes_cov_turn['metrics_recomputed']}"
    )

ARIMA family finalisation:
- Family finalisable: True
Per-ticker whiteness on Train+Val and residual mean on Test:
  AAPL: pass=True (Train p10=0.9998975717071399, p20=0.9999984758023575; Train+Val p10=0.9999006118380567, p20=0.9999960601192237); AIC_train=2366.368; residual_mean_test=0.120288
  AMZN: pass=True (Train p10=0.999999921814528, p20=0.9999997725942792; Train+Val p10=0.9999993317890863, p20=0.9999984546723262); AIC_train=2515.710; residual_mean_test=0.212717
  MSFT: pass=True (Train p10=0.9999996424433911, p20=0.9999999965619389; Train+Val p10=0.9999984873504009, p20=0.9999999570808367); AIC_train=2883.122; residual_mean_test=0.358507
  TSLA: pass=True (Train p10=0.09608079882323414, p20=0.4555473677092405; Train+Val p10=0.06561186493274906, p20=0.3313120670256253); AIC_train=3553.322; residual_mean_test=0.277793
  AMD: pass=True (Train p10=0.9961477440656766, p20=0.9996491629109464; Train+Val p10=0.981515533167941, p20=0.9989028029819025); AIC_train=2522.870; residual_mean_

Bundle absolute path: /content/ARIMA_FINAL_bundle.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Patched files -> metrics_meta=5, run_config_meta=0, metrics_recomputed=5
