## 01. Imports and Configuration

This block imports all dependencies, sets deterministic seeds and cuDNN flags, configures device selection, and defines fixed splits and hyperparameters. It is required to ensure reproducibility and parity: train-only scaling, fixed splits (2021-02-03→2022-12-30, 2023-01-03→2023-05-31, 2023-06-01→2023-12-28), Target = Close.shift(-1) upstream, America/New_York 16:00 cut-off assumed upstream, early stopping on Validation, and monthly refit on Test.

In [None]:
import os, sys, json, time, math, hashlib, random, zipfile, shutil, platform, warnings
from pathlib import Path
from typing import List, Dict, Tuple, Optional
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# ====================== Config ======================
# Module-level constants shared by every cell below. Nothing here is read from
# the environment or the command line; edit the values in place to change a run.

# One seed is pushed into the Python, NumPy and PyTorch random generators.
RANDOM_SEED = 42
random.seed(RANDOM_SEED); np.random.seed(RANDOM_SEED); torch.manual_seed(RANDOM_SEED)
# Request deterministic cuDNN kernels and switch off the cuDNN benchmark autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tickers processed by the runner; each is read from DATA_DIR / "<ticker>_input.csv".
TICKERS = ["AAPL","AMZN","MSFT","TSLA","AMD"]
DATA_DIR = Path(".")
# Output directory; created immediately as an import-time side effect.
RUN_ROOT = Path("transformer_outputs") / "TRANSFORMER"
RUN_ROOT.mkdir(parents=True, exist_ok=True)

# Inclusive date bounds of the Train / Validation / Test splits (ISO strings).
TRAIN_START = "2021-02-03"
TRAIN_END   = "2022-12-30"
VAL_START   = "2023-01-03"
VAL_END     = "2023-05-31"
TEST_START  = "2023-06-01"
TEST_END    = "2023-12-28"

# Window length (parity with LSTM/Hybrid)
L = 90

# Optimisation
BATCH_SIZE = 64
MAX_EPOCHS = 200
ES_PATIENCE = 20     # epochs without validation improvement before stopping
LR = 1e-3
WEIGHT_DECAY = 0.0
GRAD_CLIP = 1.0      # max gradient norm passed to clip_grad_norm_

# Transformer
D_MODEL = 64
NHEAD = 4
DIM_FF = 128
DEPTH = 2            # number of stacked encoder layers
DROPOUT = 0.1

# Metrics policy
EPSILON_DA = 0.0010            # for DA epsilon-gating
TRADING_COSTS = [0.0, 0.0010]  # 0 bps, 10 bps (decimal return terms)

# Feature families
# Column-name prefixes that mark a feature as sentiment (zero-preserving scaling).
SENTIMENT_PREFIXES = ("Tw_", "Rd_", "Nw_SP500_")

## Outputs and Artefacts (helpers)

This block provides helper functions for provenance capture and packaging. It writes env_manifest.txt and file_hashes.json at the run root, and builds a zip for submission. These are required to ensure reproducibility and verifiable artefacts.

In [None]:
# ====================== IO helpers ======================

def sha256_file(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``p``.

    The file is consumed in 1 MiB reads so large artefacts are never held in
    memory in one piece.
    """
    digest = hashlib.sha256()
    block_size = 1 << 20
    with p.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()

def write_env_manifest(root: Path):
    """Write ``env_manifest.txt`` under ``root`` describing the runtime.

    The file holds ``key=value`` lines (UTC timestamp, Python version,
    platform string, and the torch / numpy / pandas versions) joined by
    newlines with no trailing newline.
    """
    entries = (
        f"timestamp_utc={time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
        f"python={sys.version.split()[0]}",
        f"platform={platform.platform()}",
        f"torch={getattr(torch, '__version__', 'unknown')}",
        f"numpy={getattr(np, '__version__', 'unknown')}",
        f"pandas={getattr(pd, '__version__', 'unknown')}",
    )
    manifest = root / "env_manifest.txt"
    manifest.write_text("\n".join(entries))

def write_file_hashes(root: Path):
    """Write ``file_hashes.json`` under ``root`` describing every file below it.

    The JSON maps each file's POSIX-style path (relative to ``root``) to a
    dict with ``sha256``, ``size`` (bytes) and ``mtime`` (integer seconds).
    A file that cannot be read or stat-ed is still listed, with all three
    fields set to ``None``.

    The manifest itself is excluded: on a re-run the previous
    ``file_hashes.json`` would otherwise be recorded with a digest that
    becomes wrong the moment the file is rewritten. Paths are visited in
    sorted order so the output is stable across runs and file systems.
    """
    manifest = root / "file_hashes.json"
    mapping = {}
    for p in sorted(root.rglob("*")):
        if not p.is_file() or p == manifest:
            continue
        rel = p.relative_to(root).as_posix()
        try:
            info = p.stat()
            mapping[rel] = {"sha256": sha256_file(p), "size": info.st_size, "mtime": int(info.st_mtime)}
        except OSError:
            # Best effort: keep the entry so the omission is visible in the manifest.
            mapping[rel] = {"sha256": None, "size": None, "mtime": None}
    manifest.write_text(json.dumps(mapping, indent=2))

def package_zip(run_root: Path) -> Path:
    """Zip ``run_root`` into ``outputs/transformer/transformer.zip``.

    Any existing archive is removed first. The archive is built relative to
    ``run_root``'s parent, so entries are prefixed with ``run_root``'s own
    directory name. A ``transformer.sha256`` sidecar holding
    ``"<digest>  transformer.zip"`` is written next to it.

    Returns the path of the archive.
    """
    out_dir = Path("outputs") / "transformer"
    out_dir.mkdir(parents=True, exist_ok=True)
    base = out_dir / "transformer"
    archive = base.with_suffix(".zip")
    checksum_file = out_dir / "transformer.sha256"

    if archive.exists():
        archive.unlink()
    shutil.make_archive(
        str(base),
        "zip",
        root_dir=str(run_root.parent),
        base_dir=run_root.name,
    )
    checksum_file.write_text(f"{sha256_file(base.with_suffix('.zip'))}  transformer.zip")
    return archive

def maybe_download_zip(zip_path: Path):
    """Offer ``zip_path`` as a Colab download, or print where it is.

    If importing ``google.colab.files`` or the download call raises for any
    reason, the archive path and the first 12 hex characters of its SHA-256
    digest are printed instead.
    """
    try:
        from google.colab import files  # type: ignore
        files.download(str(zip_path))
    except Exception:
        # Deliberate best effort: any failure falls back to a console message.
        print(f"Archive ready: {zip_path}  (sha256: {sha256_file(zip_path)[:12]})")

## Data Loading

This block loads each ticker’s input CSV, ensures chronological order, and selects feature columns. Assumes upstream created Target = Close.shift(-1) after all features and aligned to America/New_York 16:00 cut-off.

In [None]:
# ====================== Data & scaling ======================

def read_ticker_df(ticker: str) -> pd.DataFrame:
    """Load ``<ticker>_input.csv`` from ``DATA_DIR`` in chronological order.

    The ``date`` column is parsed to datetimes, rows are sorted by it, and
    the index is reset to 0..n-1.

    Raises:
        FileNotFoundError: if the CSV for ``ticker`` does not exist.
    """
    p = DATA_DIR / f"{ticker}_input.csv"
    if p.exists():
        frame = pd.read_csv(p)
        frame["date"] = pd.to_datetime(frame["date"])
        ordered = frame.sort_values("date")
        return ordered.reset_index(drop=True)
    raise FileNotFoundError(f"Missing input CSV for {ticker}: {p}")

def feature_columns(df: pd.DataFrame) -> List[str]:
    """Return the model-input column names of ``df`` in their original order.

    Everything is kept except the identifier, raw OHLCV and label columns.
    """
    reserved = ("date", "ticker", "Open", "High", "Low", "Close", "Volume", "Target")
    selected = []
    for name in df.columns:
        if name in reserved:
            continue
        selected.append(name)
    return selected

## Preprocessing

This block defines the train-only zero-preserving scaler for features. It preserves exact zeros for Tw_/Rd_/Nw_SP500_* and does not scale binary indicators. Train statistics are computed once and reused for Validation and Test.

In [None]:
class ZeroPreservingScaler:
    """Column-wise standardiser that leaves some values untouched.

    * Columns in ``binary_cols`` are never fitted or transformed.
    * Columns in ``sentiment_cols`` are fitted on their non-zero entries only,
      and exact zeros stay exactly ``0.0`` after transformation.
    * All other columns get a plain ``(x - mean) / std`` using population
      standard deviation (``ddof=0``).

    A standard deviation at or below ``1e-12`` is replaced by ``1.0``.
    """

    def __init__(self, cols: List[str], sentiment_cols: List[str], binary_cols: List[str]):
        self.cols = cols
        self.sentiment_cols = sentiment_cols
        self.binary_cols = binary_cols
        self.mu = {}
        self.sigma = {}

    @staticmethod
    def _guarded_std(values) -> float:
        # Population std, with a unit fallback for (near-)constant data.
        spread = float(np.std(values, ddof=0))
        return float(spread if spread > 1e-12 else 1.0)

    def fit(self, df: pd.DataFrame):
        """Learn per-column mean/std from ``df`` and return ``self``."""
        for col in self.cols:
            if col in self.binary_cols:
                continue
            values = df[col].values.astype(float)
            if col in self.sentiment_cols:
                # Statistics come from the non-zero observations only.
                values = values[values != 0.0]
                if values.size == 0:
                    self.mu[col] = 0.0
                    self.sigma[col] = 1.0
                    continue
            self.mu[col] = float(np.mean(values))
            self.sigma[col] = self._guarded_std(values)
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a scaled copy of ``df``; the input frame is not modified."""
        scaled = df.copy()
        for col in self.cols:
            if col in self.binary_cols:
                continue
            values = scaled[col].values.astype(float)
            if col in self.sentiment_cols:
                nonzero = values != 0.0
                result = np.zeros_like(values)
                if nonzero.any():
                    result[nonzero] = (values[nonzero] - self.mu[col]) / self.sigma[col]
            else:
                result = (values - self.mu[col]) / self.sigma[col]
            scaled[col] = result
        return scaled

def detect_binary_columns(df: pd.DataFrame, cols: List[str]) -> List[str]:
    """Return the entries of ``cols`` whose non-null values are all 0 or 1.

    Values are compared after casting to float; input order is preserved.
    A column with no non-null values also qualifies.
    """
    allowed = {0.0, 1.0}

    def _is_binary(name: str) -> bool:
        distinct = pd.unique(df[name].dropna().astype(float))
        return len(distinct) <= 2 and set(distinct).issubset(allowed)

    return [name for name in cols if _is_binary(name)]

def detect_sentiment_columns(cols: List[str]) -> List[str]:
    """Return the names in ``cols`` that start with any ``SENTIMENT_PREFIXES`` entry.

    Input order is preserved.
    """
    matches = []
    for name in cols:
        if name.startswith(SENTIMENT_PREFIXES):
            matches.append(name)
    return matches

## Model Definition

This block defines the sequence dataset, sinusoidal positional encoding, Transformer encoder regressor, and a parameter counter. The model uses a last-timestep head for next-day prediction, matching the L=90 causal window.

In [None]:
# ====================== Sequences ======================

class SeqDataset(Dataset):
    """Sliding-window dataset over a chronologically ordered frame.

    Sample ``k`` is the pair ``(X[k:k+L], Target[k+L-1])``: ``L`` consecutive
    feature rows and the ``Target`` value stored on the window's last row.

    Args:
        df: frame containing ``feat_cols``, ``Target`` and ``date`` columns.
            Rows are used in the order given.
        feat_cols: feature column names, in the order the model expects.
        L: window length in rows.

    A frame with ``n`` rows yields ``max(0, n - L + 1)`` samples. The stop
    value of the index range is ``n + 1`` so that the window ending on the
    final row is included; stopping at ``n`` silently dropped that sample
    and produced an empty dataset when ``n == L``.
    """
    def __init__(self, df: pd.DataFrame, feat_cols: List[str], L: int):
        self.X = df[feat_cols].values.astype(np.float32)
        self.y = df["Target"].values.astype(np.float32)
        self.dates = df["date"].values
        self.L = L
        self.n = len(df)
        # (start, stop) row slices; stop is exclusive, so stop may equal n.
        self.indices = [(i - L, i) for i in range(L, self.n + 1)]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        a, b = self.indices[idx]
        x = self.X[a:b]
        # Label lives on the last row inside the window (row b-1).
        y = self.y[b - 1]
        return torch.from_numpy(x), torch.tensor(y, dtype=torch.float32)

# ====================== Model ======================

class PositionalEncoding(nn.Module):
    """Additive sinusoidal positional encoding for sequence-first tensors.

    Args:
        d_model: embedding width. Odd widths are supported: the sine channels
            take the even indices and the cosine channels the remaining
            ``d_model // 2`` odd indices.
        max_len: longest sequence length the pre-computed table covers.

    The table is stored as a non-trainable buffer ``pe`` of shape
    ``(max_len, 1, d_model)``.
    """
    def __init__(self, d_model: int, max_len: int = 1000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = pos * div  # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine slot than sine slot.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer("pe", pe.unsqueeze(1))

    def forward(self, x):
        """Add the encoding to ``x`` of shape ``(seq_len, batch, d_model)``.

        Raises:
            ValueError: if ``seq_len`` exceeds the table length ``max_len``.
        """
        n, b, d = x.size()
        if n > self.pe.size(0):
            raise ValueError(
                f"Sequence length {n} exceeds positional-encoding max_len {self.pe.size(0)}."
            )
        # The size-1 batch axis of the buffer broadcasts over the batch.
        return x + self.pe[:n, :b, :d]

class TransformerRegressor(nn.Module):
    """Transformer-encoder regressor that predicts one scalar per sequence.

    Each time step is projected to ``d_model``, a positional encoding is
    added, the sequence runs through ``depth`` encoder layers, and the
    representation at the final time step is fed to a small MLP head.

    Args:
        d_in: number of input features per time step.
        d_model: encoder width.
        nhead: attention heads per encoder layer.
        dim_ff: hidden width of each encoder layer's feed-forward block.
        depth: number of stacked encoder layers.
        dropout: dropout probability used in the encoder layers, before the
            head, and inside the head.
    """
    def __init__(self, d_in: int, d_model: int, nhead: int, dim_ff: int, depth: int, dropout: float):
        super().__init__()
        # Per-time-step linear map from feature width to model width.
        self.input_proj = nn.Linear(d_in, d_model)
        # batch_first=False: the encoder consumes (seq, batch, feature) tensors.
        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_ff,
            dropout=dropout, batch_first=False, activation="gelu")
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=depth)
        self.posenc = PositionalEncoding(d_model)
        self.dropout = nn.Dropout(dropout)
        # Two-layer head: d_model -> d_model // 2 -> 1.
        self.head = nn.Sequential(
            nn.Linear(d_model, d_model // 2), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1)
        )
    def forward(self, x):
        """Map a batch-first input to a 1-D tensor with one value per sequence."""
        x = self.input_proj(x)        # (B, L, d_model)
        x = x.transpose(0,1)          # (L, B, d_model)
        x = self.posenc(x)
        # No attention mask is passed, so every position attends to the whole window.
        x = self.encoder(x)           # (L, B, d_model)
        x = x[-1]                     # (B, d_model)
        x = self.dropout(x)
        y = self.head(x).squeeze(-1)  # (B,)
        return y

def parameter_count(model: nn.Module) -> int:
    """Return the total number of scalar entries across ``model.parameters()``.

    Both trainable and frozen parameters are counted; buffers are not.
    """
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total

## Training

This block builds deterministic data loaders and performs training with early stopping on Validation. It saves per-refit training curves. No behaviour changes are introduced.

In [None]:
# ====================== Training utilities ======================

def build_loaders(df_train, df_val, feat_cols, L, batch_size, device) -> Tuple[DataLoader, DataLoader]:
    """Wrap the train and validation frames in sequential ``DataLoader``s.

    Both loaders iterate in row order (no shuffling), keep the final partial
    batch, run in the main process, and pin memory only when ``device`` is a
    CUDA device.

    Returns:
        ``(train_loader, val_loader)``.
    """
    loader_options = dict(
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        pin_memory=(device.type == "cuda"),
        num_workers=0,
    )
    train_windows = SeqDataset(df_train, feat_cols, L)
    val_windows = SeqDataset(df_val, feat_cols, L)
    train_loader = DataLoader(train_windows, **loader_options)
    val_loader = DataLoader(val_windows, **loader_options)
    return train_loader, val_loader

def train_one(model, train_loader, val_loader, max_epochs, es_patience, lr, weight_decay, grad_clip, device, curves_path) -> int:
    """Train ``model`` with Adam and MSE loss, early-stopping on validation loss.

    Args:
        model: module to optimise in place.
        train_loader / val_loader: iterables yielding ``(xb, yb)`` batches.
        max_epochs: upper bound on the number of epochs.
        es_patience: consecutive non-improving epochs tolerated before stopping.
        lr, weight_decay: passed straight to ``torch.optim.Adam``.
        grad_clip: max gradient norm; pass ``None`` to disable clipping.
        device: device each batch is moved to.
        curves_path: where the train/val loss plot is saved.

    Returns:
        The number of epochs actually executed.

    On return the model holds the weights from the epoch with the lowest
    validation loss, if any epoch improved on ``inf``.
    """
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    crit = nn.MSELoss()
    best_loss = float("inf"); best_state = None
    patience = es_patience
    tr_hist, va_hist = [], []
    epoch_ran = 0
    for epoch in range(1, max_epochs+1):
        # ---- training pass ----
        model.train()
        tr_loss = 0.0; n_tr = 0
        for xb, yb in train_loader:
            xb = xb.to(device); yb = yb.to(device)
            opt.zero_grad()
            yhat = model(xb)
            loss = crit(yhat, yb)
            loss.backward()
            if grad_clip is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            opt.step()
            # Weight the batch-mean loss by batch size so the epoch figure is a per-sample mean.
            tr_loss += float(loss.detach().cpu()) * len(xb)
            n_tr += len(xb)
        tr_loss /= max(1, n_tr)

        # ---- validation pass (no gradients, dropout disabled via eval()) ----
        model.eval()
        va_loss = 0.0; n_va = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(device); yb = yb.to(device)
                yhat = model(xb)
                loss = crit(yhat, yb)
                va_loss += float(loss.detach().cpu()) * len(xb)
                n_va += len(xb)
        va_loss /= max(1, n_va)

        tr_hist.append(tr_loss); va_hist.append(va_loss)
        # An epoch counts as an improvement only if it beats the best by more than 1e-12.
        if va_loss + 1e-12 < best_loss:
            best_loss = va_loss
            # Snapshot on the CPU so the copy does not alias the live parameters.
            best_state = {k: v.cpu().clone() for k,v in model.state_dict().items()}
            patience = es_patience
        else:
            patience -= 1
        epoch_ran = epoch
        # NOTE(review): equality test — a negative es_patience would never hit 0 here.
        if patience == 0:
            break
    if best_state is not None:
        model.load_state_dict(best_state)

    # Persist the loss curves for this fit.
    plt.figure()
    plt.plot(range(1, len(tr_hist)+1), tr_hist, label="train")
    plt.plot(range(1, len(va_hist)+1), va_hist, label="val")
    plt.xlabel("epoch"); plt.ylabel("MSE"); plt.legend(); plt.tight_layout()
    plt.savefig(curves_path); plt.close()
    return epoch_ran

## Evaluation

This block computes Test metrics: RMSE, MAE, Theil’s U2 vs naïve last-close, DA with ε-gating and Coverage, and trading metrics using the pure-sign rule at 0 and 10 bps with Turnover.

In [None]:
# ====================== Metrics (pure-sign trading; DA ε-gated) ======================

def naive_last_close(y_true: np.ndarray) -> np.ndarray:
    """Return the one-step persistence forecast of a 1-D series.

    Element ``i`` (``i >= 1``) is ``y_true[i - 1]``. Element 0 has no
    predecessor and is set to ``y_true[0]``, i.e. a zero-error placeholder.
    The result has the same shape and dtype as ``y_true``.

    An empty input returns an empty array instead of raising ``IndexError``.
    """
    y_naive = np.empty_like(y_true)
    if y_true.size == 0:
        return y_naive
    y_naive[0] = y_true[0]
    y_naive[1:] = y_true[:-1]
    return y_naive

def theils_u2(y_true: np.ndarray, y_hat: np.ndarray, y_naive: np.ndarray) -> float:
    """Return RMSE(model) / RMSE(naive benchmark).

    By this function's convention the result is ``0.0`` when the benchmark's
    RMSE is exactly zero, regardless of the model's own error.
    """
    def _rmse(forecast):
        return np.sqrt(np.mean((forecast - y_true)**2))

    model_rmse = _rmse(y_hat)
    naive_rmse = _rmse(y_naive)
    if naive_rmse == 0.0:
        return 0.0
    return float(model_rmse / naive_rmse)

def classes_from_returns(r: np.ndarray, eps: float) -> np.ndarray:
    """Bucket returns into -1 / 0 / +1 using a dead band of half-width ``eps``.

    ``r >= eps`` maps to ``+1`` and ``r <= -eps`` to ``-1``; when both hold
    (e.g. ``eps == 0`` and ``r == 0``) the ``-1`` label wins. Everything
    else, including NaN, maps to ``0``. The output has ``r``'s shape and dtype.
    """
    labels = np.where(r <= -eps, -1.0, np.where(r >= eps, 1.0, 0.0))
    return labels.astype(r.dtype)

def directional_accuracy_eps(y_true: np.ndarray, y_hat: np.ndarray, eps: float) -> Tuple[float, float, int]:
    """Directional accuracy restricted to steps whose realised move exceeds ``eps``.

    Realised and predicted returns for step ``i`` share the denominator
    ``y_true[i - 1]``. Both are bucketed with ``classes_from_returns``; only
    steps whose realised class is non-zero are scored.

    Returns:
        ``(accuracy, coverage, n_scored)`` where ``coverage`` is the fraction
        of steps that were scored. All three are zero when nothing is scored.
    """
    base = y_true[:-1]
    realised = (y_true[1:] / base) - 1.0
    predicted = (y_hat[1:] / base) - 1.0

    realised_cls = classes_from_returns(realised, eps)
    predicted_cls = classes_from_returns(predicted, eps)

    scored = realised_cls != 0.0
    n_scored = int(scored.sum())
    if n_scored == 0:
        return 0.0, 0.0, 0

    hits = (predicted_cls[scored] == realised_cls[scored]).astype(float)
    accuracy = float(np.mean(hits))
    return accuracy, float(scored.mean()), n_scored

def trading_metrics_puresign(y_true: np.ndarray, y_hat: np.ndarray, cost: float) -> Tuple[float, float, int]:
    """
    Pure-sign rule:
      position_t = sign(ŷ_{t+1} - y_t) with ties -> 0
      ret_{t+1}  = (y_{t+1}/y_t) - 1
      strategy   = position_t * ret_{t+1}
      costs only when position changes; turnover = count of changes

    The position before the first step is flat (0), so entering a position
    on the first step counts as a change. ``cost`` is charged once per
    change, whatever the size of the change.

    Returns:
        ``(sharpe, max_drawdown, turnover)``. ``sharpe`` is the per-step mean
        over the per-step population std (not annualised), or 0.0 when the
        std is at or below 1e-12. ``max_drawdown`` is measured on the additive
        cumulative return curve, whose running peak starts at the initial
        equity of 0; without that floor, losses incurred before the first
        gain were not counted as drawdown. Fewer than two observations give
        ``(0.0, 0.0, 0)``.
    """
    r_true = (y_true[1:] / y_true[:-1]) - 1.0
    pos = np.sign(y_hat[1:] - y_true[:-1])
    pos[np.isnan(pos)] = 0.0
    pos[y_hat[1:] == y_true[:-1]] = 0.0
    if len(r_true) == 0:
        return 0.0, 0.0, 0

    # Compare each position with the one held on the previous step (flat at t=0).
    prev = np.concatenate(([0.0], pos[:-1]))
    changed = pos != prev
    turnover = int(np.count_nonzero(changed))
    gross = pos * r_true
    strat = np.where(changed, gross - cost, gross)

    mean = float(np.mean(strat))
    std = float(np.std(strat, ddof=0))
    sharpe = float(mean / std) if std > 1e-12 else 0.0

    cum = np.cumsum(strat)
    # Floor the running peak at 0 so the starting equity counts as a peak.
    peak = np.maximum(np.maximum.accumulate(cum), 0.0)
    maxdd = float(np.max(peak - cum))
    return sharpe, maxdd, turnover

def compute_metrics(y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]:
    """Assemble the full metric report for one prediction series.

    Combines error metrics (RMSE, MAE, Theil's U2 against the persistence
    forecast), epsilon-gated directional accuracy with its coverage and
    sample count, and pure-sign trading metrics at both configured cost
    levels. ``Turnover`` is taken from the first cost level's run.
    """
    errors = y_hat - y_true
    report = {
        "RMSE": float(np.sqrt(np.mean(errors**2))),
        "MAE": float(np.mean(np.abs(errors))),
        "U2": theils_u2(y_true, y_hat, naive_last_close(y_true)),
    }

    accuracy, coverage, n_scored = directional_accuracy_eps(y_true, y_hat, EPSILON_DA)
    report["DA_epsilon"] = float(accuracy)
    report["Coverage"] = float(coverage)
    report["n"] = int(n_scored)

    free_cost, paid_cost = TRADING_COSTS[0], TRADING_COSTS[1]
    sharpe_free, maxdd_free, turnover = trading_metrics_puresign(y_true, y_hat, free_cost)
    sharpe_paid, maxdd_paid, _ = trading_metrics_puresign(y_true, y_hat, paid_cost)
    report["Sharpe_0bps"] = float(sharpe_free)
    report["MaxDD_0bps"] = float(maxdd_free)
    report["Sharpe_10bps"] = float(sharpe_paid)
    report["MaxDD_10bps"] = float(maxdd_paid)
    report["Turnover"] = int(turnover)
    return report

## Outputs and Artefacts (walk-forward and packaging)

This block implements daily expanding-origin inference with a monthly refit cadence, enforces Test window length n=146, writes per-ticker predictions, metrics, run_config, and a canonical training curves file; then writes run-root provenance files and packages a zip.

In [None]:
# ====================== Daily walk-forward with monthly refits ======================

def first_trading_day_each_month(dates: pd.Series) -> List[pd.Timestamp]:
    """Return the earliest date present in each calendar month of ``dates``.

    Months are returned in chronological order; the input need not be sorted.
    """
    stamps = pd.Series(pd.to_datetime(dates))
    month_key = stamps.dt.to_period("M")
    earliest = stamps.groupby(month_key).min()
    return [pd.Timestamp(day) for day in earliest.tolist()]

def ensure_146(df_pred: pd.DataFrame):
    """Clip ``df_pred`` to the Test window and verify its calendar.

    Rows outside ``[TEST_START, TEST_END]`` are dropped. The remaining frame
    must contain exactly 146 distinct dates whose first and last values are
    the window bounds.

    Returns:
        A copy of the clipped frame.

    Raises:
        AssertionError: if the date count or either bound does not match.
    """
    window_open = pd.Timestamp(TEST_START)
    window_close = pd.Timestamp(TEST_END)
    inside = (df_pred["date"] >= window_open) & (df_pred["date"] <= window_close)
    df_pred = df_pred[inside].copy()

    dates = pd.to_datetime(df_pred["date"].unique())
    calendar_ok = len(dates) == 146 and dates.min() == window_open and dates.max() == window_close
    if not calendar_ok:
        raise AssertionError(f"Test window invalid. Expected 146 rows from {TEST_START} to {TEST_END}, got {len(dates)} rows from {dates.min()} to {dates.max()}.")
    return df_pred

def train_model_for_month(df_all_scaled: pd.DataFrame, refit_start: pd.Timestamp,
                          feat_cols: List[str], base_out_dir: Path, ticker: str) -> Tuple[Optional[nn.Module], int]:
    """Fit a fresh ``TransformerRegressor`` for the refit anchored at ``refit_start``.

    Training rows come from ``[TRAIN_START, TRAIN_END]`` and validation rows
    from ``[VAL_START, VAL_END]``; in both cases only rows dated strictly
    before ``refit_start`` are kept. The loss-curve image for this refit is
    written into ``base_out_dir``.

    Returns:
        ``(model, epochs_run)``, or ``(None, 0)`` when either split has fewer
        than ``L + 5`` rows.

    NOTE(review): with the configured split constants both ranges end before
    the Test window, so the ``< refit_start`` filter looks like a no-op for
    Test-month refits and every refit would see the same rows — confirm that
    is the intended cadence.
    """
    stamps = df_all_scaled["date"]
    before_refit = stamps < refit_start

    def _segment(first_day, last_day):
        keep = (stamps >= first_day) & (stamps <= last_day) & before_refit
        return df_all_scaled[keep].copy()

    train_df = _segment(TRAIN_START, TRAIN_END)
    val_df = _segment(VAL_START, VAL_END)

    min_rows = L + 5
    if min(len(train_df), len(val_df)) < min_rows:
        return None, 0

    tr_loader, va_loader = build_loaders(train_df, val_df, feat_cols, L, BATCH_SIZE, DEVICE)
    model = TransformerRegressor(
        d_in=len(feat_cols),
        d_model=D_MODEL,
        nhead=NHEAD,
        dim_ff=DIM_FF,
        depth=DEPTH,
        dropout=DROPOUT,
    )
    model = model.to(DEVICE)
    curves_path = base_out_dir / f"training_curves_{ticker}_{refit_start.strftime('%Y-%m-%d')}.png"
    epochs_run = train_one(
        model, tr_loader, va_loader,
        MAX_EPOCHS, ES_PATIENCE, LR, WEIGHT_DECAY, GRAD_CLIP,
        DEVICE, curves_path,
    )
    return model, int(epochs_run)

def daily_walk_forward(df_all_scaled: pd.DataFrame, feat_cols: List[str], L: int,
                       base_out_dir: Path, ticker: str) -> Tuple[pd.DataFrame, Dict[str,int], List[str]]:
    """Produce one prediction per Test date, refitting the model each month.

    For the first Test date of every calendar month a model is trained via
    ``train_model_for_month``. That model then predicts every Test date in
    the same month from the ``L`` most recent rows up to and including that
    date. Months whose refit returns no model, and dates with fewer than
    ``L`` rows of history, are skipped.

    Returns:
        ``(predictions, epochs_per_refit, refit_dates)`` where ``predictions``
        has columns ``date``, ``y_true`` (the row's ``Target``) and ``y_hat``,
        de-duplicated and sorted by date; ``epochs_per_refit`` maps each used
        refit date (ISO string) to the epochs trained; ``refit_dates`` lists
        those ISO strings in order.
    """
    result_columns = ["date","y_true","y_hat"]
    all_dates = df_all_scaled["date"]
    in_test = (all_dates >= TEST_START) & (all_dates <= TEST_END)

    test_dates = pd.to_datetime(all_dates[in_test]).drop_duplicates().sort_values().tolist()
    if not test_dates:
        return pd.DataFrame(columns=result_columns), {}, []

    epochs_per_refit: Dict[str,int] = {}
    used_refits_iso: List[str] = []
    preds: List[Tuple[pd.Timestamp, float, float]] = []

    # One refit per calendar month, anchored on that month's first Test date.
    for refit_start in first_trading_day_each_month(pd.Series(test_dates)):
        model, epochs = train_model_for_month(df_all_scaled, refit_start, feat_cols, base_out_dir, ticker)
        if model is None:
            continue
        refit_iso = refit_start.strftime("%Y-%m-%d")
        epochs_per_refit[refit_iso] = epochs
        used_refits_iso.append(refit_iso)
        model.eval()

        # Test dates from the anchor through the end of its calendar month.
        month_end = refit_start + pd.offsets.MonthEnd(0)
        in_month = in_test & (all_dates >= refit_start) & (all_dates <= month_end)
        month_dates = all_dates[in_month].drop_duplicates().sort_values().tolist()

        with torch.no_grad():
            for d in month_dates:
                window_df = df_all_scaled[all_dates <= d].tail(L)
                if len(window_df) < L:
                    continue
                features = window_df[feat_cols].values.astype(np.float32)
                batch = torch.from_numpy(features).unsqueeze(0)  # (1, L, F)
                y_hat = model(batch.to(DEVICE)).cpu().numpy().ravel()[0]
                y_true = float(df_all_scaled.loc[all_dates == d, "Target"].iloc[0])
                preds.append((pd.Timestamp(d), y_true, float(y_hat)))

    out = pd.DataFrame(preds, columns=result_columns)
    out = out.drop_duplicates(subset=["date"]).sort_values("date")
    return out, epochs_per_refit, used_refits_iso

## Outputs and Artefacts (runner)

This final block executes the per-ticker run: applies train-only scaling, performs monthly refits, writes artefacts with canonical filenames, records run_config with optimiser and learning-rate schedule fields, writes manifests, and packages the zip. Expanding-origin daily inference within each month is retained.

In [None]:
# ====================== Runner (finalisation-ready) ======================
import os, sys, json, time, shutil, zipfile, hashlib, platform
from pathlib import Path
import numpy as np
import pandas as pd

# --------- paths ---------
# Bundle root holds provenance; model folder holds per-ticker outputs
# NOTE: RUN_ROOT is rebound here, replacing the value assigned in the
# configuration cell; functions that read the global RUN_ROOT at call time
# (e.g. run_for_ticker) therefore write under TRANSFORMER_FINAL/TRANSFORMER.
BUNDLE_ROOT = Path("TRANSFORMER_FINAL")              # <— top-level bundle directory
RUN_ROOT    = BUNDLE_ROOT / "TRANSFORMER"            # <— model directory with per-ticker subfolders
# Created immediately when the cell runs (parents included, no error if present).
RUN_ROOT.mkdir(parents=True, exist_ok=True)

# --------- provenance helpers ---------
def sha256_file(p: Path, chunk_size: int = 1024 * 1024) -> str:
    """Return the hex SHA-256 digest of the file at ``p``.

    The file is read ``chunk_size`` bytes at a time (1 MiB by default).
    """
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        block = stream.read(chunk_size)
        while block != b"":
            digest.update(block)
            block = stream.read(chunk_size)
    return digest.hexdigest()

def write_env_manifest(root: Path) -> Path:
    """Write ``env_manifest.txt`` under ``root`` and return its path.

    The file holds ``key: value`` lines (UTC timestamp, OS string, Python
    version, then numpy / pandas / torch / statsmodels versions), UTF-8
    encoded and terminated by a newline. A library that cannot be imported
    is reported as ``not_installed``; one without ``__version__`` as
    ``unknown``.
    """
    def ver(modname):
        # Best-effort lookup: any import failure is reported, never raised.
        try:
            return getattr(__import__(modname), "__version__", "unknown")
        except Exception:
            return "not_installed"

    lines = [
        f"timestamp_utc: {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
        f"os: {platform.platform()}",
        f"python: {sys.version.split()[0]}",
        f"numpy: {ver('numpy')}",
        f"pandas: {ver('pandas')}",
        f"torch: {ver('torch')}",
        f"statsmodels: {ver('statsmodels')}",
    ]
    out = root / "env_manifest.txt"
    body = "\n".join(lines) + "\n"
    out.write_text(body, encoding="utf-8")
    return out

def write_file_hashes(root: Path) -> Path:
    """Write ``file_hashes.json`` under ``root`` and return its path.

    The JSON has one top-level key, ``files``: a list of
    ``{"path", "sha256"}`` records for every file below ``root``, in sorted
    path order, with POSIX-style paths relative to ``root``. A top-level
    ``file_hashes.json`` is left out so the manifest never describes itself.
    """
    def _relative_files():
        for candidate in sorted(root.rglob("*")):
            if not candidate.is_file():
                continue
            rel = candidate.relative_to(root).as_posix()
            if rel != "file_hashes.json":
                yield candidate, rel

    records = [
        {"path": rel, "sha256": sha256_file(candidate)}
        for candidate, rel in _relative_files()
    ]
    out = root / "file_hashes.json"
    payload = json.dumps({"files": records}, indent=2)
    out.write_text(payload, encoding="utf-8")
    return out

def package_zip(root: Path) -> Path:
    """Zip the contents of ``root`` into ``<root.parent>/<root.name>_bundle.zip``.

    Entries are stored relative to ``root`` (so files directly under it sit
    at the archive's top level), added in sorted path order, and compressed
    with DEFLATE level 6. Only regular files are added; empty directories do
    not appear. The archive lives next to ``root`` rather than inside it, so
    it never includes itself. An existing archive of the same name is
    overwritten.

    Returns:
        Path of the written archive.
    """
    # Derive the archive name directly; the previous with_suffix("") step was
    # dead code whose result was immediately overwritten.
    zip_path = root.parent / f"{root.name}_bundle.zip"
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=6) as z:
        for p in sorted(root.rglob("*")):
            if p.is_file():
                z.write(p, p.relative_to(root))
    return zip_path

# --------- your existing dependencies assumed to be defined elsewhere ---------
# read_ticker_df, feature_columns, detect_binary_columns, detect_sentiment_columns,
# ZeroPreservingScaler, daily_walk_forward, ensure_146, compute_metrics,
# TransformerRegressor, parameter_count, and constants:
# TICKERS, TRAIN_START, TRAIN_END, TEST_START, TEST_END,
# L, D_MODEL, NHEAD, DIM_FF, DEPTH, DROPOUT, BATCH_SIZE, MAX_EPOCHS,
# ES_PATIENCE, LR, WEIGHT_DECAY, GRAD_CLIP, RANDOM_SEED

def run_for_ticker(ticker: str):
    """Run the full pipeline for one ticker and write its artefacts.

    Steps, in order: load the CSV, fit the scaler on the Train range only,
    scale Train..Test, run the monthly-refit walk-forward, validate the Test
    calendar, compute metrics, and write predictions CSV, metrics JSON, a
    canonical training-curve PNG and the run-config JSON into
    ``RUN_ROOT / ticker``.

    Raises:
        RuntimeError: if the walk-forward produced no Test predictions.
        AssertionError: propagated from ``ensure_146`` when the Test calendar
            does not match.
    """
    out_dir = RUN_ROOT / ticker
    out_dir.mkdir(parents=True, exist_ok=True)

    df = read_ticker_df(ticker)
    feat_cols = feature_columns(df)

    # ----- Train-only scaling fit (invariant) -----
    # Binary-column detection also looks at Train rows only.
    df_train_only = df[(df["date"] >= TRAIN_START) & (df["date"] <= TRAIN_END)].copy()
    bin_cols  = detect_binary_columns(df_train_only, feat_cols)
    sent_cols = detect_sentiment_columns(feat_cols)
    scaler_train = ZeroPreservingScaler(cols=feat_cols, sentiment_cols=sent_cols, binary_cols=bin_cols).fit(df_train_only)

    # ----- Transform full cadence range using Train-only scaler -----
    df_all   = df[(df["date"] >= TRAIN_START) & (df["date"] <= TEST_END)].copy()
    df_all_s = scaler_train.transform(df_all)

    preds_df, epochs_per_refit, used_refits_iso = daily_walk_forward(df_all_s, feat_cols, L, out_dir, ticker)

    # ----- Enforce exact Test window and 146 rows -----
    preds_df = preds_df[(preds_df["date"] >= pd.Timestamp(TEST_START)) & (preds_df["date"] <= pd.Timestamp(TEST_END))].copy()
    if preds_df.empty:
        raise RuntimeError(f"No predictions produced for {ticker}. Check input dates and feature/Target availability.")
    preds_df = ensure_146(preds_df)
    preds_df["residual"] = preds_df["y_true"] - preds_df["y_hat"]
    # Every row written here is a Test-window prediction, hence the constant 0 flag.
    preds_df["in_sample_flag"] = 0

    # ----- Metrics -----
    y_true = preds_df["y_true"].values.astype(float)
    y_hat  = preds_df["y_hat"].values.astype(float)
    metrics = compute_metrics(y_true, y_hat)  # expected to include RMSE, MAE, U2, DA_epsilon, Coverage, n, Sharpe_0bps, Sharpe_10bps, MaxDD_0bps, MaxDD_10bps, Turnover

    # ----- Write artefacts -----
    preds_path = out_dir / f"predictions_TRANSFORMER_{ticker}.csv"
    preds_df.to_csv(preds_path, index=False)

    metrics_path = out_dir / f"metrics_TRANSFORMER_{ticker}.json"
    with open(metrics_path, "w") as f:
        # NumPy floating scalars are converted to plain floats before serialising.
        json.dump({k: (float(v) if isinstance(v, (np.floating,)) else v) for k, v in metrics.items()}, f, indent=2)

    # Canonical training-curve image: copy the last monthly curve to a fixed name
    if used_refits_iso:
        last_refit_png = out_dir / f"training_curves_{ticker}_{used_refits_iso[-1]}.png"
        canonical_png  = out_dir / f"training_curves_TRANSFORMER_{ticker}.png"
        if last_refit_png.exists():
            shutil.copy2(last_refit_png, canonical_png)

    # ----- run_config -----
    # parameter_count is taken from a freshly constructed model of the same
    # shape, not from one of the trained refit models.
    cfg = {
        "model_id": "TRANSFORMER",
        "ticker": ticker,
        "L": int(L),
        "features_used": feat_cols,
        "sentiment_zero_preserving": True,
        "hyperparameters": {
            "d_model": int(D_MODEL),
            "nhead": int(NHEAD),
            "dim_ff": int(DIM_FF),
            "depth": int(DEPTH),
            "dropout": float(DROPOUT),
            "batch_size": int(BATCH_SIZE),
            "max_epochs": int(MAX_EPOCHS),
            "early_stop_patience": int(ES_PATIENCE),
            "lr": float(LR),
            "weight_decay": float(WEIGHT_DECAY),
            "grad_clip": float(GRAD_CLIP),
            "optimiser": "Adam",
            "lr_schedule": None,
        },
        "random_seed": int(RANDOM_SEED),
        "cadence": "monthly_refit",
        "refit_dates": used_refits_iso,
        "epochs_per_refit": epochs_per_refit,
        "parameter_count": int(parameter_count(TransformerRegressor(len(feat_cols), D_MODEL, NHEAD, DIM_FF, DEPTH, DROPOUT))),
        "metrics_on_inverse_levels": True  # explicit confirmation for dissertation reporting
    }
    cfg_path = out_dir / f"run_config_TRANSFORMER_{ticker}.json"
    with open(cfg_path, "w") as f:
        json.dump(cfg, f, indent=2)

def main():
    """Run every ticker, write provenance files, and package the bundle.

    After all per-ticker runs, ``env_manifest.txt`` and ``file_hashes.json``
    are written at ``BUNDLE_ROOT`` and the bundle is zipped. When running in
    Google Colab the archive is also offered as a browser download; outside
    Colab that step is skipped silently, and a download that fails inside
    Colab is reported rather than hidden. A summary line with the bundle
    path, archive path and archive digest is printed last.
    """
    # Per-ticker runs
    for t in TICKERS:
        run_for_ticker(t)

    # Provenance at bundle root
    BUNDLE_ROOT.mkdir(parents=True, exist_ok=True)
    write_env_manifest(BUNDLE_ROOT)
    write_file_hashes(BUNDLE_ROOT)

    # Zip the bundle root so provenance is at archive top level
    zip_path = package_zip(BUNDLE_ROOT)

    # Optional Colab download (best effort; never aborts the run)
    try:
        from google.colab import files  # type: ignore
        files.download(str(zip_path))
    except ImportError:
        # Not running in Colab: nothing to download.
        pass
    except Exception as exc:
        print(f"Colab download failed ({type(exc).__name__}: {exc}); archive remains at {zip_path}")

    print(f"Done.\nBundle root: {BUNDLE_ROOT.resolve()}\nZip: {zip_path.resolve()}\nSHA256(zip): {sha256_file(zip_path)}")

if __name__ == "__main__":
    main()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Done.
Bundle root: /content/TRANSFORMER_FINAL
Zip: /content/TRANSFORMER_FINAL_bundle.zip
SHA256(zip): fb44311b7598e04b61840e314707065e3b78ae0aeefc0fca084582bc4b787d55
