In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os, re, json, glob
from dataclasses import dataclass, asdict
import numpy as np
import pandas as pd

# -----------------------------
# 0) Reproducibility & folders
# -----------------------------
np.random.seed(1337)
BASE_DIR = "/content"
EXPORT_DIRS = [f"{BASE_DIR}/export/tables", f"{BASE_DIR}/export/figures", f"{BASE_DIR}/export/logs"]
for d in EXPORT_DIRS:
    os.makedirs(d, exist_ok=True)

# -----------------------------
# 1) Minimal experiment config
# -----------------------------
@dataclass
class ExpConfig:
    assets: tuple = tuple(["BTC","ETH","BNB","XRP","ADA","SOL","DOGE","TRX","DOT","LTC"])
    freq: str = "1D"
    tz: str = "UTC"
    t_train_end: str = "2022-12-31"
    t_val_end: str = "2023-12-31"
    t_test_end: str = "2025-09-30"
    min_rows_per_asset: int = 500
    expect_cols: tuple = tuple(["timestamp","symbol","open","high","low","close","volume"])
    data_paths: tuple = tuple([
        "/content/drive/My Drive/PHD/Sep 2025/Dataset/archive (9)/*.csv"
    ])

CFG = ExpConfig()
print("Experiment Config:\n", json.dumps(asdict(CFG), indent=2))

# -----------------------------
# 2) Helpers
# -----------------------------
def _parse_timestamp(s):
    try:
        return pd.to_datetime(s, utc=True, infer_datetime_format=True)
    except Exception:
        return pd.NaT

def _infer_symbol_from_filename(path, fallback="UNK"):
    stem = os.path.splitext(os.path.basename(path))[0]
    tokens = re.findall(r'[A-Z0-9]{2,10}', stem.upper())
    return (tokens[0] if tokens else fallback)

def _standardize_cols(df: pd.DataFrame) -> pd.DataFrame:
    df = df.rename(columns={c: c.lower().strip() for c in df.columns})
    alias = {'date':'timestamp','datetime':'timestamp','time':'timestamp',
             'asset':'symbol','ticker':'symbol','coin':'symbol'}
    for k,v in alias.items():
        if k in df.columns:
            df = df.rename(columns={k:v})
    for k in ['close','adj_close','close_price','price']:
        if k in df.columns:
            df = df.rename(columns={k:'close'})
            break
    for need in ['timestamp','symbol','open','high','low','close','volume']:
        if need not in df.columns:
            if need in ['open','high','low','volume']:
                df[need] = np.nan
            else:
                raise ValueError(f"Missing required column: {need}")
    return df

def _to_long(df0: pd.DataFrame, path: str) -> pd.DataFrame:
    lc = [c.lower() for c in df0.columns]
    if 'symbol' in lc:
        return df0
    ts_candidates = [c for c in df0.columns if c.lower() in ['timestamp','date','datetime','time']]
    if not ts_candidates:
        raise ValueError(f"Cannot infer timestamp column in: {path}")
    ts_col = ts_candidates[0]
    df0 = df0.rename(columns={ts_col:'timestamp'})
    non_ts = [c for c in df0.columns if c != 'timestamp']
    ohlcv_set = {'open','high','low','close','volume'}
    if not set([c.lower() for c in non_ts]).issubset(ohlcv_set):
        vname = '_melt_value_'
        df0 = df0.melt(id_vars='timestamp', var_name='symbol', value_name=vname)
        df0 = df0.rename(columns={vname:'close'})
        return df0
    sym = _infer_symbol_from_filename(path)
    df0['symbol'] = sym
    return df0

def load_any(paths) -> pd.DataFrame:
    if not paths:
        raise SystemExit("Set CFG.data_paths to your CSV path(s).")
    files = []
    for p in paths:
        files.extend(glob.glob(p))
    if not files:
        raise SystemExit(f"No files matched: {paths}")
    frames = []
    for f in files:
        df0 = pd.read_csv(f)
        df0 = _to_long(df0, f)
        df0 = _standardize_cols(df0)
        frames.append(df0)
    df = pd.concat(frames, ignore_index=True)
    df['timestamp'] = df['timestamp'].apply(_parse_timestamp)
    df = df.dropna(subset=['timestamp','symbol','close'])
    df['symbol'] = df['symbol'].astype(str).str.upper().str.replace(r'[^A-Z0-9]', '', regex=True)
    df = df.sort_values('timestamp')
    return df

def resample_ohlcv(df, freq):
    df = df.set_index('timestamp').sort_index()
    agg = {'open':'first','high':'max','low':'min','close':'last','volume':'sum'}
    out = (df.groupby('symbol')
             .apply(lambda g: g.resample(freq).agg(agg).dropna(subset=['close']))
             .reset_index())
    return out

# -----------------------------
# 3) Load & filter
# -----------------------------
raw = load_any(CFG.data_paths)

asset_counts = raw['symbol'].value_counts()
wanted = [a for a in CFG.assets if a in asset_counts.index]
if len(wanted) < len(CFG.assets):
    for a in asset_counts.index:
        if a not in wanted and len(wanted) < len(CFG.assets):
            wanted.append(a)
data = raw[raw['symbol'].isin(wanted)].copy()

data = resample_ohlcv(data, CFG.freq)

keep_assets = [a for a, g in data.groupby('symbol') if len(g) >= CFG.min_rows_per_asset]
data = data[data['symbol'].isin(keep_assets)]
print(f"Kept assets ({len(keep_assets)}): {sorted(keep_assets)}")

# -----------------------------
# 4) Temporal splits
# -----------------------------
t_train_end = pd.Timestamp(CFG.t_train_end, tz='UTC')
t_val_end   = pd.Timestamp(CFG.t_val_end, tz='UTC')
t_test_end  = pd.Timestamp(CFG.t_test_end, tz='UTC')

def _label_split(ts):
    if ts <= t_train_end: return "train"
    if ts <= t_val_end:   return "val"
    if ts <= t_test_end:  return "test"
    return "holdout_future"

data['split'] = data['timestamp'].apply(_label_split)
data = data[data['split'] != "holdout_future"]

# -----------------------------
# 5) Summary & exports
# -----------------------------
summary = {
    "date_range": {
        "min": str(data['timestamp'].min()),
        "max": str(data['timestamp'].max()),
        "freq": CFG.freq
    },
    "assets": sorted(data['symbol'].unique().tolist()),
    "rows_total": int(len(data)),
    "rows_by_split": data['split'].value_counts().to_dict(),
    "rows_by_asset": data.groupby('symbol').size().sort_values(ascending=False).to_dict(),
    "missing_by_col": {c:int(data[c].isna().sum()) for c in ['open','high','low','close','volume']},
}
print("\n=== DATASET SUMMARY ===")
print(json.dumps(summary, indent=2))

data_out = f"{BASE_DIR}/export/tables/dataset_long_{CFG.freq}.csv"
json_out = f"{BASE_DIR}/export/tables/dataset_summary.json"
data.to_csv(data_out, index=False)
with open(json_out, "w") as f:
    json.dump(summary, f, indent=2)
print(f"\nSaved: {data_out}")
print(f"Saved: {json_out}")

pivot = (data
         .groupby(['symbol','split'])
         .size()
         .unstack(fill_value=0)
         .reset_index()
         .sort_values(by=['test','val','train'], ascending=False))
pivot_out = f"{BASE_DIR}/export/tables/coverage_by_asset_split.csv"
pivot.to_csv(pivot_out, index=False)
print(f"Saved: {pivot_out}")
print(pivot.head(20))


Experiment Config:
 {
  "assets": [
    "BTC",
    "ETH",
    "BNB",
    "XRP",
    "ADA",
    "SOL",
    "DOGE",
    "TRX",
    "DOT",
    "LTC"
  ],
  "freq": "1D",
  "tz": "UTC",
  "t_train_end": "2022-12-31",
  "t_val_end": "2023-12-31",
  "t_test_end": "2025-09-30",
  "min_rows_per_asset": 500,
  "expect_cols": [
    "timestamp",
    "symbol",
    "open",
    "high",
    "low",
    "close",
    "volume"
  ],
  "data_paths": [
    "/content/drive/My Drive/PHD/Sep 2025/Dataset/archive (9)/*.csv"
  ]
}


  return pd.to_datetime(s, utc=True, infer_datetime_format=True)
  .apply(lambda g: g.resample(freq).agg(agg).dropna(subset=['close']))


Kept assets (5): ['CLOSE', 'HIGH', 'LOW', 'OPEN', 'TICKER']

=== DATASET SUMMARY (for §4.1 paste) ===
{
  "date_range": {
    "min": "2010-07-17 00:00:00+00:00",
    "max": "2025-09-30 00:00:00+00:00",
    "freq": "1D"
  },
  "assets": [
    "CLOSE",
    "HIGH",
    "LOW",
    "OPEN",
    "TICKER"
  ],
  "rows_total": 27535,
  "rows_by_split": {
    "train": 22755,
    "test": 2955,
    "val": 1825
  },
  "rows_by_asset": {
    "CLOSE": 5507,
    "HIGH": 5507,
    "LOW": 5507,
    "OPEN": 5507,
    "TICKER": 5507
  },
  "missing_by_col": {
    "open": 27535,
    "high": 27535,
    "low": 27535,
    "close": 0,
    "volume": 0
  }
}

Saved: /content/export/tables/dataset_long_1D.csv
Saved: /content/export/tables/dataset_summary.json
Saved: /content/export/tables/coverage_by_asset_split.csv
split  symbol  test  train  val
0       CLOSE   591   4551  365
1        HIGH   591   4551  365
2         LOW   591   4551  365
3        OPEN   591   4551  365
4      TICKER   591   4551  365


In [5]:
!pip -q install torchinfo torchviz > /dev/null

import os, json, warnings
from dataclasses import dataclass, asdict
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary as torch_summary

try:
    from torchviz import make_dot
    TORCHVIZ_OK = True
except Exception:
    TORCHVIZ_OK = False

warnings.filterwarnings("ignore", category=FutureWarning)

BASE_DIR = "/content"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
os.makedirs(TBL_DIR, exist_ok=True)
os.makedirs(FIG_DIR, exist_ok=True)

@dataclass
class ExpConfig:
    ds_csv: str = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"
    assets_max: int = 5
    horizons: List[int] = (1, 3, 7)
    win: int = 64
    sma_window: int = 5
    batch_size: int = 128
    epochs: int = 10
    patience: int = 3
    lr: float = 1e-3
    lstm_hidden: int = 64
    lstm_layers: int = 2
    lstm_dropout: float = 0.1
    spline_K: int = 16
    seed: int = 1337
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

CFG = ExpConfig()
np.random.seed(CFG.seed)
torch.manual_seed(CFG.seed)
print("Baseline Config\n", json.dumps(asdict(CFG), indent=2))

df = pd.read_csv(CFG.ds_csv, parse_dates=["timestamp"])
assert {"timestamp","symbol","close","split"}.issubset(df.columns)
df["close"] = pd.to_numeric(df["close"], errors="coerce")
df = df.dropna(subset=["close"]).reset_index(drop=True)

top_assets = (df.groupby("symbol").size()
              .sort_values(ascending=False)
              .head(CFG.assets_max).index.tolist())
df = df[df["symbol"].isin(top_assets)].copy()

scalers: Dict[str, Tuple[float,float]] = {}
for a, g in df[df["split"]=="train"].groupby("symbol"):
    mu = float(g["close"].mean())
    sd = float(g["close"].std(ddof=0))
    scalers[a] = (mu, sd if sd > 0 else 1.0)

def zscore(a, x):
    mu, sd = scalers[a]
    return (x - mu) / sd

def make_xy(series: pd.Series, horizon: int, win: int) -> Tuple[np.ndarray,np.ndarray]:
    x_list, y_list = [], []
    vals = series.values
    for t in range(win-1, len(vals)-horizon):
        x = vals[t-win+1:t+1]
        y = vals[t+horizon]
        x_list.append(x.astype(np.float32))
        y_list.append(np.float32(y))
    return np.stack(x_list), np.stack(y_list)

class SeqDataset(Dataset):
    def __init__(self, X, Y):
        self.X = torch.from_numpy(X)[:, :, None]
        self.Y = torch.from_numpy(Y)[:, None]
    def __len__(self): return self.X.shape[0]
    def __getitem__(self, i): return self.X[i], self.Y[i]

data_xy = {}
coverage = []
for a in top_assets:
    g = df[df["symbol"]==a].sort_values("timestamp").copy()
    if a not in scalers:
        continue
    z = g["close"].map(lambda v: zscore(a, v))
    g = g.assign(z=z.values)
    for split in ["train","val","test"]:
        gz = g[g["split"]==split]["z"]
        for h in CFG.horizons:
            if len(gz) >= CFG.win + h + 1:
                X, Y = make_xy(gz.reset_index(drop=True), h, CFG.win)
                data_xy[(a, split, h)] = (X, Y)
                coverage.append((a, split, h, len(Y)))
            else:
                coverage.append((a, split, h, 0))

pd.DataFrame(coverage, columns=["asset","split","horizon","samples"]).to_csv(
    f"{TBL_DIR}/coverage_windows.csv", index=False
)
print("Saved:", f"{TBL_DIR}/coverage_windows.csv")

def predict_naive(x_batch):
    return x_batch[:, -1, 0:1]

def predict_sma(x_batch, k=5):
    xs = x_batch[:, -k:, 0]
    return xs.mean(dim=1, keepdim=True)

class LSTMForecast(nn.Module):
    def __init__(self, hidden=64, layers=2, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden,
                            num_layers=layers, dropout=dropout, batch_first=True)
        self.head = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))
    def forward(self, x):
        out, _ = self.lstm(x)
        last = out[:, -1, :]
        return self.head(last)

class TriangularSpline(nn.Module):
    def __init__(self, K=16, xmin=-4.0, xmax=4.0):
        super().__init__()
        self.K = K
        centers = torch.linspace(xmin, xmax, K)
        self.register_buffer("centers", centers)
        self.delta = (xmax - xmin) / (K - 1 + 1e-6)
    def forward(self, x):
        B, W, _ = x.shape
        xexp = x.expand(-1, -1, self.K)
        cexp = self.centers.view(1,1,-1).expand(B, W, -1)
        return torch.relu(1.0 - torch.abs((xexp - cexp) / self.delta))

class KANForecast(nn.Module):
    def __init__(self, K=16):
        super().__init__()
        self.spline = TriangularSpline(K=K)
        self.head = nn.Sequential(nn.Linear(K, K), nn.ReLU(), nn.Linear(K, 1))
    def forward(self, x):
        phi = self.spline(x)
        pooled = phi.mean(dim=1)
        return self.head(pooled)

def count_params(m: nn.Module) -> int:
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

def train_model(model, train_loader, val_loader, epochs, lr, patience, device):
    model = model.to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    best_state, best_val = None, float("inf")
    hist = {"train": [], "val": []}
    patience_left = patience
    for _ in range(1, epochs+1):
        model.train()
        tr_loss = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            yhat = model(xb)
            loss = loss_fn(yhat, yb)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            tr_loss += loss.item() * xb.size(0)
        tr_loss /= max(1, len(train_loader.dataset))
        model.eval()
        va_loss = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                yhat = model(xb)
                va_loss += loss_fn(yhat, yb).item() * xb.size(0)
        va_loss /= max(1, len(val_loader.dataset))
        hist["train"].append(tr_loss); hist["val"].append(va_loss)
        if va_loss + 1e-9 < best_val:
            best_val = va_loss
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            patience_left = patience
        else:
            patience_left -= 1
            if patience_left <= 0:
                break
    if best_state is not None:
        model.load_state_dict(best_state)
    return model.cpu(), hist

def mae(y, yhat): return float(np.mean(np.abs(y - yhat)))
def rmse(y, yhat): return float(np.sqrt(np.mean((y - yhat)**2)))
def smape(y, yhat, eps=1e-8):
    denom = (np.abs(y) + np.abs(yhat) + eps) / 2.0
    return float(np.mean(np.abs(y - yhat) / denom) * 100.0)
def dir_acc(y_prev, y_true, yhat):
    s_true = np.sign(y_true - y_prev)
    s_pred = np.sign(yhat - y_prev)
    return float(np.mean((s_true == s_pred).astype(np.float32)))

results = []
model_summaries = []

for a in top_assets:
    for h in CFG.horizons:
        key_tr, key_va, key_te = (a, "train", h), (a, "val", h), (a, "test", h)
        if key_tr not in data_xy or key_va not in data_xy or key_te not in data_xy:
            continue
        Xtr, Ytr = data_xy[key_tr]
        Xva, Yva = data_xy[key_va]
        Xte, Yte = data_xy[key_te]
        ds_tr, ds_va, ds_te = SeqDataset(Xtr, Ytr), SeqDataset(Xva, Yva), SeqDataset(Xte, Yte)
        dl_tr = DataLoader(ds_tr, batch_size=CFG.batch_size, shuffle=True)
        dl_va = DataLoader(ds_va, batch_size=CFG.batch_size, shuffle=False)
        dl_te = DataLoader(ds_te, batch_size=CFG.batch_size, shuffle=False)

        with torch.no_grad():
            xvb = ds_te.X
            y_prev = xvb[:, -1, 0].numpy()
            y_true = ds_te.Y[:, 0].numpy()
            y_naive = predict_naive(ds_te.X).numpy()[:,0]
            y_sma   = predict_sma(ds_te.X, k=CFG.sma_window).numpy()[:,0]

        results.append(dict(model="Naive", asset=a, horizon=h,
                            MAE=mae(y_true, y_naive), RMSE=rmse(y_true, y_naive),
                            sMAPE=smape(y_true, y_naive), DA=dir_acc(y_prev, y_true, y_naive)))
        results.append(dict(model="SMA", asset=a, horizon=h,
                            MAE=mae(y_true, y_sma), RMSE=rmse(y_true, y_sma),
                            sMAPE=smape(y_true, y_sma), DA=dir_acc(y_prev, y_true, y_sma)))

        lstm = LSTMForecast(hidden=CFG.lstm_hidden, layers=CFG.lstm_layers, dropout=CFG.lstm_dropout)
        lstm, hist_lstm = train_model(lstm, dl_tr, dl_va, CFG.epochs, CFG.lr, CFG.patience, CFG.device)
        with torch.no_grad():
            yhat_lstm = []
            for xb, _ in dl_te:
                yhat_lstm.append(lstm(xb.to(CFG.device)).detach().cpu().numpy())
            yhat_lstm = np.concatenate(yhat_lstm, axis=0)[:,0]
        results.append(dict(model="LSTM", asset=a, horizon=h,
                            MAE=mae(y_true, yhat_lstm), RMSE=rmse(y_true, yhat_lstm),
                            sMAPE=smape(y_true, yhat_lstm), DA=dir_acc(y_prev, y_true, yhat_lstm)))

        kan = KANForecast(K=CFG.spline_K)
        kan, hist_kan = train_model(kan, dl_tr, dl_va, CFG.epochs, CFG.lr, CFG.patience, CFG.device)
        with torch.no_grad():
            yhat_kan = []
            for xb, _ in dl_te:
                yhat_kan.append(kan(xb.to(CFG.device)).detach().cpu().numpy())
            yhat_kan = np.concatenate(yhat_kan, axis=0)[:,0]
        results.append(dict(model="KAN-spline", asset=a, horizon=h,
                            MAE=mae(y_true, yhat_kan), RMSE=rmse(y_true, yhat_kan),
                            sMAPE=smape(y_true, yhat_kan), DA=dir_acc(y_prev, y_true, yhat_kan)))

        def plot_loss(hist, title, fname):
            plt.figure(figsize=(6,4))
            plt.plot(hist["train"], label="train")
            plt.plot(hist["val"], label="val")
            plt.xlabel("Epoch"); plt.ylabel("MSE loss"); plt.title(title); plt.legend()
            plt.tight_layout()
            pth = os.path.join(FIG_DIR, fname)
            plt.savefig(pth, dpi=150); plt.close()
            return pth

        plot_loss(hist_lstm, f"LSTM Loss — {a}, H={h}", f"loss_lstm_{a}_H{h}.png")
        plot_loss(hist_kan,  f"KAN-spline Loss — {a}, H={h}", f"loss_kan_{a}_H{h}.png")

        Nplot = min(400, len(y_true))
        def plot_trace(y_true, yhat, title, fname):
            plt.figure(figsize=(8,4))
            plt.plot(y_true[:Nplot], label="true")
            plt.plot(yhat[:Nplot],  label="pred")
            plt.xlabel("Test sample index"); plt.ylabel("Standardized close")
            plt.title(title); plt.legend(); plt.tight_layout()
            pth = os.path.join(FIG_DIR, fname)
            plt.savefig(pth, dpi=150); plt.close()
            return pth

        plot_trace(y_true, yhat_lstm, f"LSTM Test Trace — {a}, H={h}", f"trace_lstm_{a}_H{h}.png")
        plot_trace(y_true, yhat_kan,  f"KAN-spline Test Trace — {a}, H={h}", f"trace_kan_{a}_H{h}.png")

        def plot_residuals(y_true, yhat, title, fname):
            plt.figure(figsize=(6,4))
            resid = y_true - yhat
            plt.hist(resid, bins=40)
            plt.xlabel("Residual"); plt.ylabel("Count"); plt.title(title); plt.tight_layout()
            pth = os.path.join(FIG_DIR, fname)
            plt.savefig(pth, dpi=150); plt.close()
            return pth

        plot_residuals(y_true, yhat_lstm, f"Residuals LSTM — {a}, H={h}", f"resid_lstm_{a}_H{h}.png")
        plot_residuals(y_true, yhat_kan,  f"Residuals KAN — {a}, H={h}", f"resid_kan_{a}_H{h}.png")

        lstm_params = count_params(lstm)
        ms_lstm = torch_summary(lstm, input_size=(1, CFG.win, 1), verbose=0)
        with open(os.path.join(TBL_DIR, f"model_summary_lstm_{a}_H{h}.txt"), "w") as f:
            f.write(str(ms_lstm)); f.write(f"\nTotal trainable parameters: {lstm_params}\n")
        kan_params = count_params(kan)
        ms_kan = torch_summary(kan, input_size=(1, CFG.win, 1), verbose=0)
        with open(os.path.join(TBL_DIR, f"model_summary_kan_{a}_H{h}.txt"), "w") as f:
            f.write(str(ms_kan)); f.write(f"\nTotal trainable parameters: {kan_params}\n")

        model_summaries.append({"asset":a,"horizon":h,"model":"LSTM","params":lstm_params})
        model_summaries.append({"asset":a,"horizon":h,"model":"KAN-spline","params":kan_params})

        if TORCHVIZ_OK:
            try:
                xdummy = torch.randn(1, CFG.win, 1)
                make_dot(lstm(xdummy), params=dict(list(lstm.named_parameters()))).render(
                    os.path.join(FIG_DIR, f"graph_lstm_{a}_H{h}"), format="png", cleanup=True)
                make_dot(kan(xdummy), params=dict(list(kan.named_parameters()))).render(
                    os.path.join(FIG_DIR, f"graph_kan_{a}_H{h}"), format="png", cleanup=True)
            except Exception as e:
                print("torchviz failed for", a, h, ":", e)

        xx = np.linspace(-4, 4, 400, dtype=np.float32)
        with torch.no_grad():
            x_t = torch.from_numpy(xx).view(1, -1, 1)
            phi = kan.spline(x_t).squeeze(0).numpy()
        plt.figure(figsize=(7,4))
        for k in range(phi.shape[1]):
            plt.plot(xx, phi[:,k])
        plt.title(f"KAN Triangular Bases (K={CFG.spline_K}) — {a}, H={h}")
        plt.xlabel("Standardized input"); plt.ylabel("Activation")
        plt.tight_layout()
        plt.savefig(os.path.join(FIG_DIR, f"kan_bases_{a}_H{h}.png"), dpi=150)
        plt.close()

res_df = pd.DataFrame(results).sort_values(["asset","horizon","model"])
res_csv = os.path.join(TBL_DIR, "baseline_results_per_asset_horizon.csv")
res_df.to_csv(res_csv, index=False)

agg_df = (res_df.groupby(["model","horizon"])
          .agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"), sMAPE=("sMAPE","mean"), DA=("DA","mean"))
          .reset_index()
          .sort_values(["horizon","model"]))
agg_csv = os.path.join(TBL_DIR, "baseline_results_agg_by_horizon.csv")
agg_df.to_csv(agg_csv, index=False)

param_df = pd.DataFrame(model_summaries)
param_csv = os.path.join(TBL_DIR, "model_param_counts.csv")
param_df.to_csv(param_csv, index=False)

def barplot_param_counts(df, fname):
    piv = df.pivot_table(index=["model"], values="params", aggfunc="mean").reset_index()
    plt.figure(figsize=(6,4))
    plt.bar(piv["model"], piv["params"])
    plt.ylabel("Parameters (mean across assets & horizons)")
    plt.title("Model Parameter Counts")
    plt.tight_layout()
    pth = os.path.join(FIG_DIR, fname); plt.savefig(pth, dpi=150); plt.close(); return pth

def barplot_metric(df, metric, fname, title):
    plt.figure(figsize=(7,4))
    labels = sorted(df["horizon"].unique())
    models = df["model"].unique().tolist()
    width = 0.15
    idx = np.arange(len(labels))
    for i, m in enumerate(models):
        sub = df[df["model"]==m].set_index("horizon").reindex(labels)
        plt.bar(idx + i*width, sub[metric].values, width=width, label=m)
    plt.xticks(idx + width*(len(models)-1)/2, [f"H={h}" for h in labels])
    plt.ylabel(metric); plt.title(title); plt.legend()
    plt.tight_layout()
    pth = os.path.join(FIG_DIR, fname); plt.savefig(pth, dpi=150); plt.close(); return pth

barplot_param_counts(param_df, "param_counts_bar.png")
barplot_metric(agg_df, "DA",    "directional_accuracy_bars.png", "Directional Accuracy by Model & Horizon")
barplot_metric(agg_df, "MAE",   "mae_bars.png", "MAE by Model & Horizon")
barplot_metric(agg_df, "RMSE",  "rmse_bars.png","RMSE by Model & Horizon")
barplot_metric(agg_df, "sMAPE", "smape_bars.png","sMAPE by Model & Horizon")

print("\nRESULTS SUMMARY")
print("Per-asset × horizon results CSV:", res_csv)
print(res_df.head(12).to_string(index=False))
print("\nAggregated by horizon (means across assets):", agg_csv)
print(agg_df.to_string(index=False))

figs = sorted([os.path.join(FIG_DIR, f) for f in os.listdir(FIG_DIR) if f.endswith(".png")])
print("\nFIGURES SAVED")
for p in figs[:30]:
    print(p)
if len(figs) > 30:
    print(f"... and {len(figs)-30} more figures")

print("\nArtifacts:")
print(" - Model summaries (*.txt) in", TBL_DIR)
print(" - Parameter counts:", param_csv)
print(" - Coverage windows:", f"{TBL_DIR}/coverage_windows.csv")


Baseline Config
 {
  "ds_csv": "/content/export/tables/dataset_long_1D.csv",
  "assets_max": 5,
  "horizons": [
    1,
    3,
    7
  ],
  "win": 64,
  "sma_window": 5,
  "batch_size": 128,
  "epochs": 10,
  "patience": 3,
  "lr": 0.001,
  "lstm_hidden": 64,
  "lstm_layers": 2,
  "lstm_dropout": 0.1,
  "spline_K": 16,
  "seed": 1337,
  "device": "cpu"
}
Saved: /content/export/tables/coverage_windows.csv

RESULTS SUMMARY
Per-asset × horizon results CSV: /content/export/tables/baseline_results_per_asset_horizon.csv
     model asset  horizon      MAE     RMSE      sMAPE       DA
KAN-spline CLOSE        1 0.362014 3.041022 199.553375 0.548387
      LSTM CLOSE        1 0.395214 3.040620 197.622009 0.544592
     Naive CLOSE        1 0.536916 4.310656  30.184805 0.000000
       SMA CLOSE        1 0.538268 3.339615  59.478020 0.639469
KAN-spline CLOSE        3 0.363296 3.046820 199.423523 0.544762
      LSTM CLOSE        3 0.363687 3.048740 161.150421 0.544762
     Naive CLOSE        3 0.54075

In [6]:
!pip -q install torchinfo torchviz > /dev/null

import os, json, math, random, warnings
from dataclasses import dataclass, asdict
from typing import Dict, Tuple, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary as torch_summary
warnings.filterwarnings("ignore", category=FutureWarning)
try:
    from torchviz import make_dot
    TORCHVIZ_OK = True
except Exception:
    TORCHVIZ_OK = False

BASE_DIR = "/content"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
LOG_DIR  = f"{BASE_DIR}/export/logs"
for d in [TBL_DIR, FIG_DIR, LOG_DIR]:
    os.makedirs(d, exist_ok=True)

@dataclass
class DRLConfig:
    ds_csv: str = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"
    assets_max: int = 5
    horizons: Tuple[int,...] = (1,3,7)
    win: int = 64
    batch_size: int = 256
    teacher_rounds: int = 200
    train_steps_per_pull: int = 1
    lr: float = 2e-3
    gamma: float = 0.99
    entropy_beta: float = 1e-3
    aux_sup_weight: float = 1e-1
    ucb_c: float = 1.2
    seed: int = 1337
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lstm_hidden: int = 64
    lstm_layers: int = 1
    lstm_dropout: float = 0.0
    eval_n_trace: int = 400

DRLCFG = DRLConfig()
random.seed(DRLCFG.seed); np.random.seed(DRLCFG.seed); torch.manual_seed(DRLCFG.seed)
print("§4.3 Config\n", json.dumps(asdict(DRLCFG), indent=2))

df = pd.read_csv(DRLCFG.ds_csv, parse_dates=["timestamp"])
assert {"timestamp","symbol","close","split"}.issubset(df.columns)
df["close"] = pd.to_numeric(df["close"], errors="coerce")
df = df.dropna(subset=["close"]).reset_index(drop=True)

top_assets = (df.groupby("symbol").size()
              .sort_values(ascending=False)
              .head(DRLCFG.assets_max).index.tolist())
df = df[df["symbol"].isin(top_assets)].copy()

scalers: Dict[str, Tuple[float,float]] = {}
for a, g in df[df["split"]=="train"].groupby("symbol"):
    mu = float(g["close"].mean())
    sd = float(g["close"].std(ddof=0))
    scalers[a] = (mu, sd if sd > 0 else 1.0)

def zscore(a, x):
    mu, sd = scalers[a]; return (x - mu) / sd
def inv_z(a, z):
    mu, sd = scalers[a]; return z*sd + mu

df = df.sort_values(["symbol","timestamp"]).copy()
df["z"] = df.apply(lambda r: zscore(r["symbol"], r["close"]), axis=1)

def make_xy(series: pd.Series, horizon: int, win: int):
    x_list, y_list, last_list = [], [], []
    vals = series.values
    for t in range(win-1, len(vals)-horizon):
        x = vals[t-win+1:t+1]
        y = vals[t+horizon]
        last = vals[t]
        x_list.append(x.astype(np.float32))
        y_list.append(np.float32(y))
        last_list.append(np.float32(last))
    return np.stack(x_list), np.stack(y_list), np.stack(last_list)

data_xy = {}
coverage = []
for a in top_assets:
    g = df[df["symbol"]==a].copy()
    for split in ["train","val","test"]:
        gz = g[g["split"]==split]["z"].reset_index(drop=True)
        for h in DRLCFG.horizons:
            if len(gz) >= DRLCFG.win + h + 1:
                X, Y, LAST = make_xy(gz, h, DRLCFG.win)
                data_xy[(a, split, h)] = (X, Y, LAST)
                coverage.append((a, split, h, len(Y)))
            else:
                coverage.append((a, split, h, 0))

pd.DataFrame(coverage, columns=["asset","split","horizon","samples"]).to_csv(
    f"{TBL_DIR}/acg_coverage_windows.csv", index=False
)

arms: List[Tuple[str,int]] = []
for a in top_assets:
    for h in DRLCFG.horizons:
        ok = all(((a, sp, h) in data_xy and data_xy[(a,sp,h)][1].shape[0] > 0) for sp in ["train","val","test"])
        if ok: arms.append((a,h))
assert len(arms) > 0
n_arms = len(arms)
print(f"Arms (asset,horizon): {arms}")

class PolicyLSTM(nn.Module):
    def __init__(self, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden, num_layers=layers,
                            dropout=(dropout if layers>1 else 0.0), batch_first=True)
        self.mu = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))
        self.log_sigma = nn.Parameter(torch.tensor(-0.5))
    def forward(self, x):
        o, _ = self.lstm(x)
        last = o[:, -1, :]
        mu = self.mu(last)
        log_sigma = self.log_sigma.expand_as(mu)
        return mu, log_sigma

def count_params(m):
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

device = torch.device(DRLCFG.device)
policy = PolicyLSTM(hidden=DRLCFG.lstm_hidden, layers=DRLCFG.lstm_layers, dropout=DRLCFG.lstm_dropout).to(device)
opt = torch.optim.AdamW(policy.parameters(), lr=DRLCFG.lr)

class UCB1Teacher:
    def __init__(self, n_arms, c=1.2):
        self.c = c
        self.n = np.zeros(n_arms, dtype=np.int64)
        self.mean = np.zeros(n_arms, dtype=np.float64)
        self.t = 0
    def select(self):
        self.t += 1
        for i in range(n_arms):
            if self.n[i] == 0:
                return i
        ucb = self.mean + self.c * np.sqrt(2.0 * math.log(self.t) / self.n)
        return int(np.argmax(ucb))
    def update(self, i, reward):
        self.n[i] += 1
        self.mean[i] += (reward - self.mean[i]) / self.n[i]

teacher = UCB1Teacher(n_arms, DRLCFG.ucb_c)

def eval_val_mse(policy: nn.Module, arm_idx: int) -> float:
    a, h = arms[arm_idx]
    X, Y, _ = data_xy[(a,"val",h)]
    with torch.no_grad():
        xb = torch.from_numpy(X).to(device)[:, :, None]
        mu, _ = policy(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    return float(np.mean((Y - yhat)**2))

last_val_mse = np.array([eval_val_mse(policy, i) for i in range(n_arms)], dtype=np.float64)

def sample_train_minibatch(arm_idx: int, batch_size: int):
    a, h = arms[arm_idx]
    X, Y, LAST = data_xy[(a,"train",h)]
    idx = np.random.randint(0, len(Y), size=(min(batch_size, len(Y)),))
    xb = torch.from_numpy(X[idx]).float().to(device)[:, :, None]
    yb = torch.from_numpy(Y[idx]).float().to(device)[:, None]
    lastb = torch.from_numpy(LAST[idx]).float().to(device)[:, None]
    return xb, yb, lastb

def mae(y, yhat): return float(np.mean(np.abs(y - yhat)))
def rmse(y, yhat): return float(np.sqrt(np.mean((y - yhat)**2)))
def smape(y, yhat, eps=1e-8):
    denom = (np.abs(y) + np.abs(yhat) + eps) / 2.0
    return float(np.mean(np.abs(y - yhat) / denom) * 100.0)
def dir_acc(y_prev, y_true, yhat):
    s_true = np.sign(y_true - y_prev)
    s_pred = np.sign(yhat - y_prev)
    return float(np.mean((s_true == s_pred).astype(np.float32)))

baseline_reward = 0.0
baseline_momentum = 0.9

def reinforce_step(xb, yb):
    policy.train()
    opt.zero_grad()
    mu, log_sigma = policy(xb)
    sigma = torch.exp(log_sigma)
    dist = torch.distributions.Normal(mu, sigma)
    a = dist.rsample()
    r = - (a - yb)**2
    global baseline_reward
    avg_r = r.mean().detach()
    advantage = r - baseline_reward
    baseline_reward = baseline_momentum*baseline_reward + (1.0-baseline_momentum)*avg_r
    logp = dist.log_prob(a)
    loss_policy = - (logp * advantage.detach()).mean()
    loss_entropy = - DRLCFG.entropy_beta * dist.entropy().mean()
    loss_aux = DRLCFG.aux_sup_weight * nn.MSELoss()(mu, yb)
    loss = loss_policy + loss_entropy + loss_aux
    loss.backward()
    nn.utils.clip_grad_norm_(policy.parameters(), 1.0)
    opt.step()

for i in range(n_arms):
    xb, yb, _ = sample_train_minibatch(i, DRLCFG.batch_size)
    reinforce_step(xb, yb)

log = {
    "round": [], "arm": [], "asset": [], "horizon": [],
    "reward_lp": [], "val_mse_before": [], "val_mse_after": [],
    "mean_reward_est": [], "pulls_arm": []
}

for t in range(1, DRLCFG.teacher_rounds + 1):
    i = teacher.select()
    a, h = arms[i]
    val_before = eval_val_mse(policy, i)
    for _ in range(DRLCFG.train_steps_per_pull):
        xb, yb, _ = sample_train_minibatch(i, DRLCFG.batch_size)
        reinforce_step(xb, yb)
    val_after = eval_val_mse(policy, i)
    reward_lp = float(val_before - val_after)
    teacher.update(i, reward_lp)
    last_val_mse[i] = val_after
    log["round"].append(t)
    log["arm"].append(i)
    log["asset"].append(a)
    log["horizon"].append(h)
    log["reward_lp"].append(reward_lp)
    log["val_mse_before"].append(val_before)
    log["val_mse_after"].append(val_after)
    log["mean_reward_est"].append(float(teacher.mean[i]))
    log["pulls_arm"].append(int(teacher.n[i]))

log_df = pd.DataFrame(log)
log_csv = f"{TBL_DIR}/acg_teacher_log.csv"
log_df.to_csv(log_csv, index=False)

means_per_arm = np.zeros(n_arms)
counts_per_arm = np.zeros(n_arms, dtype=np.int64)
inst_regret = []
for r in range(len(log_df)):
    rew = log_df.iloc[r]["reward_lp"]
    means_snapshot = {int(ai): float(am) for ai, am in zip(log_df["arm"][:r+1], log_df["mean_reward_est"][:r+1])}
    best_mean = max(means_snapshot.values()) if means_snapshot else 0.0
    inst_regret.append(best_mean - rew)
regret = np.cumsum(inst_regret)

sel_counts = log_df["arm"].value_counts().sort_index()
sel_df = pd.DataFrame({
    "arm_idx": list(range(n_arms)),
    "asset": [arms[i][0] for i in range(n_arms)],
    "horizon": [arms[i][1] for i in range(n_arms)],
    "pulls": [int(sel_counts.get(i, 0)) for i in range(n_arms)],
    "mean_reward": [float(teacher.mean[i]) for i in range(n_arms)]
})
sel_df.to_csv(f"{TBL_DIR}/acg_selection_counts.csv", index=False)

def savefig(fname):
    pth = os.path.join(FIG_DIR, fname)
    plt.tight_layout(); plt.savefig(pth, dpi=150); plt.close(); return pth

plt.figure(figsize=(8,3))
plt.plot(log_df["arm"].values, lw=1)
plt.yticks(range(n_arms), [f"{arms[i][0]}-H{arms[i][1]}" for i in range(n_arms)])
plt.xlabel("Round"); plt.ylabel("Selected arm"); plt.title("Bandit arm selection over time (UCB1)")
f_sel_over_time = savefig("acg_arm_selection_over_time.png")

labels = [f"{a}-H{h}" for a,h in arms]
plt.figure(figsize=(6,3))
plt.bar(range(n_arms), sel_df["pulls"].values)
plt.xticks(range(n_arms), labels, rotation=45, ha="right")
plt.ylabel("Pulls"); plt.title("Selection counts by arm")
f_sel_counts = savefig("acg_selection_counts.png")

plt.figure(figsize=(7,3))
plt.plot(pd.Series(log_df["reward_lp"]).rolling(10).mean(), label="Rolling-10 mean reward")
plt.axhline(0.0, color="black", lw=1)
plt.legend(); plt.title("Learning progress (Δ val MSE) — rolling mean")
plt.xlabel("Round"); plt.ylabel("Reward (↓MSE)")
f_reward_curve = savefig("acg_reward_curve.png")

plt.figure(figsize=(7,3))
plt.plot(regret)
plt.title("Cumulative regret (approx.)")
plt.xlabel("Round"); plt.ylabel("Regret")
f_regret = savefig("acg_cumulative_regret.png")

plt.figure(figsize=(6,3))
plt.bar(range(n_arms), sel_df["mean_reward"].values)
plt.xticks(range(n_arms), labels, rotation=45, ha="right")
plt.ylabel("Mean reward"); plt.title("Per-arm mean learning progress")
f_mean_reward = savefig("acg_mean_reward_by_arm.png")

val_after_latest = []
for i in range(n_arms):
    val_mse = eval_val_mse(policy, i)
    val_after_latest.append(val_mse)
val_snap_df = pd.DataFrame({
    "arm_idx": list(range(n_arms)),
    "asset": [arms[i][0] for i in range(n_arms)],
    "horizon": [arms[i][1] for i in range(n_arms)],
    "val_mse_final": val_after_latest,
    "pulls": sel_df["pulls"].values
})
val_snap_df.to_csv(f"{TBL_DIR}/acg_val_mse_final.csv", index=False)

plt.figure(figsize=(6,3))
plt.bar(range(n_arms), val_snap_df["val_mse_final"].values)
plt.xticks(range(n_arms), labels, rotation=45, ha="right")
plt.ylabel("Val MSE"); plt.title("Final validation MSE by arm")
f_val_mse_final = savefig("acg_val_mse_final_bars.png")

model_params = count_params(policy)
ms = torch_summary(policy, input_size=(1, DRLCFG.win, 1), verbose=0)
with open(os.path.join(TBL_DIR, "acg_student_model_summary.txt"), "w") as f:
    f.write(str(ms))
    f.write(f"\nTotal trainable parameters: {model_params}\n")

if TORCHVIZ_OK:
    try:
        xdummy = torch.randn(1, DRLCFG.win, 1).to(device)
        mu, _ = policy(xdummy)
        make_dot(mu, params=dict(list(policy.named_parameters()))).render(
            os.path.join(FIG_DIR, "acg_student_graph"), format="png", cleanup=True
        )
    except Exception as e:
        print("torchviz failed:", e)

results = []
for i in range(n_arms):
    a, h = arms[i]
    Xte, Yte, LASTte = data_xy[(a,"test",h)]
    with torch.no_grad():
        xb = torch.from_numpy(Xte).float().to(device)[:, :, None]
        mu, _ = policy(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    y_true = Yte
    y_prev = LASTte
    results.append(dict(model="ACG-DRL", asset=a, horizon=h,
                        MAE=mae(y_true, yhat), RMSE=rmse(y_true, yhat),
                        sMAPE=smape(y_true, yhat), DA=dir_acc(y_prev, y_true, yhat)))

res_df = pd.DataFrame(results).sort_values(["asset","horizon"])
res_csv = f"{TBL_DIR}/acg_results_per_arm.csv"
res_df.to_csv(res_csv, index=False)

agg_df = (res_df.groupby("horizon")
          .agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"), sMAPE=("sMAPE","mean"), DA=("DA","mean"))
          .reset_index().sort_values("horizon"))
agg_csv = f"{TBL_DIR}/acg_results_agg_by_horizon.csv"
agg_df.to_csv(agg_csv, index=False)

print("\n§4.3 RESULTS SUMMARY")
print("Arms (asset,horizon):", arms)
print("Selection counts & mean rewards CSV:", f"{TBL_DIR}/acg_selection_counts.csv")
print(sel_df.to_string(index=False))
print("\nTeacher rounds:", DRLCFG.teacher_rounds, "| UCB1 c =", DRLCFG.ucb_c)
print("Teacher log CSV:", log_csv)
print("Cumulative regret plotted in:", f_regret)
print("\nACG-DRL per-arm TEST results CSV:", res_csv)
print(res_df.head(len(res_df)).to_string(index=False))
print("\nAggregated by horizon CSV:", agg_csv)
print(agg_df.to_string(index=False))

figs = sorted([os.path.join(FIG_DIR, f) for f in os.listdir(FIG_DIR) if f.endswith(".png")])
print("\nFIGURES SAVED (first 40)")
for p in figs[:40]:
    print(p)
if len(figs) > 40:
    print(f"... and {len(figs)-40} more figures")

print("\nArtifacts:")
print(" - Student model summary:", os.path.join(TBL_DIR, "acg_student_model_summary.txt"))
print(" - Coverage windows:", f"{TBL_DIR}/acg_coverage_windows.csv")
print(" - Final val MSE by arm:", f"{TBL_DIR}/acg_val_mse_final.csv")


§4.3 Config
 {
  "ds_csv": "/content/export/tables/dataset_long_1D.csv",
  "assets_max": 5,
  "horizons": [
    1,
    3,
    7
  ],
  "win": 64,
  "batch_size": 256,
  "teacher_rounds": 200,
  "train_steps_per_pull": 1,
  "lr": 0.002,
  "gamma": 0.99,
  "entropy_beta": 0.001,
  "aux_sup_weight": 0.1,
  "ucb_c": 1.2,
  "seed": 1337,
  "device": "cpu",
  "lstm_hidden": 64,
  "lstm_layers": 1,
  "lstm_dropout": 0.0,
  "eval_n_trace": 400
}
Arms (asset,horizon): [('CLOSE', 1), ('CLOSE', 3), ('CLOSE', 7), ('HIGH', 1), ('HIGH', 3), ('HIGH', 7), ('LOW', 1), ('LOW', 3), ('LOW', 7), ('OPEN', 1), ('OPEN', 3), ('OPEN', 7)]

§4.3 RESULTS SUMMARY
Arms (asset,horizon): [('CLOSE', 1), ('CLOSE', 3), ('CLOSE', 7), ('HIGH', 1), ('HIGH', 3), ('HIGH', 7), ('LOW', 1), ('LOW', 3), ('LOW', 7), ('OPEN', 1), ('OPEN', 3), ('OPEN', 7)]
Selection counts & mean rewards CSV: /content/export/tables/acg_selection_counts.csv
 arm_idx asset  horizon  pulls  mean_reward
       0 CLOSE        1     17     0.000515
     

In [7]:
!pip -q install torchinfo torchviz > /dev/null

import os, json, math, random, warnings
from dataclasses import dataclass, asdict
from typing import Dict, Tuple, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary as torch_summary

warnings.filterwarnings("ignore", category=FutureWarning)
try:
    from torchviz import make_dot
    TORCHVIZ_OK = True
except Exception:
    TORCHVIZ_OK = False

# Paths
BASE_DIR = "/content"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
LOG_DIR  = f"{BASE_DIR}/export/logs"
for d in [TBL_DIR, FIG_DIR, LOG_DIR]:
    os.makedirs(d, exist_ok=True)

# Config
@dataclass
class RegimeACGConfig:
    ds_csv: str = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"
    assets_max: int = 5
    horizons: Tuple[int,...] = (1,3,7)
    win: int = 64
    pe_win: int = 64
    pe_m: int = 4
    pe_tau: int = 1
    min_samples_per_arm: int = 128
    batch_size: int = 256
    teacher_rounds: int = 240
    train_steps_per_pull: int = 1
    lr: float = 2e-3
    entropy_beta: float = 1e-3
    aux_sup_weight: float = 1e-1
    ucb_c: float = 1.2
    seed: int = 1337
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lstm_hidden: int = 64
    lstm_layers: int = 1
    lstm_dropout: float = 0.0
    eval_n_trace: int = 400

CFG = RegimeACGConfig()
random.seed(CFG.seed); np.random.seed(CFG.seed); torch.manual_seed(CFG.seed)
print("§4.4 Config\n", json.dumps(asdict(CFG), indent=2))

# Load dataset and standardize (train-only stats)
df = pd.read_csv(CFG.ds_csv, parse_dates=["timestamp"])
assert {"timestamp","symbol","close","split"}.issubset(df.columns)
df["close"] = pd.to_numeric(df["close"], errors="coerce")
df = df.dropna(subset=["close"]).reset_index(drop=True)

top_assets = (df.groupby("symbol").size()
              .sort_values(ascending=False)
              .head(CFG.assets_max).index.tolist())
df = df[df["symbol"].isin(top_assets)].copy().sort_values(["symbol","timestamp"])

scalers: Dict[str, Tuple[float,float]] = {}
for a, g in df[df["split"]=="train"].groupby("symbol"):
    mu = float(g["close"].mean())
    sd = float(g["close"].std(ddof=0))
    scalers[a] = (mu, sd if sd > 0 else 1.0)

def zscore(asset, x):
    mu, sd = scalers[asset]; return (x - mu) / sd

df["z"] = df.apply(lambda r: zscore(r["symbol"], r["close"]), axis=1)

# Permutation entropy
from math import factorial

def _perm_index(perm: np.ndarray) -> int:
    m = len(perm); code = 0
    for i in range(m):
        c = 0
        for j in range(i+1, m):
            if perm[j] < perm[i]: c += 1
        code = code * (m - i) + c
    return code

def permutation_entropy_window(x: np.ndarray, m: int = 4) -> float:
    n = len(x)
    if n < m: return np.nan
    counts = np.zeros(factorial(m), dtype=np.int64)
    for i in range(n - m + 1):
        pat = np.argsort(x[i:i+m], kind="mergesort")
        counts[_perm_index(pat)] += 1
    total = counts.sum()
    if total == 0: return np.nan
    p = counts[counts>0].astype(np.float64) / total
    H = -np.sum(p * np.log(p))
    return float(H / np.log(factorial(m)))

def permutation_entropy_series(x: np.ndarray, win: int = 64, m: int = 4) -> np.ndarray:
    n = len(x); out = np.full(n, np.nan, dtype=np.float32)
    for t in range(win-1, n):
        out[t] = permutation_entropy_window(x[t-win+1:t+1], m=m)
    return out

# Compute PE and regimes (train tertiles)
pe_records = []
for a, g in df.groupby("symbol"):
    z_all = g["z"].values.astype(np.float32)
    H = permutation_entropy_series(z_all, win=CFG.pe_win, m=CFG.pe_m)
    g = g.copy(); g["pe_norm"] = H
    g_train = g[g["split"]=="train"].dropna(subset=["pe_norm"])
    if len(g_train) < 32:
        thr1, thr2 = np.nanpercentile(g["pe_norm"], [33, 67])
    else:
        thr1, thr2 = np.nanpercentile(g_train["pe_norm"], [33, 67])
    def tier(h):
        if np.isnan(h): return np.nan
        if h <= thr1: return "low"
        if h <= thr2: return "mid"
        return "high"
    g["regime"] = g["pe_norm"].apply(tier)
    pe_records.append(g)

df_pe = pd.concat(pe_records, ignore_index=True)
df_pe.to_csv(f"{TBL_DIR}/perm_entropy_with_regimes.csv", index=False)

# Example BTC segmentation figure
def plot_btc_segmentation():
    g = df_pe[df_pe["symbol"]=="BTC"].dropna(subset=["pe_norm"]).copy()
    if len(g)==0: return None
    fig, ax = plt.subplots(2, 1, figsize=(10,5), sharex=True)
    ax[0].plot(g["timestamp"], g["z"], lw=0.8); ax[0].set_title("BTC standardized close")
    ax[1].plot(g["timestamp"], g["pe_norm"], lw=0.8); ax[1].set_title(f"BTC permutation entropy (m={CFG.pe_m}, win={CFG.pe_win})")
    for tier_name, color in [("low","#d0f0c0"), ("mid","#fff3b0"), ("high","#f4cccc")]:
        mask = (g["regime"]==tier_name)
        if mask.any():
            idx = np.where(mask.values)[0]; start=None
            for i in range(len(idx)):
                if start is None: start = idx[i]
                if i==len(idx)-1 or idx[i+1] != idx[i]+1:
                    s, e = start, idx[i]
                    ax[1].axvspan(g["timestamp"].iloc[s], g["timestamp"].iloc[e], color=color, alpha=0.3)
                    start=None
    for a in ax: a.grid(True, alpha=0.2)
    plt.tight_layout()
    p = os.path.join(FIG_DIR, "regime_segmentation_BTC.png")
    plt.savefig(p, dpi=150); plt.close()
    return p

seg_fig = plot_btc_segmentation()

# Regime-aware datasets
def make_xy_with_regime(series: pd.Series, regimes: pd.Series, horizon: int, win: int):
    vals, regs = series.values, regimes.values
    out = {"low":[], "mid":[], "high":[]}; last_out = {"low":[], "mid":[], "high":[]}; y_out = {"low":[], "mid":[], "high":[]}
    for t in range(win-1, len(vals)-horizon):
        r = regs[t]
        if r not in out: continue
        x = vals[t-win+1:t+1].astype(np.float32)
        y = np.float32(vals[t+horizon])
        out[r].append(x); y_out[r].append(y); last_out[r].append(np.float32(vals[t]))
    out2 = {}
    for r in ["low","mid","high"]:
        if len(out[r])>0:
            X = np.stack(out[r])[:, :, None].astype(np.float32)
            Y = np.stack(y_out[r])[:, None].astype(np.float32)
            LAST = np.stack(last_out[r])[:, None].astype(np.float32)
            out2[r] = (X, Y, LAST)
    return out2

data_xy = {}; coverage = []
for a, g in df_pe.groupby("symbol"):
    g = g.sort_values("timestamp").copy()
    for split in ["train","val","test"]:
        gz = g[g["split"]==split]
        z, reg = gz["z"].reset_index(drop=True), gz["regime"].reset_index(drop=True)
        for h in CFG.horizons:
            buckets = make_xy_with_regime(z, reg, h, CFG.win)
            for r, tup in buckets.items():
                X, Y, LAST = tup
                key = (a, split, h, r)
                data_xy[key] = (X, Y, LAST)
                coverage.append((a, split, h, r, len(Y)))

pd.DataFrame(coverage, columns=["asset","split","horizon","regime","samples"]).to_csv(
    f"{TBL_DIR}/acg_regime_coverage_windows.csv", index=False
)

# Arms: (asset,horizon,regime)
arms: List[Tuple[str,int,str]] = []
for a in top_assets:
    for h in CFG.horizons:
        for r in ["low","mid","high"]:
            ok = True
            for sp in ["train","val","test"]:
                key = (a, sp, h, r)
                if key not in data_xy: ok=False; break
                if data_xy[key][1].shape[0] < CFG.min_samples_per_arm: ok=False; break
            if ok: arms.append((a,h,r))
assert len(arms)>0, "No regime-aware arms; reduce CFG.min_samples_per_arm."
n_arms = len(arms)
print(f"Arms (asset,horizon,regime): {arms}")

# Student model
class PolicyLSTM(nn.Module):
    def __init__(self, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden, num_layers=layers,
                            dropout=(dropout if layers>1 else 0.0), batch_first=True)
        self.mu = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))
        self.log_sigma = nn.Parameter(torch.tensor(-0.5))
    def forward(self, x):
        o, _ = self.lstm(x)
        last = o[:, -1, :]
        mu = self.mu(last)
        return mu, self.log_sigma.expand_as(mu)

def count_params(m): return sum(p.numel() for p in m.parameters() if p.requires_grad)

device = torch.device(CFG.device)
policy = PolicyLSTM(hidden=CFG.lstm_hidden, layers=CFG.lstm_layers, dropout=CFG.lstm_dropout).to(device)
opt = torch.optim.AdamW(policy.parameters(), lr=CFG.lr)

# Teacher (UCB1)
class UCB1Teacher:
    def __init__(self, n_arms, c=1.2):
        self.c = c; self.n = np.zeros(n_arms, dtype=np.int64)
        self.mean = np.zeros(n_arms, dtype=np.float64); self.t = 0
    def select(self):
        self.t += 1
        for i in range(n_arms):
            if self.n[i]==0: return i
        ucb = self.mean + self.c * np.sqrt(2.0 * math.log(self.t) / self.n)
        return int(np.argmax(ucb))
    def update(self, i, reward):
        self.n[i] += 1
        self.mean[i] += (reward - self.mean[i]) / self.n[i]

teacher = UCB1Teacher(n_arms, CFG.ucb_c)

# Batching, eval, metrics
def sample_train_minibatch(arm_idx: int, batch_size: int):
    a,h,r = arms[arm_idx]
    X,Y,L = data_xy[(a,"train",h,r)]
    idx = np.random.randint(0, len(Y), size=(min(batch_size, len(Y)),))
    xb = torch.from_numpy(X[idx]).float().to(device)
    yb = torch.from_numpy(Y[idx]).float().to(device)
    lb = torch.from_numpy(L[idx]).float().to(device)
    return xb, yb, lb

def eval_val_mse(policy, arm_idx: int) -> float:
    a,h,r = arms[arm_idx]
    X,Y,_ = data_xy[(a,"val",h,r)]
    with torch.no_grad():
        xb = torch.from_numpy(X).float().to(device)
        mu, _ = policy(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    return float(np.mean((Y.squeeze(1) - yhat)**2))

def mae(y, yhat): return float(np.mean(np.abs(y - yhat)))
def rmse(y, yhat): return float(np.sqrt(np.mean((y - yhat)**2)))
def smape(y, yhat, eps=1e-8):
    denom = (np.abs(y) + np.abs(yhat) + eps)/2.0
    return float(np.mean(np.abs(y - yhat)/denom) * 100.0)
def dir_acc(y_prev, y_true, yhat):
    s_true = np.sign(y_true - y_prev); s_pred = np.sign(yhat - y_prev)
    return float(np.mean((s_true == s_pred).astype(np.float32)))

# REINFORCE step
baseline_reward = 0.0
baseline_momentum = 0.9
def reinforce_step(xb, yb):
    policy.train()
    opt.zero_grad()
    mu, log_sigma = policy(xb)
    sigma = torch.exp(log_sigma)
    dist = torch.distributions.Normal(mu, sigma)
    a = dist.rsample()
    r = - (a - yb)**2
    global baseline_reward
    avg_r = r.mean().detach()
    advantage = r - baseline_reward
    baseline_reward = baseline_momentum * baseline_reward + (1.0 - baseline_momentum) * avg_r
    loss_policy = - (dist.log_prob(a) * advantage.detach()).mean()
    loss_entropy = - CFG.entropy_beta * dist.entropy().mean()
    loss_aux = CFG.aux_sup_weight * nn.MSELoss()(mu, yb)
    loss = loss_policy + loss_entropy + loss_aux
    loss.backward()
    nn.utils.clip_grad_norm_(policy.parameters(), 1.0)
    opt.step()

# Warm start
for i in range(n_arms):
    xb, yb, _ = sample_train_minibatch(i, CFG.batch_size)
    reinforce_step(xb, yb)

# Bandit loop
log = {"round":[], "arm":[], "asset":[], "horizon":[], "regime":[],
       "reward_lp":[], "val_before":[], "val_after":[], "mean_reward_est":[], "pulls_arm":[]}

for t in range(1, CFG.teacher_rounds + 1):
    i = teacher.select()
    a,h,r = arms[i]
    val_before = eval_val_mse(policy, i)
    xb, yb, _ = sample_train_minibatch(i, CFG.batch_size)
    for _ in range(CFG.train_steps_per_pull): reinforce_step(xb, yb)
    val_after = eval_val_mse(policy, i)
    reward_lp = float(val_before - val_after)
    teacher.update(i, reward_lp)

    log["round"].append(t); log["arm"].append(i)
    log["asset"].append(a); log["horizon"].append(h); log["regime"].append(r)
    log["reward_lp"].append(reward_lp)
    log["val_before"].append(val_before); log["val_after"].append(val_after)
    log["mean_reward_est"].append(float(teacher.mean[i])); log["pulls_arm"].append(int(teacher.n[i]))

log_df = pd.DataFrame(log)
log_csv = f"{TBL_DIR}/acg_regime_teacher_log.csv"
log_df.to_csv(log_csv, index=False)

sel_df = (log_df.groupby(["arm","asset","horizon","regime"])
          .size().reset_index(name="pulls").sort_values("arm"))
sel_df["mean_reward"] = [float(teacher.mean[int(i)]) for i in sel_df["arm"].values]
sel_df.to_csv(f"{TBL_DIR}/acg_regime_selection_counts.csv", index=False)

# Regime transitions
reg_map = {"low":0,"mid":1,"high":2}
reg_list = log_df["regime"].map(reg_map).values
trans = np.zeros((3,3), dtype=np.int64)
for i in range(1, len(reg_list)):
    trans[reg_list[i-1], reg_list[i]] += 1
trans_df = pd.DataFrame(trans, index=["low","mid","high"], columns=["low","mid","high"])
trans_df.to_csv(f"{TBL_DIR}/acg_regime_transitions.csv")

plt.figure(figsize=(4.5,4))
plt.imshow(trans, cmap="Blues")
plt.xticks([0,1,2], ["low","mid","high"]); plt.yticks([0,1,2], ["low","mid","high"])
plt.title("Regime transitions (teacher selections)")
for i in range(3):
    for j in range(3):
        plt.text(j, i, str(trans[i,j]), ha="center", va="center")
plt.tight_layout()
plt.savefig(os.path.join(FIG_DIR, "acg_regime_transition_heatmap.png"), dpi=150); plt.close()

# Plots
def savefig(fname):
    p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

labels = [f"{a}-H{h}-{r}" for (a,h,r) in arms]

plt.figure(figsize=(9,3))
plt.plot(log_df["arm"].values, lw=0.8)
plt.yticks(range(len(labels)), labels)
plt.xlabel("Round"); plt.ylabel("Selected arm")
plt.title("Regime-aware arm selection over time")
f_sel_over_time = savefig("acg_regime_arm_selection_over_time.png")

plt.figure(figsize=(9,2.6))
plt.scatter(log_df["round"], log_df["regime"].map({"low":0,"mid":1,"high":2}), s=8)
plt.yticks([0,1,2], ["low","mid","high"])
plt.title("Selected regime over time"); plt.xlabel("Round"); plt.ylabel("Regime")
f_reg_over_time = savefig("acg_selected_regime_over_time.png")

plt.figure(figsize=(8,3))
plt.bar(range(len(labels)), sel_df.sort_values("arm")["pulls"].values)
plt.xticks(range(len(labels)), labels, rotation=60, ha="right")
plt.ylabel("Pulls"); plt.title("Selection counts by arm (regime-aware)")
f_sel_counts = savefig("acg_regime_selection_counts.png")

plt.figure(figsize=(8,3))
plt.bar(range(len(labels)), sel_df.sort_values("arm")["mean_reward"].values)
plt.xticks(range(len(labels)), labels, rotation=60, ha="right")
plt.ylabel("Mean reward (Δ val MSE)"); plt.title("Mean learning progress by arm")
f_mean_reward = savefig("acg_regime_mean_reward_by_arm.png")

plt.figure(figsize=(7,3))
plt.plot(pd.Series(log_df["reward_lp"]).rolling(10).mean())
plt.axhline(0.0, color="black", lw=1)
plt.title("Learning progress (rolling-10 mean)"); plt.xlabel("Round"); plt.ylabel("Δ val MSE")
f_reward_curve = savefig("acg_regime_reward_curve.png")

inst_regret = []
for r in range(len(log_df)):
    means_snapshot = (log_df.iloc[:r+1].groupby("arm")["mean_reward_est"].last().to_dict())
    best_mean = max(means_snapshot.values()) if len(means_snapshot)>0 else 0.0
    inst_regret.append(best_mean - log_df.iloc[r]["reward_lp"])
regret = np.cumsum(inst_regret)

plt.figure(figsize=(7,3))
plt.plot(regret); plt.title("Cumulative regret (approx.)")
plt.xlabel("Round"); plt.ylabel("Regret")
f_regret = savefig("acg_regime_cumulative_regret.png")

val_after_latest = []
for i in range(n_arms):
    val_after_latest.append(eval_val_mse(policy, i))
val_snap_df = pd.DataFrame({
    "arm_idx": list(range(n_arms)),
    "asset": [a for a,_,_ in arms],
    "horizon": [h for _,h,_ in arms],
    "regime": [r for *_,r in arms],
    "val_mse_final": val_after_latest,
    "pulls": sel_df.sort_values("arm")["pulls"].values
})
val_snap_df.to_csv(f"{TBL_DIR}/acg_regime_val_mse_final.csv", index=False)

plt.figure(figsize=(9,3))
plt.bar(range(n_arms), val_snap_df["val_mse_final"].values)
plt.xticks(range(n_arms), labels, rotation=60, ha="right")
plt.ylabel("Val MSE"); plt.title("Final validation MSE by arm (regime-aware)")
f_val_mse_final = savefig("acg_regime_val_mse_final_bars.png")

reg_reward = log_df.groupby("regime")["reward_lp"].mean().reindex(["low","mid","high"])
plt.figure(figsize=(5,3))
plt.bar(reg_reward.index, reg_reward.values)
plt.ylabel("Mean reward (Δ val MSE)"); plt.title("Learning progress by regime")
f_reward_by_regime = savefig("acg_mean_reward_by_regime.png")

# Test evaluation
results = []
for i in range(n_arms):
    a,h,r = arms[i]
    X,Y,L = data_xy[(a,"test",h,r)]
    with torch.no_grad():
        xb = torch.from_numpy(X).float().to(device)
        mu, _ = policy(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    y_true = Y.squeeze(1); y_prev = L.squeeze(1)
    results.append(dict(model="ACG-DRL", asset=a, horizon=h, regime=r,
                        MAE=mae(y_true, yhat), RMSE=rmse(y_true, yhat),
                        sMAPE=smape(y_true, yhat), DA=dir_acc(y_prev, y_true, yhat)))

res_df = pd.DataFrame(results).sort_values(["asset","horizon","regime"])
res_csv = f"{TBL_DIR}/acg_results_per_arm_regime.csv"
res_df.to_csv(res_csv, index=False)

agg_df = (res_df.groupby(["horizon","regime"])
          .agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"), sMAPE=("sMAPE","mean"), DA=("DA","mean"))
          .reset_index().sort_values(["horizon","regime"]))
agg_csv = f"{TBL_DIR}/acg_results_agg_by_horizon_regime.csv"
agg_df.to_csv(agg_csv, index=False)

def barplot_metric(df, metric, fname, title):
    plt.figure(figsize=(7,4))
    labels_h = sorted(df["horizon"].unique())
    regs = ["low","mid","high"]
    width = 0.22; idx = np.arange(len(labels_h))
    for i, r in enumerate(regs):
        sub = df[df["regime"]==r].set_index("horizon").reindex(labels_h)
        plt.bar(idx + i*width, sub[metric].values, width=width, label=r)
    plt.xticks(idx + width, [f"H={h}" for h in labels_h])
    plt.ylabel(metric); plt.title(title); plt.legend()
    p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

p_mae = barplot_metric(agg_df, "MAE",   "acg_regime_mae_bars.png",   "MAE by horizon and regime")
p_da  = barplot_metric(agg_df, "DA",    "acg_regime_da_bars.png",    "Directional Accuracy by horizon and regime")
p_rmse= barplot_metric(agg_df, "RMSE",  "acg_regime_rmse_bars.png",  "RMSE by horizon and regime")
p_sm  = barplot_metric(agg_df, "sMAPE", "acg_regime_smape_bars.png", "sMAPE by horizon and regime")

# Model report
model_params = count_params(policy)
ms = torch_summary(policy, input_size=(1, CFG.win, 1), verbose=0)
with open(os.path.join(TBL_DIR, "acg_regime_student_model_summary.txt"), "w") as f:
    f.write(str(ms)); f.write(f"\nTotal trainable parameters: {model_params}\n")

if TORCHVIZ_OK:
    try:
        xdummy = torch.randn(1, CFG.win, 1).to(device)
        mu, _ = policy(xdummy)
        make_dot(mu, params=dict(list(policy.named_parameters()))).render(
            os.path.join(FIG_DIR, "acg_regime_student_graph"), format="png", cleanup=True
        )
    except Exception as e:
        print("torchviz failed:", e)

# Console summary
print("\n§4.4 RESULTS SUMMARY")
print("Regime coverage windows CSV:", f"{TBL_DIR}/acg_regime_coverage_windows.csv")
print("Arms (asset,horizon,regime):", arms)
print("\nSelection counts & mean rewards CSV:", f"{TBL_DIR}/acg_regime_selection_counts.csv")
print(sel_df.to_string(index=False))
print("\nRegime transition matrix CSV:", f"{TBL_DIR}/acg_regime_transitions.csv")
print(trans_df.to_string())
print("\nPer-arm test results CSV:", res_csv)
print(res_df.to_string(index=False))
print("\nAggregated by horizon × regime CSV:", agg_csv)
print(agg_df.to_string(index=False))
print("\nStudent params:", model_params, "| Summary:", os.path.join(TBL_DIR, "acg_regime_student_model_summary.txt"))

# Figure inventory
figs = sorted([os.path.join(FIG_DIR, f) for f in os.listdir(FIG_DIR) if f.endswith(".png")])
print("\nFIGURES (first 40)")
for p in figs[:40]:
    print(p)
if len(figs) > 40:
    print(f"... and {len(figs)-40} more")

print("\nKey figures:")
print(" - Regime segmentation example:", seg_fig)
print(" - Arm selection over time:", f_sel_over_time)
print(" - Selected regime over time:", f_reg_over_time)
print(" - Selection counts:", f_sel_counts)
print(" - Mean reward by arm:", f_mean_reward)
print(" - Rolling reward:", f_reward_curve)
print(" - Cumulative regret:", f_regret)
print(" - Final val MSE bars:", f_val_mse_final)
print(" - Reward by regime:", f_reward_by_regime)
print(" - Regime-stratified bars:", p_mae, p_da, p_rmse, p_sm)


§4.4 Config
 {
  "ds_csv": "/content/export/tables/dataset_long_1D.csv",
  "assets_max": 5,
  "horizons": [
    1,
    3,
    7
  ],
  "win": 64,
  "pe_win": 64,
  "pe_m": 4,
  "pe_tau": 1,
  "min_samples_per_arm": 128,
  "batch_size": 256,
  "teacher_rounds": 240,
  "train_steps_per_pull": 1,
  "lr": 0.002,
  "entropy_beta": 0.001,
  "aux_sup_weight": 0.1,
  "ucb_c": 1.2,
  "seed": 1337,
  "device": "cpu",
  "lstm_hidden": 64,
  "lstm_layers": 1,
  "lstm_dropout": 0.0,
  "eval_n_trace": 400
}
Arms (asset,horizon,regime): [('CLOSE', 1, 'high'), ('CLOSE', 3, 'high'), ('CLOSE', 7, 'high'), ('HIGH', 1, 'high'), ('HIGH', 3, 'high'), ('HIGH', 7, 'high'), ('LOW', 1, 'mid'), ('LOW', 1, 'high'), ('LOW', 3, 'mid'), ('LOW', 3, 'high'), ('LOW', 7, 'mid'), ('LOW', 7, 'high'), ('OPEN', 1, 'mid'), ('OPEN', 3, 'mid'), ('OPEN', 7, 'mid')]

§4.4 RESULTS SUMMARY
Regime coverage windows CSV: /content/export/tables/acg_regime_coverage_windows.csv
Arms (asset,horizon,regime): [('CLOSE', 1, 'high'), ('CLOSE

In [8]:
!pip -q install torchinfo torchviz > /dev/null

import os, json, math, random, warnings
from dataclasses import dataclass, asdict
from typing import Dict, Tuple, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torchinfo import summary as torch_summary

warnings.filterwarnings("ignore", category=FutureWarning)
try:
    from torchviz import make_dot
    TORCHVIZ_OK = True
except Exception:
    TORCHVIZ_OK = False

BASE_DIR = "/content"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
LOG_DIR  = f"{BASE_DIR}/export/logs"
for d in [TBL_DIR, FIG_DIR, LOG_DIR]:
    os.makedirs(d, exist_ok=True)

@dataclass
class AblationCfg:
    ds_csv: str = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"
    assets_max: int = 5
    horizons: Tuple[int,...] = (1,3,7)
    win: int = 64
    batch_size: int = 256
    epochs_supervised: int = 4
    teacher_rounds: int = 150
    train_steps_per_pull: int = 1
    lr: float = 2e-3
    entropy_beta: float = 1e-3
    aux_sup_weight: float = 1e-1
    ucb_c: float = 1.2
    seed: int = 1337
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lstm_hidden: int = 64
    lstm_layers: int = 1
    lstm_dropout: float = 0.0
    eval_n_trace: int = 400

CFG = AblationCfg()
random.seed(CFG.seed); np.random.seed(CFG.seed); torch.manual_seed(CFG.seed)
print("§4.5 Config\n", json.dumps(asdict(CFG), indent=2))

df = pd.read_csv(CFG.ds_csv, parse_dates=["timestamp"])
assert {"timestamp","symbol","close","split"}.issubset(df.columns)
df["close"] = pd.to_numeric(df["close"], errors="coerce")
df = df.dropna(subset=["close"]).reset_index(drop=True)

top_assets = (df.groupby("symbol").size()
              .sort_values(ascending=False)
              .head(CFG.assets_max).index.tolist())
df = df[df["symbol"].isin(top_assets)].copy().sort_values(["symbol","timestamp"])

scalers_close: Dict[str, Tuple[float,float]] = {}
for a, g in df[df["split"]=="train"].groupby("symbol"):
    mu = float(g["close"].mean())
    sd = float(g["close"].std(ddof=0))
    scalers_close[a] = (mu, sd if sd>0 else 1.0)

def z_close(asset, x):
    mu, sd = scalers_close[asset]; return (x - mu) / sd

df["z"] = df.apply(lambda r: z_close(r["symbol"], r["close"]), axis=1)

def rsi(series: pd.Series, period: int = 14) -> pd.Series:
    delta = series.diff()
    up = delta.clip(lower=0.0)
    down = -delta.clip(upper=0.0)
    roll_up = up.rolling(period, min_periods=period).mean()
    roll_down = down.rolling(period, min_periods=period).mean()
    rs = roll_up / (roll_down + 1e-12)
    return 100.0 - (100.0 / (1.0 + rs))

def _sanitize_cols(df_in: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    out = df_in.copy()
    out[cols] = out[cols].replace([np.inf, -np.inf], np.nan)
    out[cols] = out[cols].fillna(0.0)
    out[cols] = out[cols].clip(-10.0, 10.0)
    return out

def build_features(g: pd.DataFrame, augmented: bool) -> pd.DataFrame:
    g = g.sort_values("timestamp").copy()
    if not augmented:
        return _sanitize_cols(g[["timestamp","symbol","split","z"]].copy(), ["z"])

    z = g["z"].astype(float)
    ret = z.diff()
    vol14 = ret.rolling(14, min_periods=14).std()
    rsi14 = rsi(g["close"], period=14)
    ma5 = z.rolling(5, min_periods=5).mean()
    ma20 = z.rolling(20, min_periods=20).mean()
    macd = ma5 - ma20
    dist_ma20 = z - ma20

    feats = pd.DataFrame({
        "timestamp": g["timestamp"].values,
        "symbol": g["symbol"].values,
        "split": g["split"].values,
        "z": z.values,
        "dz": ret.values,
        "vol14": vol14.values,
        "rsi14": rsi14.values,
        "macd_z": macd.values,
        "dist_ma20_z": dist_ma20.values
    })

    std_cols = ["dz","vol14","rsi14","macd_z","dist_ma20_z"]
    out_list = []
    for a, ga in feats.groupby("symbol"):
        ga = ga.copy()
        train_mask = (ga["split"]=="train")
        stats = {}
        for col in std_cols:
            mu = ga.loc[train_mask, col].mean()
            sd = ga.loc[train_mask, col].std(ddof=0)
            if not pd.notna(sd) or sd <= 0: sd = 1.0
            stats[col] = (float(mu if pd.notna(mu) else 0.0), float(sd))
        for col in std_cols:
            mu, sd = stats[col]
            ga[col] = (ga[col] - mu) / sd
        ga = _sanitize_cols(ga, ["z"] + std_cols)
        out_list.append(ga)

    return pd.concat(out_list, ignore_index=True)

df_price = build_features(df, augmented=False)
df_aug   = build_features(df, augmented=True)

def make_xy_features(g: pd.DataFrame, feature_cols: List[str], horizon: int, win: int):
    vals = g[feature_cols].values.astype(np.float32)
    z = g["z"].values.astype(np.float32)
    x_list, y_list, p_list = [], [], []
    dropped = 0
    for t in range(win-1, len(vals)-horizon):
        x = vals[t-win+1:t+1, :]
        y = z[t+horizon]
        p = z[t]
        if not (np.isfinite(x).all() and np.isfinite(y) and np.isfinite(p)):
            dropped += 1
            continue
        x_list.append(x); y_list.append([y]); p_list.append([p])
    if dropped > 0:
        print(f"[make_xy_features] Dropped {dropped} windows due to non-finite values (win={win}, h={horizon}).")
    if not x_list:
        return None, None, None
    return (np.stack(x_list), np.stack(y_list), np.stack(p_list))

def build_arm_data(df_feat: pd.DataFrame, horizons, win) -> Tuple[Dict, List[Tuple[str,int]], pd.DataFrame]:
    all_cols = ["z","dz","vol14","rsi14","macd_z","dist_ma20_z"]
    feature_cols = [c for c in df_feat.columns if c in all_cols]
    data_xy, coverage, arms = {}, [], []
    for a, g in df_feat.groupby("symbol"):
        g = g.sort_values("timestamp")
        for split in ["train","val","test"]:
            gs = g[g["split"]==split].reset_index(drop=True)
            for h in horizons:
                X,Y,P = make_xy_features(gs, feature_cols, h, win)
                if X is not None:
                    data_xy[(a,split,h)] = (X,Y,P)
                    coverage.append((a,split,h,len(Y)))
        for h in horizons:
            ok = all(((a,sp,h) in data_xy and data_xy[(a,sp,h)][1].shape[0] > 0) for sp in ["train","val","test"])
            if ok: arms.append((a,h))
    cov_df = pd.DataFrame(coverage, columns=["asset","split","horizon","samples"])
    return data_xy, arms, cov_df

data_xy_price, arms_price, cov_price = build_arm_data(df_price, CFG.horizons, CFG.win)
data_xy_aug,   arms_aug,   cov_aug   = build_arm_data(df_aug,   CFG.horizons, CFG.win)

arms_common = sorted(list(set(arms_price).intersection(set(arms_aug))))
assert len(arms_common)>0, "No common feasible arms found."
print("Common feasible arms:", arms_common)

class PolicyLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden, num_layers=layers,
                            dropout=(dropout if layers>1 else 0.0), batch_first=True)
        self.mu = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))
        self.log_sigma = nn.Parameter(torch.tensor(-0.5))
    def forward(self, x):
        o, _ = self.lstm(x)
        last = o[:, -1, :]
        mu = self.mu(last)
        mu = torch.nan_to_num(mu, nan=0.0, posinf=0.0, neginf=0.0)
        return mu, self.log_sigma.expand_as(mu)

def count_params(m): return sum(p.numel() for p in m.parameters() if p.requires_grad)

class UCB1Teacher:
    def __init__(self, n_arms, c=1.2):
        self.c=c; self.n=np.zeros(n_arms, dtype=np.int64); self.mean=np.zeros(n_arms, dtype=np.float64); self.t=0
    def select(self):
        self.t+=1
        for i in range(len(self.n)):
            if self.n[i]==0: return i
        ucb = self.mean + self.c*np.sqrt(2.0*math.log(self.t)/self.n)
        return int(np.argmax(ucb))
    def update(self, i, reward):
        self.n[i]+=1
        self.mean[i]+= (reward - self.mean[i])/self.n[i]

class UniformTeacher:
    def __init__(self, n_arms):
        self.n = np.zeros(n_arms, dtype=np.int64)
        self.mean = np.zeros(n_arms, dtype=np.float64)
        self.t=0
    def select(self):
        self.t+=1
        return int(np.random.randint(0,len(self.n)))
    def update(self, i, reward):
        self.n[i]+=1
        self.mean[i]+= (reward - self.mean[i])/self.n[i]

def mae(y, yhat): return float(np.mean(np.abs(y - yhat)))
def rmse(y, yhat): return float(np.sqrt(np.mean((y - yhat)**2)))
def smape(y, yhat, eps=1e-8):
    denom = (np.abs(y) + np.abs(yhat) + eps)/2.0
    return float(np.mean(np.abs(y - yhat)/denom) * 100.0)
def dir_acc(y_prev, y_true, yhat):
    s_true = np.sign(y_true - y_prev); s_pred = np.sign(yhat - y_prev)
    return float(np.mean((s_true == s_pred).astype(np.float32)))

device = torch.device(CFG.device)

def sample_minibatch(data_xy, arms, arm_idx, bs):
    a,h = arms[arm_idx]
    X,Y,P = data_xy[(a,"train",h)]
    idx = np.random.randint(0,len(Y), size=(min(bs, len(Y)),))
    xb = torch.from_numpy(X[idx]).float().to(device)
    yb = torch.from_numpy(Y[idx]).float().to(device)
    pb = torch.from_numpy(P[idx]).float().to(device)
    return xb, yb, pb

def eval_val_mse(model, data_xy, arms, arm_idx):
    a,h = arms[arm_idx]
    X,Y,_ = data_xy[(a,"val",h)]
    with torch.no_grad():
        xb = torch.from_numpy(X).float().to(device)
        mu, _ = model(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    return float(np.mean((Y.squeeze(1) - yhat)**2))

def bandit_run(run_name: str, data_xy, arms, input_dim: int, teacher_type="ucb1", feat_tag="price"):
    model = PolicyLSTM(input_dim=input_dim, hidden=CFG.lstm_hidden, layers=CFG.lstm_layers, dropout=CFG.lstm_dropout).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=CFG.lr)
    teacher = UCB1Teacher(len(arms), c=CFG.ucb_c) if teacher_type=="ucb1" else UniformTeacher(len(arms))

    def reinforce_step(xb, yb):
        model.train(); opt.zero_grad()
        mu, log_sigma = model(xb)
        sigma = torch.exp(log_sigma)
        dist = torch.distributions.Normal(mu, sigma)
        a = dist.rsample()
        r = - (a - yb)**2
        reinforce_step.baseline = 0.9*reinforce_step.baseline + 0.1*r.mean().detach() if hasattr(reinforce_step,"baseline") else r.mean().detach()
        adv = r - reinforce_step.baseline
        loss = - (dist.log_prob(a) * adv.detach()).mean()
        loss += - CFG.entropy_beta * dist.entropy().mean()
        loss += CFG.aux_sup_weight * nn.MSELoss()(mu, yb)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()

    for i in range(len(arms)):
        xb,yb,_ = sample_minibatch(data_xy, arms, i, CFG.batch_size)
        reinforce_step(xb,yb)

    log = {"round":[], "arm":[], "asset":[], "horizon":[], "reward_lp":[], "val_before":[], "val_after":[], "mean_reward_est":[], "pulls_arm":[]}
    for t in range(1, CFG.teacher_rounds+1):
        i = teacher.select()
        a,h = arms[i]
        vb = eval_val_mse(model, data_xy, arms, i)
        for _ in range(CFG.train_steps_per_pull):
            xb,yb,_ = sample_minibatch(data_xy, arms, i, CFG.batch_size)
            reinforce_step(xb,yb)
        va = eval_val_mse(model, data_xy, arms, i)
        rw = float(vb - va)
        teacher.update(i, rw)

        log["round"].append(t); log["arm"].append(i); log["asset"].append(a); log["horizon"].append(h)
        log["reward_lp"].append(rw); log["val_before"].append(vb); log["val_after"].append(va)
        log["mean_reward_est"].append(float(teacher.mean[i])); log["pulls_arm"].append(int(teacher.n[i]))

    log_df = pd.DataFrame(log)
    log_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_teacherlog.csv")
    log_df.to_csv(log_csv, index=False)

    sel_df = (log_df.groupby(["arm","asset","horizon"]).size().reset_index(name="pulls").sort_values("arm"))
    sel_df["mean_reward"] = [float(teacher.mean[int(i)]) for i in sel_df["arm"].values]
    sel_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_selcounts.csv")
    sel_df.to_csv(sel_csv, index=False)

    def savefig(fname):
        p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

    labels = [f"{a}-H{h}" for (a,h) in arms]
    plt.figure(figsize=(8,3)); plt.plot(log_df["arm"].values, lw=0.8)
    plt.yticks(range(len(labels)), labels); plt.xlabel("Round"); plt.ylabel("Arm")
    plt.title(f"{run_name} ({feat_tag}) arm selection over time")
    f_arm_time = savefig(f"ablate_{run_name}_{feat_tag}_arm_over_time.png")

    plt.figure(figsize=(6,3))
    plt.bar(range(len(labels)), sel_df["pulls"].values)
    plt.xticks(range(len(labels)), labels, rotation=45, ha="right")
    plt.ylabel("Pulls"); plt.title(f"{run_name} ({feat_tag}) selection counts")
    f_sel_counts = savefig(f"ablate_{run_name}_{feat_tag}_selcounts.png")

    plt.figure(figsize=(7,3))
    plt.plot(pd.Series(log_df["reward_lp"]).rolling(10).mean()); plt.axhline(0.0, color="black", lw=1)
    plt.title(f"{run_name} ({feat_tag}) rolling mean Δ val MSE"); plt.xlabel("Round"); plt.ylabel("Reward")
    f_reward = savefig(f"ablate_{run_name}_{feat_tag}_reward_curve.png")

    inst_regret = []
    for r in range(len(log_df)):
        means_snapshot = log_df.iloc[:r+1].groupby("arm")["mean_reward_est"].last().to_dict()
        best_mean = max(means_snapshot.values()) if means_snapshot else 0.0
        inst_regret.append(best_mean - log_df.iloc[r]["reward_lp"])
    regret = np.cumsum(inst_regret)
    plt.figure(figsize=(7,3)); plt.plot(regret); plt.title(f"{run_name} ({feat_tag}) cumulative regret"); plt.xlabel("Round"); plt.ylabel("Regret")
    f_regret = savefig(f"ablate_{run_name}_{feat_tag}_regret.png")

    params = count_params(model)
    ms = torch_summary(model, input_size=(1, CFG.win, input_dim), verbose=0)
    with open(os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_model_summary.txt"), "w") as f:
        f.write(str(ms)); f.write(f"\nTotal trainable parameters: {params}\n")
    if TORCHVIZ_OK:
        try:
            xdummy = torch.randn(1, CFG.win, input_dim).to(device)
            mu,_ = model(xdummy)
            dot = make_dot(mu, params=dict(list(model.named_parameters())))
            dot.render(os.path.join(FIG_DIR, f"ablate_{run_name}_{feat_tag}_graph"), format="png", cleanup=True)
        except Exception as e:
            print("torchviz failed:", e)

    results = []
    for i,(a,h) in enumerate(arms):
        Xte,Yte,Prev = data_xy[(a,"test",h)]
        with torch.no_grad():
            xb = torch.from_numpy(Xte).float().to(device)
            mu,_ = model(xb)
            yhat = mu.squeeze(1).cpu().numpy()
        y_true = Yte.squeeze(1); y_prev = Prev.squeeze(1)

        results.append(dict(run=run_name, features=feat_tag, asset=a, horizon=h,
                            MAE=mae(y_true, yhat), RMSE=rmse(y_true, yhat),
                            sMAPE=smape(y_true, yhat), DA=dir_acc(y_prev, y_true, yhat)))

        Nplot = min(CFG.eval_n_trace, len(y_true))
        plt.figure(figsize=(8,3.5))
        plt.plot(y_true[:Nplot], label="true"); plt.plot(yhat[:Nplot], label="pred")
        plt.title(f"{run_name} ({feat_tag}) — Test trace {a}, H={h}")
        plt.xlabel("Index"); plt.ylabel("Standardized close"); plt.legend()
        _ = os.path.join(FIG_DIR, f"ablate_{run_name}_{feat_tag}_trace_{a}_H{h}.png"); plt.tight_layout(); plt.savefig(_, dpi=150); plt.close()

        plt.figure(figsize=(6,3))
        resid = y_true - yhat
        plt.hist(resid, bins=40)
        plt.title(f"{run_name} ({feat_tag}) — Residuals {a}, H={h}")
        plt.xlabel("Residual"); plt.ylabel("Count")
        _ = os.path.join(FIG_DIR, f"ablate_{run_name}_{feat_tag}_resid_{a}_H{h}.png"); plt.tight_layout(); plt.savefig(_, dpi=150); plt.close()

    res_df = pd.DataFrame(results).sort_values(["horizon","asset"])
    res_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_results_per_arm.csv")
    res_df.to_csv(res_csv, index=False)

    agg_df = (res_df.groupby("horizon")
              .agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"), sMAPE=("sMAPE","mean"), DA=("DA","mean"))
              .reset_index().sort_values("horizon"))
    agg_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_results_agg_by_horizon.csv")
    agg_df.to_csv(agg_csv, index=False)

    def barplot_metric(df, metric, fname, title):
        plt.figure(figsize=(6.8,3.8))
        plt.bar([f"H={h}" for h in df["horizon"]], df[metric].values)
        plt.ylabel(metric); plt.title(title);
        p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

    p_mae = barplot_metric(agg_df, "MAE",   f"ablate_{run_name}_{feat_tag}_mae_bars.png",   f"{run_name} ({feat_tag}) MAE by horizon")
    p_rmse= barplot_metric(agg_df, "RMSE",  f"ablate_{run_name}_{feat_tag}_rmse_bars.png",  f"{run_name} ({feat_tag}) RMSE by horizon")
    p_sm  = barplot_metric(agg_df, "sMAPE", f"ablate_{run_name}_{feat_tag}_smape_bars.png", f"{run_name} ({feat_tag}) sMAPE by horizon")
    p_da  = barplot_metric(agg_df, "DA",    f"ablate_{run_name}_{feat_tag}_da_bars.png",    f"{run_name} ({feat_tag}) Directional Accuracy by horizon")

    outputs = {
        "log_csv": log_csv, "sel_csv": sel_csv, "res_csv": res_csv, "agg_csv": agg_csv,
        "figs": [f_arm_time, f_sel_counts, f_reward, f_regret, p_mae, p_rmse, p_sm, p_da]
    }
    return outputs, res_df, agg_df, sel_df

def supervised_run(run_name: str, data_xy, arms, input_dim: int, feat_tag="aug"):
    model = PolicyLSTM(input_dim=input_dim, hidden=CFG.lstm_hidden, layers=CFG.lstm_layers, dropout=CFG.lstm_dropout).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=CFG.lr)
    loss_fn = nn.MSELoss()

    Xtr = []; Ytr = []
    for a,h in arms:
        X,Y,_ = data_xy[(a,"train",h)]
        Xtr.append(X); Ytr.append(Y)
    Xtr = torch.from_numpy(np.concatenate(Xtr, axis=0)).float().to(device)
    Ytr = torch.from_numpy(np.concatenate(Ytr, axis=0)).float().to(device)

    Xva = []; Yva = []
    for a,h in arms:
        X,Y,_ = data_xy[(a,"val",h)]
        Xva.append(X); Yva.append(Y)
    Xva = torch.from_numpy(np.concatenate(Xva, axis=0)).float().to(device)
    Yva = torch.from_numpy(np.concatenate(Yva, axis=0)).float().to(device)

    best = (1e9, None)
    for ep in range(1, CFG.epochs_supervised+1):
        model.train()
        idx = torch.randperm(Xtr.shape[0])
        for start in range(0, len(idx), CFG.batch_size):
            sel = idx[start:start+CFG.batch_size]
            xb, yb = Xtr[sel], Ytr[sel]
            opt.zero_grad()
            mu,_ = model(xb)
            loss = loss_fn(mu, yb)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
        model.eval()
        with torch.no_grad():
            mu,_ = model(Xva)
            va = loss_fn(mu, Yva).item()
        if va < best[0]:
            best = (va, {k:v.cpu().clone() for k,v in model.state_dict().items()})
    if best[1] is not None:
        model.load_state_dict(best[1])

    params = count_params(model)
    ms = torch_summary(model, input_size=(1, CFG.win, input_dim), verbose=0)
    with open(os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_model_summary.txt"), "w") as f:
        f.write(str(ms)); f.write(f"\nTotal trainable parameters: {params}\n")
    if TORCHVIZ_OK:
        try:
            xdummy = torch.randn(1, CFG.win, input_dim).to(device)
            mu,_ = model(xdummy)
            dot = make_dot(mu, params=dict(list(model.named_parameters())))
            dot.render(os.path.join(FIG_DIR, f"ablate_{run_name}_{feat_tag}_graph"), format="png", cleanup=True)
        except Exception as e:
            print("torchviz failed:", e)

    results = []
    for a,h in arms:
        Xte,Yte,Prev = data_xy[(a,"test",h)]
        with torch.no_grad():
            xb = torch.from_numpy(Xte).float().to(device)
            mu,_ = model(xb)
            yhat = mu.squeeze(1).cpu().numpy()
        y_true = Yte.squeeze(1); y_prev = Prev.squeeze(1)
        results.append(dict(run=run_name, features=feat_tag, asset=a, horizon=h,
                            MAE=mae(y_true, yhat), RMSE=rmse(y_true, yhat),
                            sMAPE=smape(y_true, yhat), DA=dir_acc(y_prev, y_true, yhat)))
    res_df = pd.DataFrame(results).sort_values(["horizon","asset"])
    res_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_results_per_arm.csv")
    res_df.to_csv(res_csv, index=False)

    agg_df = (res_df.groupby("horizon")
              .agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"), sMAPE=("sMAPE","mean"), DA=("DA","mean"))
              .reset_index().sort_values("horizon"))
    agg_csv = os.path.join(TBL_DIR, f"ablate_{run_name}_{feat_tag}_results_agg_by_horizon.csv")
    agg_df.to_csv(agg_csv, index=False)

    def barplot(df, metric, fname, title):
        plt.figure(figsize=(6.8,3.8))
        plt.bar([f"H={h}" for h in df["horizon"]], df[metric].values)
        plt.ylabel(metric); plt.title(title)
        p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p
    p_mae = barplot(agg_df, "MAE",   f"ablate_{run_name}_{feat_tag}_mae_bars.png",   f"{run_name} ({feat_tag}) MAE by horizon")
    p_rmse= barplot(agg_df, "RMSE",  f"ablate_{run_name}_{feat_tag}_rmse_bars.png",  f"{run_name} ({feat_tag}) RMSE by horizon")
    p_sm  = barplot(agg_df, "sMAPE", f"ablate_{run_name}_{feat_tag}_smape_bars.png", f"{run_name} ({feat_tag}) sMAPE by horizon")
    p_da  = barplot(agg_df, "DA",    f"ablate_{run_name}_{feat_tag}_da_bars.png",    f"{run_name} ({feat_tag}) Directional Accuracy by horizon")

    outputs = {"res_csv":res_csv, "agg_csv":agg_csv,
               "figs":[p_mae,p_rmse,p_sm,p_da]}
    return outputs, res_df, agg_df

input_dim_price = 1
out_v1, res_v1, agg_v1, sel_v1 = bandit_run("ACG_UCB1", data_xy_price, arms_common, input_dim_price, teacher_type="ucb1", feat_tag="price")

input_dim_aug = len([c for c in df_aug.columns if c in ["z","dz","vol14","rsi14","macd_z","dist_ma20_z"]])
out_v2, res_v2, agg_v2, sel_v2 = bandit_run("ACG_UCB1", data_xy_aug, arms_common, input_dim_aug, teacher_type="ucb1", feat_tag="aug")

out_v3, res_v3, agg_v3, sel_v3 = bandit_run("UNIFORM", data_xy_aug, arms_common, input_dim_aug, teacher_type="uniform", feat_tag="aug")

out_v4, res_v4, agg_v4 = supervised_run("SUPERVISED", data_xy_aug, arms_common, input_dim_aug, feat_tag="aug")

agg_v1["run"]="ACG-UCB1(price)"; agg_v2["run"]="ACG-UCB1(aug)"
agg_v3["run"]="UNIFORM(aug)";    agg_v4["run"]="SUPERVISED(aug)"
agg_all = pd.concat([agg_v1,agg_v2,agg_v3,agg_v4], ignore_index=True)

agg_all_csv = os.path.join(TBL_DIR, "ablate_agg_horizon_cross_variant.csv")
agg_all.to_csv(agg_all_csv, index=False)

def grouped_bars(df, metric, fname, title):
    plt.figure(figsize=(8.8,4.2))
    horizons = sorted(df["horizon"].unique())
    runs = ["ACG-UCB1(price)","ACG-UCB1(aug)","UNIFORM(aug)","SUPERVISED(aug)"]
    width = 0.18; idx = np.arange(len(horizons))
    for i, r in enumerate(runs):
        sub = df[df["run"]==r].set_index("horizon").reindex(horizons)
        plt.bar(idx + i*width, sub[metric].values, width=width, label=r)
    plt.xticks(idx + width*1.5, [f"H={h}" for h in horizons])
    plt.ylabel(metric); plt.title(title); plt.legend()
    p = os.path.join(FIG_DIR, fname); plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

g_mae  = grouped_bars(agg_all, "MAE",   "ablate_cross_variant_mae.png",   "MAE by horizon — cross-variant")
g_rmse = grouped_bars(agg_all, "RMSE",  "ablate_cross_variant_rmse.png",  "RMSE by horizon — cross-variant")
g_sm   = grouped_bars(agg_all, "sMAPE", "ablate_cross_variant_smape.png", "sMAPE by horizon — cross-variant")
g_da   = grouped_bars(agg_all, "DA",    "ablate_cross_variant_da.png",    "Directional Accuracy by horizon — cross-variant")

def head_table(df, n=10):
    try: return df.head(n).to_string(index=False)
    except: return str(df.head(n))

print("\n§4.5 RESULTS SUMMARY")
print("Common arms:", arms_common)
print("\nAggregates by horizon — ACG-UCB1 (price):", out_v1["agg_csv"]); print(head_table(agg_v1, 10))
print("\nAggregates by horizon — ACG-UCB1 (aug):", out_v2["agg_csv"]);   print(head_table(agg_v2, 10))
print("\nAggregates by horizon — UNIFORM (aug):", out_v3["agg_csv"]);    print(head_table(agg_v3, 10))
print("\nAggregates by horizon — SUPERVISED (aug):", out_v4["agg_csv"]); print(head_table(agg_v4, 10))
print("\nCross-variant aggregate CSV:", agg_all_csv);                     print(head_table(agg_all, 12))

print("\nSelection counts — ACG-UCB1 (price):", out_v1["sel_csv"]); print(sel_v1.to_string(index=False))
print("\nSelection counts — ACG-UCB1 (aug):",   out_v2["sel_csv"]); print(sel_v2.to_string(index=False))
print("\nSelection counts — UNIFORM (aug):",    out_v3["sel_csv"]); print(sel_v3.to_string(index=False))

figs = sorted([os.path.join(FIG_DIR, f) for f in os.listdir(FIG_DIR) if f.endswith(".png")])
print("\nFIGURES (first 40)")
for p in figs[:40]:
    print(p)
if len(figs) > 40:
    print(f"... and {len(figs)-40} more figures")

print("\nArtifacts:")
print(" - V1 logs:", out_v1["log_csv"])
print(" - V2 logs:", out_v2["log_csv"])
print(" - V3 logs:", out_v3["log_csv"])
print(" - Per-arm results:", out_v1["res_csv"], out_v2["res_csv"], out_v3["res_csv"], out_v4["res_csv"])
print(" - Cross-variant aggregates:", agg_all_csv)


§4.5 Config
 {
  "ds_csv": "/content/export/tables/dataset_long_1D.csv",
  "assets_max": 5,
  "horizons": [
    1,
    3,
    7
  ],
  "win": 64,
  "batch_size": 256,
  "epochs_supervised": 4,
  "teacher_rounds": 150,
  "train_steps_per_pull": 1,
  "lr": 0.002,
  "entropy_beta": 0.001,
  "aux_sup_weight": 0.1,
  "ucb_c": 1.2,
  "seed": 1337,
  "device": "cpu",
  "lstm_hidden": 64,
  "lstm_layers": 1,
  "lstm_dropout": 0.0,
  "eval_n_trace": 400
}
Common feasible arms: [('CLOSE', 1), ('CLOSE', 3), ('CLOSE', 7), ('HIGH', 1), ('HIGH', 3), ('HIGH', 7), ('LOW', 1), ('LOW', 3), ('LOW', 7), ('OPEN', 1), ('OPEN', 3), ('OPEN', 7)]

§4.5 RESULTS SUMMARY
Common arms: [('CLOSE', 1), ('CLOSE', 3), ('CLOSE', 7), ('HIGH', 1), ('HIGH', 3), ('HIGH', 7), ('LOW', 1), ('LOW', 3), ('LOW', 7), ('OPEN', 1), ('OPEN', 3), ('OPEN', 7)]

Aggregates by horizon — ACG-UCB1 (price): /content/export/tables/ablate_ACG_UCB1_price_results_agg_by_horizon.csv
 horizon      MAE     RMSE      sMAPE       DA             run


In [11]:
# === Chapter 4 §4.6: Comparative Analysis Experiment (CLEAN ONE-CELL) ===
# Purpose:
# - Compare ACG-DRL vs baselines (LSTM, SMA, Naive)
# - Run Wilcoxon (paired) and Friedman+Nemenyi tests per horizon
# - Generate boxplots, scatter plots, and CD diagrams
# - Denormalize MAE to price scale
# Inputs:
#   /content/export/tables/baseline_results_per_asset_horizon.csv
#   /content/export/tables/acg_results_per_arm.csv
#   /content/export/tables/dataset_long_1D.csv
# Outputs:
#   Tables and Figures saved under /content/export/{tables,figures}/

import os, json, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Stats library
try:
    import scipy.stats as stats
except ImportError:
    !pip -q install scipy > /dev/null
    import scipy.stats as stats

# --- Paths ---
BASE_DIR = "/content"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
for d in [TBL_DIR, FIG_DIR]:
    os.makedirs(d, exist_ok=True)

CSV_BASE = f"{TBL_DIR}/baseline_results_per_asset_horizon.csv"
CSV_ACG  = f"{TBL_DIR}/acg_results_per_arm.csv"
CSV_DS   = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"

assert os.path.exists(CSV_BASE), f"Missing {CSV_BASE}"
assert os.path.exists(CSV_ACG),  f"Missing {CSV_ACG}"
assert os.path.exists(CSV_DS),   f"Missing {CSV_DS}"

# --- Load and clean dataset ---
df_all = pd.read_csv(CSV_DS, parse_dates=["timestamp"])

# Normalize column names
df_all.columns = [c.strip().lower() for c in df_all.columns]
rename_map = {}
if "ticker" in df_all.columns and "symbol" not in df_all.columns:
    rename_map["ticker"] = "symbol"
if "asset" in df_all.columns and "symbol" not in df_all.columns:
    rename_map["asset"] = "symbol"
if "price" in df_all.columns and "close" not in df_all.columns:
    rename_map["price"] = "close"
if "close_price" in df_all.columns and "close" not in df_all.columns:
    rename_map["close_price"] = "close"
if rename_map:
    df_all = df_all.rename(columns=rename_map)

# Ensure numeric close
df_all["close"] = pd.to_numeric(df_all["close"], errors="coerce")
df_all["symbol"] = df_all["symbol"].astype(str)
bad = df_all["close"].isna().sum()
if bad:
    print(f"[clean] Coerced {bad} non-numeric 'close' values to NaN (ignored in std).")

# Compute std per asset for train split
std_train = (
    df_all[df_all["split"] == "train"]
    .groupby("symbol")["close"]
    .std(ddof=0)
    .rename("std_close_train")
)
std_train = std_train.replace({0: 1.0}).fillna(1.0)

# --- Load results ---
b = pd.read_csv(CSV_BASE)
a = pd.read_csv(CSV_ACG)
a["model"] = "ACG-DRL"

keep_models = {"ACG-DRL", "LSTM", "SMA", "Naive"}
b = b[b["model"].isin(keep_models - {"ACG-DRL"})].copy()
a = a[a["model"] == "ACG-DRL"].copy()

# --- Join per horizon ---
def join_per_arm(h):
    acg_h = a[a["horizon"] == h][["asset", "horizon", "MAE", "RMSE", "sMAPE", "DA"]].rename(
        columns={"MAE": "MAE_ACG", "RMSE": "RMSE_ACG", "sMAPE": "sMAPE_ACG", "DA": "DA_ACG"}
    )
    rows = []
    for m in ["LSTM", "SMA", "Naive"]:
        bm = b[(b["horizon"] == h) & (b["model"] == m)][["asset", "horizon", "MAE", "RMSE", "sMAPE", "DA"]].rename(
            columns={"MAE": f"MAE_{m}", "RMSE": f"RMSE_{m}", "sMAPE": f"sMAPE_{m}", "DA": f"DA_{m}"}
        )
        rows.append(bm)
    base_wide = rows[0]
    for r in rows[1:]:
        base_wide = base_wide.merge(r, on=["asset", "horizon"], how="inner")
    joined = base_wide.merge(acg_h, on=["asset", "horizon"], how="inner")
    joined = joined.merge(std_train, left_on="asset", right_index=True, how="left")
    joined["std_close_train"] = joined["std_close_train"].fillna(1.0)
    return joined

J = {h: join_per_arm(h) for h in [1, 3, 7]}
all_joined = pd.concat([J[1], J[3], J[7]], ignore_index=True)
out_joined_csv = f"{TBL_DIR}/compare_per_arm_joined.csv"
all_joined.to_csv(out_joined_csv, index=False)
print("Joined per-arm rows:", {h: len(J[h]) for h in J})
print("Saved:", out_joined_csv)

# --- Paired Wilcoxon ---
def paired_wilcoxon(x, y, alternative="two-sided"):
    diff = y - x
    diff = diff[np.isfinite(diff)]
    n = diff.size
    if n == 0:
        return np.nan, np.nan, 0, np.nan, np.nan
    try:
        stat, p = stats.wilcoxon(x, y, zero_method="pratt", alternative=alternative)
    except Exception:
        stat, p = np.nan, np.nan
    md = float(np.median(diff))
    d = float(np.mean(diff) / (np.std(diff, ddof=1) + 1e-12))
    return stat, p, n, md, d

rows = []
for h in [1, 3, 7]:
    dfh = J[h].copy()
    for m in ["LSTM", "SMA", "Naive"]:
        stat, p, n, md, d = paired_wilcoxon(dfh["MAE_ACG"], dfh[f"MAE_{m}"], alternative="less")
        rows.append(dict(horizon=h, metric="MAE", baseline=m, n=n, median_diff=(dfh[f"MAE_{m}"]-dfh["MAE_ACG"]).median(), p_value=p))
        stat, p, n, md, d = paired_wilcoxon(dfh["RMSE_ACG"], dfh[f"RMSE_{m}"], alternative="less")
        rows.append(dict(horizon=h, metric="RMSE", baseline=m, n=n, median_diff=(dfh[f"RMSE_{m}"]-dfh["RMSE_ACG"]).median(), p_value=p))
        stat, p, n, md, d = paired_wilcoxon(dfh["DA_ACG"], dfh[f"DA_{m}"], alternative="greater")
        rows.append(dict(horizon=h, metric="DA", baseline=m, n=n, median_diff=(dfh["DA_ACG"]-dfh[f"DA_{m}"]).median(), p_value=p))

wilcoxon_df = pd.DataFrame(rows)
wilcoxon_csv = f"{TBL_DIR}/wilcoxon_acg_vs_baselines_by_horizon.csv"
wilcoxon_df.to_csv(wilcoxon_csv, index=False)
print("Saved Wilcoxon summary:", wilcoxon_csv)
print(wilcoxon_df.head(12).to_string(index=False))

# --- Friedman + Nemenyi ---
def avg_ranks(values_2d, model_names):
    ranks = []
    for row in values_2d:
        r = stats.rankdata(row, method="average")
        ranks.append(r)
    ranks = np.array(ranks)
    return dict(zip(model_names, ranks.mean(axis=0))), ranks

def nemenyi_cd(k, N, alpha=0.05):
    q_table = {2:1.960, 3:2.343, 4:2.569, 5:2.728, 6:2.850, 7:2.948}
    q = q_table.get(k, 2.569)
    return q * math.sqrt(k*(k+1)/(6.0*N))

def plot_cd(avg_ranks_dict, cd, title, savepath):
    models = list(avg_ranks_dict.keys())
    ranks = np.array([avg_ranks_dict[m] for m in models])
    order = np.argsort(ranks)
    models = [models[i] for i in order]
    ranks = ranks[order]
    min_r, max_r = min(ranks)-0.5, max(ranks)+0.5

    plt.figure(figsize=(8, 1.8))
    y = 0.5
    plt.hlines(y, min_r, max_r, color="black")
    for r in np.arange(math.floor(min_r), math.ceil(max_r)+1):
        plt.vlines(r, y-0.05, y+0.05, color="black")
        plt.text(r, y+0.12, f"{r:.0f}", ha="center", va="bottom", fontsize=9)
    for m, r in zip(models, ranks):
        plt.vlines(r, y-0.05, y+0.05, color="black")
        plt.text(r, y-0.20, m, ha="center", va="top", fontsize=10)
    cd_left = max_r - cd
    plt.hlines(y+0.30, cd_left, max_r, color="black", linewidth=2)
    plt.vlines(cd_left, y+0.25, y+0.35, color="black")
    plt.vlines(max_r, y+0.25, y+0.35, color="black")
    plt.text((cd_left+max_r)/2, y+0.38, f"CD = {cd:.2f}", ha="center", va="bottom", fontsize=9)
    plt.title(title, fontsize=11)
    plt.yticks([]); plt.ylim(0, 1.0); plt.xlim(min_r, max_r)
    plt.tight_layout(); plt.savefig(savepath, dpi=150); plt.close()

friedman_rows = []
for h in [1, 3, 7]:
    dfh = J[h].copy()
    models = ["ACG-DRL","LSTM","SMA","Naive"]
    mat = np.vstack([
        dfh["MAE_ACG"], dfh["MAE_LSTM"], dfh["MAE_SMA"], dfh["MAE_Naive"]
    ]).T
    mat = mat[np.isfinite(mat).all(axis=1)]
    N = mat.shape[0]
    if N < 2:
        continue
    F_stat, F_p = stats.friedmanchisquare(*[mat[:,i] for i in range(mat.shape[1])])
    avg_r, _ = avg_ranks(mat, models)
    cd = nemenyi_cd(k=len(models), N=N)
    friedman_rows.append(dict(horizon=h, N=N, friedman_stat=F_stat, friedman_p=F_p, cd=cd, **{f"rank_{m}":avg_r[m] for m in models}))
    plot_cd(avg_r, cd, f"Critical Difference (MAE) — H={h} (N={N})", f"{FIG_DIR}/comp_cd_diagram_H{h}.png")

friedman_df = pd.DataFrame(friedman_rows)
friedman_csv = f"{TBL_DIR}/friedman_nemenyi_by_horizon.csv"
friedman_df.to_csv(friedman_csv, index=False)
print("Saved Friedman+Nemenyi summary:", friedman_csv)
print(friedman_df.to_string(index=False))

# --- Boxplots and Scatter ---
def savefig(p):
    plt.tight_layout(); plt.savefig(p, dpi=150); plt.close(); return p

for h in [1, 3, 7]:
    dfh = J[h]
    if len(dfh) == 0: continue
    data = [dfh["MAE_ACG"], dfh["MAE_LSTM"], dfh["MAE_SMA"], dfh["MAE_Naive"]]
    labels = ["ACG-DRL","LSTM","SMA","Naive"]
    plt.figure(figsize=(6.5,3.6))
    plt.boxplot(data, labels=labels, showmeans=True)
    plt.ylabel("MAE (standardized)"); plt.title(f"MAE by model — H={h}")
    savefig(f"{FIG_DIR}/comp_boxplot_mae_H{h}.png")

    data = [dfh["RMSE_ACG"], dfh["RMSE_LSTM"], dfh["RMSE_SMA"], dfh["RMSE_Naive"]]
    plt.figure(figsize=(6.5,3.6))
    plt.boxplot(data, labels=labels, showmeans=True)
    plt.ylabel("RMSE (standardized)"); plt.title(f"RMSE by model — H={h}")
    savefig(f"{FIG_DIR}/comp_boxplot_rmse_H{h}.png")

    x = dfh["MAE_LSTM"] - dfh["MAE_ACG"]
    y = dfh["DA_ACG"] - dfh["DA_LSTM"]
    plt.figure(figsize=(5.5,3.6))
    plt.axvline(0, color="k", lw=1); plt.axhline(0, color="k", lw=1)
    plt.scatter(x, y, s=18)
    plt.xlabel("ΔMAE (LSTM − ACG)  [>0 → ACG better]")
    plt.ylabel("ΔDA  (ACG − LSTM)  [>0 → ACG better]")
    plt.title(f"Trade-off: Direction vs. Magnitude — H={h}")
    savefig(f"{FIG_DIR}/comp_scatter_da_vs_mae_diff_H{h}.png")

# --- Denormalized MAE ---
denorm_rows = []
for h in [1, 3, 7]:
    dfh = J[h]
    for m, col in [("ACG-DRL","MAE_ACG"),("LSTM","MAE_LSTM"),("SMA","MAE_SMA"),("Naive","MAE_Naive")]:
        tmp = dfh[["asset", col, "std_close_train"]].copy()
        tmp["MAE_price"] = tmp[col] * tmp["std_close_train"]
        denorm_rows.append(tmp.assign(horizon=h, model=m)[["horizon","model","asset","MAE_price"]])

denorm_df = pd.concat(denorm_rows, ignore_index=True)
denorm_csv = f"{TBL_DIR}/compare_denorm_mae_price_per_arm.csv"
denorm_df.to_csv(denorm_csv, index=False)
print("Denormalized MAE saved:", denorm_csv)

print("\n=== §4.6 COMPLETED SUCCESSFULLY ===")


[clean] Coerced 5507 non-numeric 'close' values to NaN (ignored in std).
Joined per-arm rows: {1: 4, 3: 4, 7: 4}
Saved: /content/export/tables/compare_per_arm_joined.csv
Saved Wilcoxon summary: /content/export/tables/wilcoxon_acg_vs_baselines_by_horizon.csv
 horizon metric baseline  n  median_diff  p_value
       1    MAE     LSTM  4     0.024718   0.0625
       1   RMSE     LSTM  4    -0.001189   1.0000
       1     DA     LSTM  4     0.001898   0.2500
       1    MAE      SMA  4     0.453427   0.0625
       1   RMSE      SMA  4     0.446517   0.0625
       1     DA      SMA  4    -0.084440   1.0000
       1    MAE    Naive  4     0.464792   0.0625
       1   RMSE    Naive  4     1.996613   0.0625
       1     DA    Naive  4     0.567362   0.0625
       3    MAE     LSTM  4     0.013663   0.0625
       3   RMSE     LSTM  4    -0.001404   0.8750
       3     DA     LSTM  4     0.000952   0.2500
Saved Friedman+Nemenyi summary: /content/export/tables/friedman_nemenyi_by_horizon.csv
 hori

  plt.boxplot(data, labels=labels, showmeans=True)
  plt.boxplot(data, labels=labels, showmeans=True)
  plt.boxplot(data, labels=labels, showmeans=True)
  plt.boxplot(data, labels=labels, showmeans=True)
  plt.boxplot(data, labels=labels, showmeans=True)
  plt.boxplot(data, labels=labels, showmeans=True)


Denormalized MAE saved: /content/export/tables/compare_denorm_mae_price_per_arm.csv

=== §4.6 COMPLETED SUCCESSFULLY ===


In [13]:
# === Chapter 4 §4.7: Sensitivity & Hyperparameter Studies  ===
# Covers:
#  4.7.1 Seeds & early stopping (protocol)
#  4.7.2 Teacher sensitivity: UCB1 c, rounds, train-steps-per-pull
#  4.7.3 Student sensitivity: hidden, layers, dropout, learning rate
#  4.7.4 Window length w and horizon H dependence
#  4.7.5 Feature set sensitivity: price-only vs. augmented
#  4.7.6 Data coverage sensitivity: assets_max, sparse/downsampled train windows

import os, json, math, random, warnings
from dataclasses import dataclass, asdict
from typing import Dict, Tuple, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

warnings.filterwarnings("ignore", category=FutureWarning)

# -----------------------------
# Paths & folders
# -----------------------------
BASE_DIR = "/content"
DS_CSV   = f"{BASE_DIR}/export/tables/dataset_long_1D.csv"
TBL_DIR  = f"{BASE_DIR}/export/tables"
FIG_DIR  = f"{BASE_DIR}/export/figures"
for d in [TBL_DIR, FIG_DIR]: os.makedirs(d, exist_ok=True)

assert os.path.exists(DS_CSV), f"Dataset not found: {DS_CSV}"

# -----------------------------
# Config (baseline + sweep grids)
# -----------------------------
@dataclass
class SensCfg:
    ds_csv: str = DS_CSV
    assets_max_base: int = 5
    horizons_base: Tuple[int,...] = (1,3,7)
    win_base: int = 64
    batch_size: int = 256
    teacher_rounds_base: int = 80
    train_steps_per_pull_base: int = 1
    ucb_c_base: float = 1.2
    seed_base: int = 1337
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lstm_hidden_base: int = 64
    lstm_layers_base: int = 1
    lstm_dropout_base: float = 0.0
    lr_base: float = 2e-3
    eval_n_trace: int = 200
    patience_rounds: int = 15     # early stopping on global val MSE
    eval_every: int = 10
    # sweep values
    seeds: Tuple[int,...] = (42, 1337, 2024)
    ucb_c_vals: Tuple[float,...] = (0.6, 1.2, 2.0)
    rounds_vals: Tuple[int,...] = (60, 120)
    steps_vals: Tuple[int,...] = (1, 2)
    hidden_vals: Tuple[int,...] = (32, 64)
    layers_vals: Tuple[int,...] = (1, 2)
    dropout_vals: Tuple[float,...] = (0.0, 0.2)
    lr_vals: Tuple[float,...] = (1e-3, 2e-3)
    win_vals: Tuple[int,...] = (32, 64, 128)
    feature_sets: Tuple[str,...] = ("price","aug")
    assets_max_vals: Tuple[int,...] = (3, 5)
    downsample_fracs: Tuple[float,...] = (1.0, 0.5, 0.25)

CFG = SensCfg()
print("§4.7 Config\n", json.dumps(asdict(CFG), indent=2))

# -----------------------------
# Utilities
# -----------------------------
def set_seed(s):
    random.seed(s); np.random.seed(s); torch.manual_seed(s);
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(s)

device = torch.device(CFG.device)

def savefig(path):
    plt.tight_layout(); plt.savefig(path, dpi=150); plt.close(); return path

# -----------------------------
# Load dataset and build features (robust coercion; train-only std)
# -----------------------------
df_all = pd.read_csv(CFG.ds_csv, parse_dates=["timestamp"])

# Normalize column names and ensure required fields
df_all.columns = [c.strip().lower() for c in df_all.columns]
if "ticker" in df_all.columns and "symbol" not in df_all.columns:
    df_all = df_all.rename(columns={"ticker":"symbol"})
if "asset" in df_all.columns and "symbol" not in df_all.columns:
    df_all = df_all.rename(columns={"asset":"symbol"})
if "price" in df_all.columns and "close" not in df_all.columns:
    df_all = df_all.rename(columns={"price":"close"})
if "close_price" in df_all.columns and "close" not in df_all.columns:
    df_all = df_all.rename(columns={"close_price":"close"})

assert {"timestamp","symbol","close","split"}.issubset(df_all.columns), "Dataset missing required columns."

# Coerce close to numeric (this prevents TypeError during std/mean)
df_all["close"] = pd.to_numeric(df_all["close"], errors="coerce")
close_nans = int(df_all["close"].isna().sum())
if close_nans:
    print(f"[clean] 'close': coerced {close_nans} non-numeric values to NaN; handled downstream.")

# Select top assets by coverage (max cap applied per sweep)
def top_assets(df, k):
    return (df.groupby("symbol").size().sort_values(ascending=False).head(k).index.tolist())

# Train-split close z-score per asset
def fit_close_scalers(df, assets):
    scalers = {}
    for a, g in df[(df["split"]=="train") & (df["symbol"].isin(assets))].groupby("symbol"):
        mu = pd.to_numeric(g["close"], errors="coerce").mean()
        sd = pd.to_numeric(g["close"], errors="coerce").std(ddof=0)
        if not np.isfinite(sd) or sd <= 0: sd = 1.0
        mu = 0.0 if not np.isfinite(mu) else float(mu)
        scalers[a] = (float(mu), float(sd))
    return scalers

def build_features(df, assets, augmented: bool):
    df = df[df["symbol"].isin(assets)].sort_values(["symbol","timestamp"]).copy()
    # ensure close numeric here too
    df["close"] = pd.to_numeric(df["close"], errors="coerce")
    scalers = fit_close_scalers(df, assets)
    df["z"] = (df["close"] - df["symbol"].map({k:v[0] for k,v in scalers.items()})) / df["symbol"].map({k:v[1] for k,v in scalers.items()})
    df["z"] = pd.to_numeric(df["z"], errors="coerce")

    if not augmented:
        out = df[["timestamp","symbol","split","z"]].copy()
        out["z"] = out["z"].replace([np.inf,-np.inf], np.nan).fillna(0.0).clip(-10,10)
        return out

    # augmented (past-only)
    def rsi(series: pd.Series, period: int = 14) -> pd.Series:
        series = pd.to_numeric(series, errors="coerce")
        delta = series.diff()
        up = delta.clip(lower=0.0)
        down = -delta.clip(upper=0.0)
        roll_up = up.rolling(period, min_periods=period).mean()
        roll_down = down.rolling(period, min_periods=period).mean()
        rs = roll_up / (roll_down + 1e-12)
        return 100.0 - (100.0 / (1.0 + rs))

    feats = []
    for a, g in df.groupby("symbol"):
        g = g.sort_values("timestamp").copy()
        z = pd.to_numeric(g["z"], errors="coerce")
        dz = z.diff()
        vol14 = dz.rolling(14, min_periods=14).std()
        rsi14 = rsi(g["close"], 14)
        ma5  = z.rolling(5,  min_periods=5).mean()
        ma20 = z.rolling(20, min_periods=20).mean()
        macd_z = ma5 - ma20
        dist_ma20_z = z - ma20

        tmp = pd.DataFrame({
            "timestamp": g["timestamp"].values,
            "symbol": a,
            "split": g["split"].values,
            "z": z.values,
            "dz": dz.values,
            "vol14": vol14.values,
            "rsi14": rsi14.values,
            "macd_z": macd_z.values,
            "dist_ma20_z": dist_ma20_z.values
        })

        # Coerce engineered columns to numeric BEFORE standardization
        std_cols = ["dz","vol14","rsi14","macd_z","dist_ma20_z"]
        for c in ["z"] + std_cols:
            tmp[c] = pd.to_numeric(tmp[c], errors="coerce")

        # standardize engineered feats using TRAIN stats only (per asset)
        train_mask = (tmp["split"]=="train")
        for c in std_cols:
            mu = pd.to_numeric(tmp.loc[train_mask, c], errors="coerce").mean()
            sd = pd.to_numeric(tmp.loc[train_mask, c], errors="coerce").std(ddof=0)
            if not np.isfinite(sd) or sd<=0: sd = 1.0
            mu = 0.0 if not np.isfinite(mu) else float(mu)
            tmp[c] = (tmp[c] - mu) / sd

        # sanitize engineered + z
        tmp[["z"]+std_cols] = (
            tmp[["z"]+std_cols]
            .replace([np.inf,-np.inf], np.nan)
            .fillna(0.0)
            .clip(-10,10)
        )
        feats.append(tmp)

    feats_std = pd.concat(feats, ignore_index=True)
    # final dtype check (defensive)
    for c in ["z","dz","vol14","rsi14","macd_z","dist_ma20_z"]:
        feats_std[c] = pd.to_numeric(feats_std[c], errors="coerce").fillna(0.0)
    return feats_std

# Sliding-window dataset builder (skip any non-finite window)
def make_xy_features(g: pd.DataFrame, feature_cols: List[str], horizon: int, win: int):
    vals = g[feature_cols].values.astype(np.float32)
    z = g["z"].values.astype(np.float32)
    X,Y,Prev = [], [], []
    dropped = 0
    for t in range(win-1, len(vals)-horizon):
        x = vals[t-win+1:t+1, :]
        y = z[t+horizon]
        p = z[t]
        if not (np.isfinite(x).all() and np.isfinite(y) and np.isfinite(p)):
            dropped += 1; continue
        X.append(x); Y.append([y]); Prev.append([p])
    if dropped>0: print(f"[make_xy_features] Dropped {dropped} windows (win={win}, h={horizon}).")
    if not X: return None, None, None
    return np.stack(X), np.stack(Y), np.stack(Prev)

def build_arm_data(df_feat: pd.DataFrame, horizons, win) -> Tuple[Dict, List[Tuple[str,int]], pd.DataFrame]:
    all_cols = ["z","dz","vol14","rsi14","macd_z","dist_ma20_z"]
    feature_cols = [c for c in df_feat.columns if c in all_cols]
    data_xy, arms, coverage = {}, [], []
    for a, g in df_feat.groupby("symbol"):
        g = g.sort_values("timestamp")
        for split in ["train","val","test"]:
            gs = g[g["split"]==split].reset_index(drop=True)
            for h in horizons:
                X,Y,P = make_xy_features(gs, feature_cols, h, win)
                if X is not None:
                    data_xy[(a,split,h)] = (X,Y,P)
                    coverage.append((a,split,h,len(Y)))
        for h in horizons:
            ok = all(((a,sp,h) in data_xy and data_xy[(a,sp,h)][1].shape[0]>0) for sp in ["train","val","test"])
            if ok: arms.append((a,h))
    cov_df = pd.DataFrame(coverage, columns=["asset","split","horizon","samples"])
    return data_xy, arms, cov_df

def downsample_train(data_xy: Dict, arms: List[Tuple[str,int]], frac: float):
    if frac>=0.999: return data_xy
    out = {}
    rng = np.random.default_rng(123)
    for k in list(data_xy.keys()):
        a, split, h = k
        X,Y,P = data_xy[k]
        if split=="train":
            n = len(Y); m = max(1, int(round(n*frac)))
            idx = rng.choice(n, size=m, replace=False)
            out[k] = (X[idx], Y[idx], P[idx])
        else:
            out[k] = (X,Y,P)
    return out

# -----------------------------
# Models & training
# -----------------------------
class PolicyLSTM(nn.Module):
    def __init__(self, input_dim=1, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden, num_layers=layers,
                            dropout=(dropout if layers>1 else 0.0), batch_first=True)
        self.mu = nn.Sequential(nn.Linear(hidden, hidden), nn.ReLU(), nn.Linear(hidden, 1))
        self.log_sigma = nn.Parameter(torch.tensor(-0.5))
    def forward(self, x):
        o,_ = self.lstm(x)
        last = o[:, -1, :]
        mu = self.mu(last)
        log_sigma = self.log_sigma.expand_as(mu)
        mu = torch.nan_to_num(mu, nan=0.0, posinf=0.0, neginf=0.0)
        return mu, log_sigma

def count_params(m): return sum(p.numel() for p in m.parameters() if p.requires_grad)

class UCB1Teacher:
    def __init__(self, n_arms, c=1.2):
        self.c=c; self.n=np.zeros(n_arms, dtype=np.int64); self.mean=np.zeros(n_arms, dtype=np.float64); self.t=0
    def select(self):
        self.t+=1
        for i in range(len(self.n)):
            if self.n[i]==0: return i
        ucb = self.mean + self.c*np.sqrt(2.0*math.log(self.t)/self.n)
        return int(np.argmax(ucb))
    def update(self, i, reward):
        self.n[i]+=1
        self.mean[i]+= (reward - self.mean[i])/self.n[i]

class UniformTeacher:
    def __init__(self, n_arms):
        self.n = np.zeros(n_arms, dtype=np.int64); self.mean = np.zeros(n_arms, dtype=np.float64); self.t=0
    def select(self):
        self.t+=1
        return int(np.random.randint(0,len(self.n)))
    def update(self, i, reward):
        self.n[i]+=1
        self.mean[i]+= (reward - self.mean[i])/self.n[i]

# metrics
def mae(y, yhat): return float(np.mean(np.abs(y - yhat)))
def rmse(y, yhat): return float(np.sqrt(np.mean((y - yhat)**2)))
def smape(y, yhat, eps=1e-8):
    denom = (np.abs(y) + np.abs(yhat) + eps)/2.0
    return float(np.mean(np.abs(y - yhat)/denom) * 100.0)
def dir_acc(y_prev, y_true, yhat):
    s_true = np.sign(y_true - y_prev); s_pred = np.sign(yhat - y_prev)
    return float(np.mean((s_true == s_pred).astype(np.float32)))

def sample_minibatch(data_xy, arms, arm_idx, bs):
    a,h = arms[arm_idx]
    X,Y,P = data_xy[(a,"train",h)]
    if len(Y)==0:
        idx = np.array([0])
    else:
        idx = np.random.randint(0,len(Y), size=(min(bs, len(Y)),))
    xb = torch.from_numpy(X[idx]).float().to(device)
    yb = torch.from_numpy(Y[idx]).float().to(device)
    pb = torch.from_numpy(P[idx]).float().to(device)
    return xb, yb, pb

def eval_val_mse(model, data_xy, arms, arm_idx):
    a,h = arms[arm_idx]
    X,Y,_ = data_xy[(a,"val",h)]
    with torch.no_grad():
        xb = torch.from_numpy(X).float().to(device)
        mu, _ = model(xb)
        yhat = mu.squeeze(1).cpu().numpy()
    return float(np.mean((Y.squeeze(1) - yhat)**2))

def eval_overall_val_mse(model, data_xy, arms):
    vals = []
    for i,_ in enumerate(arms):
        vals.append(eval_val_mse(model, data_xy, arms, i))
    return float(np.mean(vals)) if vals else np.nan

def evaluate_test(model, data_xy, arms):
    rows = []
    for (a,h) in arms:
        Xte,Yte,Prev = data_xy[(a,"test",h)]
        with torch.no_grad():
            xb = torch.from_numpy(Xte).float().to(device)
            mu,_ = model(xb)
            yhat = mu.squeeze(1).cpu().numpy()
        y = Yte.squeeze(1); p = Prev.squeeze(1)
        rows.append(dict(asset=a, horizon=h, MAE=mae(y,yhat), RMSE=rmse(y,yhat),
                         sMAPE=smape(y,yhat), DA=dir_acc(p,y,yhat)))
    per_arm = pd.DataFrame(rows).sort_values(["horizon","asset"])
    agg = (per_arm.groupby("horizon").agg(MAE=("MAE","mean"), RMSE=("RMSE","mean"),
                                          sMAPE=("sMAPE","mean"), DA=("DA","mean"))
           .reset_index().sort_values("horizon"))
    return per_arm, agg

def bandit_run_once(seed, df_feat, arms, data_xy, *,
                    hidden, layers, dropout, lr,
                    ucb_c, rounds, steps_per_pull,
                    batch_size=CFG.batch_size, patience=CFG.patience_rounds, eval_every=CFG.eval_every):
    set_seed(seed)
    input_dim = len([c for c in df_feat.columns if c in ["z","dz","vol14","rsi14","macd_z","dist_ma20_z"]])
    model = PolicyLSTM(input_dim=input_dim, hidden=hidden, layers=layers, dropout=dropout).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr)
    teacher = UCB1Teacher(len(arms), c=ucb_c)

    def reinforce_step(xb, yb):
        model.train(); opt.zero_grad()
        mu, log_sigma = model(xb)
        sigma = torch.exp(log_sigma)
        dist = torch.distributions.Normal(mu, sigma)
        a = dist.rsample()
        r = - (a - yb)**2
        reinforce_step.baseline = 0.9*reinforce_step.baseline + 0.1*r.mean().detach() if hasattr(reinforce_step,"baseline") else r.mean().detach()
        adv = r - reinforce_step.baseline
        loss = - (dist.log_prob(a) * adv.detach()).mean()
        loss += - 1e-3 * dist.entropy().mean()
        loss += 0.1 * nn.MSELoss()(mu, yb)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()

    # warm
    for i in range(min(len(arms), 3)):
        xb,yb,_ = sample_minibatch(data_xy, arms, i, batch_size)
        reinforce_step(xb,yb)

    best = (np.inf, None, 0)  # (global_val, state_dict, rounds_used)
    no_improve = 0
    for t in range(1, rounds+1):
        i = teacher.select()
        vb = eval_val_mse(model, data_xy, arms, i)
        for _ in range(steps_per_pull):
            xb,yb,_ = sample_minibatch(data_xy, arms, i, batch_size)
            reinforce_step(xb,yb)
        va = eval_val_mse(model, data_xy, arms, i)
        teacher.update(i, float(vb - va))

        if t % eval_every == 0:
            gval = eval_overall_val_mse(model, data_xy, arms)
            if gval < best[0]:
                best = (gval, {k:v.cpu().clone() for k,v in model.state_dict().items()}, t)
                no_improve = 0
            else:
                no_improve += 1
            if no_improve >= patience:
                break

    rounds_used = best[2] if best[1] is not None else rounds
    if best[1] is not None: model.load_state_dict(best[1])

    per_arm, agg = evaluate_test(model, data_xy, arms)
    per_arm["rounds_used"] = rounds_used
    agg["rounds_used"] = rounds_used
    agg["params"] = count_params(model)
    return per_arm, agg

# -----------------------------
# Helper to prepare data per feature set / assets_max / win
# -----------------------------
def prepare_data(feature_set="price", assets_max=CFG.assets_max_base, win=CFG.win_base, horizons=CFG.horizons_base):
    assets = top_assets(df_all, assets_max)
    df_feat = build_features(df_all, assets, augmented=(feature_set=="aug"))
    data_xy, arms, cov = build_arm_data(df_feat, horizons, win)
    return df_feat, data_xy, arms, cov

# Baseline data for most sweeps (price-only) + augmented for §4.7.5
df_price, data_xy_price, arms_price, cov_price = prepare_data("price", CFG.assets_max_base, CFG.win_base, CFG.horizons_base)
assert len(arms_price)>0, "No feasible arms for baseline (price)."
df_aug, data_xy_aug, arms_aug, cov_aug = prepare_data("aug", CFG.assets_max_base, CFG.win_base, CFG.horizons_base)
arms_common = sorted(list(set(arms_price).intersection(set(arms_aug))))
if len(arms_common)==0:
    arms_common = arms_price  # fallback

print("Baseline feasible arms (price):", arms_price)
print("Feasible arms (aug)          :", arms_aug)
print("Arms used for shared comparisons:", arms_common)

# -----------------------------
# 4.7.1 Seeds & early stopping
# -----------------------------
def sweep_seeds():
    rows=[]
    for s in CFG.seeds:
        per_arm, agg = bandit_run_once(s, df_price, arms_price, data_xy_price,
                                       hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                       dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                       ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                       steps_per_pull=CFG.train_steps_per_pull_base)
        agg["seed"]=s; rows.append(agg)
    out = pd.concat(rows, ignore_index=True)
    out.to_csv(f"{TBL_DIR}/sens_471_seeds.csv", index=False)
    # plot variance across seeds (MAE by horizon)
    plt.figure(figsize=(7.2,4))
    for h in CFG.horizons_base:
        tmp = out[out["horizon"]==h]
        plt.plot(tmp["seed"].astype(str), tmp["MAE"].values, marker="o", label=f"H={h}")
    plt.xlabel("Seed"); plt.ylabel("MAE"); plt.title("Seed sensitivity (MAE)"); plt.legend()
    savefig(f"{FIG_DIR}/sens_471_seeds_mae.png")
    return out

# -----------------------------
# 4.7.2 Teacher sensitivity (c, rounds, steps)
# -----------------------------
def sweep_teacher():
    rows=[]
    # c
    for c in CFG.ucb_c_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=c, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="ucb_c"; agg["value"]=c; rows.append(agg)
    # rounds
    for r in CFG.rounds_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=r,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="rounds"; agg["value"]=r; rows.append(agg)
    # steps
    for sp in CFG.steps_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=sp)
        agg["sweep"]="steps"; agg["value"]=sp; rows.append(agg)
    out = pd.concat(rows, ignore_index=True)
    out.to_csv(f"{TBL_DIR}/sens_472_teacher.csv", index=False)
    # figures
    def lineplot(sub, title, fname):
        plt.figure(figsize=(6.8,3.8))
        for h in CFG.horizons_base:
            tmp = sub[sub["horizon"]==h].sort_values("value")
            plt.plot(tmp["value"].astype(float), tmp["MAE"].values, marker="o", label=f"H={h}")
        plt.xlabel(sub["sweep"].iloc[0]); plt.ylabel("MAE"); plt.title(title); plt.legend()
        savefig(f"{FIG_DIR}/{fname}")
    lineplot(out[out["sweep"]=="ucb_c"], "Teacher UCB c vs. MAE", "sens_472_ucb_c_vs_mae.png")
    lineplot(out[out["sweep"]=="rounds"], "Teacher rounds vs. MAE", "sens_472_rounds_vs_mae.png")
    lineplot(out[out["sweep"]=="steps"],  "Train-steps-per-pull vs. MAE", "sens_472_steps_vs_mae.png")
    return out

# -----------------------------
# 4.7.3 Student sensitivity (one-factor sweeps)
# -----------------------------
def sweep_student():
    outs=[]
    # hidden
    rows=[]
    for hdim in CFG.hidden_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=hdim, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="hidden"; agg["value"]=hdim; rows.append(agg)
    out_h = pd.concat(rows, ignore_index=True); out_h.to_csv(f"{TBL_DIR}/sens_473_student_hidden.csv", index=False); outs.append(out_h)

    # layers
    rows=[]
    for L in CFG.layers_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=L,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="layers"; agg["value"]=L; rows.append(agg)
    out_L = pd.concat(rows, ignore_index=True); out_L.to_csv(f"{TBL_DIR}/sens_473_student_layers.csv", index=False); outs.append(out_L)

    # dropout
    rows=[]
    for dp in CFG.dropout_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=dp, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="dropout"; agg["value"]=dp; rows.append(agg)
    out_D = pd.concat(rows, ignore_index=True); out_D.to_csv(f"{TBL_DIR}/sens_473_student_dropout.csv", index=False); outs.append(out_D)

    # lr
    rows=[]
    for lr in CFG.lr_vals:
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, data_xy_price,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=lr,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["sweep"]="lr"; agg["value"]=lr; rows.append(agg)
    out_R = pd.concat(rows, ignore_index=True); out_R.to_csv(f"{TBL_DIR}/sens_473_student_lr.csv", index=False); outs.append(out_R)

    # plots
    def plot_one(sub, label, fname):
        plt.figure(figsize=(6.8,3.8))
        for h in CFG.horizons_base:
            tmp = sub[sub["horizon"]==h].sort_values("value")
            plt.plot(tmp["value"].astype(float), tmp["MAE"].values, marker="o", label=f"H={h}")
        plt.xlabel(label); plt.ylabel("MAE"); plt.title(f"Student {label} vs. MAE"); plt.legend()
        savefig(f"{FIG_DIR}/{fname}")
    plot_one(out_h, "hidden", "sens_473_hidden_vs_mae.png")
    plot_one(out_L, "layers", "sens_473_layers_vs_mae.png")
    plot_one(out_D, "dropout", "sens_473_dropout_vs_mae.png")
    plot_one(out_R, "lr",      "sens_473_lr_vs_mae.png")

    return outs

# -----------------------------
# 4.7.4 Window length w & horizon H
# -----------------------------
def sweep_window():
    rows=[]
    for w in CFG.win_vals:
        dfp, dxp, armsp, _ = prepare_data("price", CFG.assets_max_base, w, CFG.horizons_base)
        _, agg = bandit_run_once(CFG.seed_base, dfp, armsp, dxp,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["w"]=w; rows.append(agg)
    out = pd.concat(rows, ignore_index=True); out.to_csv(f"{TBL_DIR}/sens_474_win.csv", index=False)
    # plot MAE vs w for each horizon
    plt.figure(figsize=(7.2,4))
    for h in CFG.horizons_base:
        tmp = out[out["horizon"]==h].sort_values("w")
        plt.plot(tmp["w"], tmp["MAE"], marker="o", label=f"H={h}")
    plt.xlabel("Window length (w)"); plt.ylabel("MAE"); plt.title("Window length vs. MAE by horizon"); plt.legend()
    savefig(f"{FIG_DIR}/sens_474_w_by_horizon_mae.png")
    return out

# -----------------------------
# 4.7.5 Feature set sensitivity
# -----------------------------
def sweep_features():
    rows=[]
    # price
    _, agg_p = bandit_run_once(CFG.seed_base, df_price, arms_common, data_xy_price,
                               hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                               dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                               ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                               steps_per_pull=CFG.train_steps_per_pull_base)
    agg_p["feature_set"]="price"; rows.append(agg_p)
    # aug
    _, agg_a = bandit_run_once(CFG.seed_base, df_aug, arms_common, data_xy_aug,
                               hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                               dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                               ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                               steps_per_pull=CFG.train_steps_per_pull_base)
    agg_a["feature_set"]="aug"; rows.append(agg_a)
    out = pd.concat(rows, ignore_index=True); out.to_csv(f"{TBL_DIR}/sens_475_features.csv", index=False)
    # bar plot
    plt.figure(figsize=(6.8,3.8))
    for h in CFG.horizons_base:
        tmp = out[out["horizon"]==h]
        x = np.arange(2)
        vals = [tmp[tmp["feature_set"]=="price"]["MAE"].values[0],
                tmp[tmp["feature_set"]=="aug"]["MAE"].values[0]]
        plt.bar(x + (h-1)*0.25, vals, width=0.22, label=f"H={h}")
    plt.xticks(x + 0.25, ["price","aug"])
    plt.ylabel("MAE"); plt.title("Feature set sensitivity (MAE)"); plt.legend()
    savefig(f"{FIG_DIR}/sens_475_feature_set_bars.png")
    return out

# -----------------------------
# 4.7.6 Data coverage sensitivity
# -----------------------------
def sweep_coverage():
    # (i) assets_max
    rows=[]
    for k in CFG.assets_max_vals:
        dfp, dxp, armsp, _ = prepare_data("price", k, CFG.win_base, CFG.horizons_base)
        _, agg = bandit_run_once(CFG.seed_base, dfp, armsp, dxp,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["assets_max"]=k; rows.append(agg)
    out_assets = pd.concat(rows, ignore_index=True); out_assets.to_csv(f"{TBL_DIR}/sens_476_coverage_assetsmax.csv", index=False)
    plt.figure(figsize=(6.8,3.8))
    for h in CFG.horizons_base:
        tmp = out_assets[out_assets["horizon"]==h].sort_values("assets_max")
        plt.plot(tmp["assets_max"], tmp["MAE"], marker="o", label=f"H={h}")
    plt.xlabel("assets_max"); plt.ylabel("MAE"); plt.title("assets_max vs. MAE"); plt.legend()
    savefig(f"{FIG_DIR}/sens_476_assetsmax_bars.png")

    # (ii) downsample training
    rows=[]
    for frac in CFG.downsample_fracs:
        dx = downsample_train(data_xy_price, arms_price, frac)
        _, agg = bandit_run_once(CFG.seed_base, df_price, arms_price, dx,
                                 hidden=CFG.lstm_hidden_base, layers=CFG.lstm_layers_base,
                                 dropout=CFG.lstm_dropout_base, lr=CFG.lr_base,
                                 ucb_c=CFG.ucb_c_base, rounds=CFG.teacher_rounds_base,
                                 steps_per_pull=CFG.train_steps_per_pull_base)
        agg["train_frac"]=frac; rows.append(agg)
    out_down = pd.concat(rows, ignore_index=True); out_down.to_csv(f"{TBL_DIR}/sens_476_coverage_downsample.csv", index=False)
    # figure
    plt.figure(figsize=(6.8,3.8))
    for h in CFG.horizons_base:
        tmp = out_down[out_down["horizon"]==h].sort_values("train_frac")
        plt.plot(tmp["train_frac"], tmp["MAE"], marker="o", label=f"H={h}")
    plt.xlabel("Train fraction kept"); plt.ylabel("MAE"); plt.title("Training coverage vs. MAE"); plt.legend()
    savefig(f"{FIG_DIR}/sens_476_downsample_line.png")

    return out_assets, out_down

# -----------------------------
# RUN ALL SWEEPS
# -----------------------------
print("\nRunning §4.7 sweeps... (this is a compact, CPU-friendly run)")
res_471 = sweep_seeds()
res_472 = sweep_teacher()
res_473_hidden, res_473_layers, res_473_dropout, res_473_lr = sweep_student()
res_474 = sweep_window()
res_475 = sweep_features()
res_476_assets, res_476_down = sweep_coverage()

# -----------------------------
# PRINT SUMMARY PATHS
# -----------------------------
print("\n=== §4.7 SENSITIVITY — ARTIFACT SUMMARY ===")
tbls = [f for f in sorted(os.listdir(TBL_DIR)) if f.startswith("sens_")]
figs = [f for f in sorted(os.listdir(FIG_DIR)) if f.startswith("sens_") and f.endswith(".png")]
print("Tables:")
for t in tbls: print(" -", os.path.join(TBL_DIR, t))
print("Figures:")
for f in figs[:24]: print(" -", os.path.join(FIG_DIR, f))
if len(figs)>24: print(f" ... and {len(figs)-24} more figures")


§4.7 Config
 {
  "ds_csv": "/content/export/tables/dataset_long_1D.csv",
  "assets_max_base": 5,
  "horizons_base": [
    1,
    3,
    7
  ],
  "win_base": 64,
  "batch_size": 256,
  "teacher_rounds_base": 80,
  "train_steps_per_pull_base": 1,
  "ucb_c_base": 1.2,
  "seed_base": 1337,
  "device": "cpu",
  "lstm_hidden_base": 64,
  "lstm_layers_base": 1,
  "lstm_dropout_base": 0.0,
  "lr_base": 0.002,
  "eval_n_trace": 200,
  "patience_rounds": 15,
  "eval_every": 10,
  "seeds": [
    42,
    1337,
    2024
  ],
  "ucb_c_vals": [
    0.6,
    1.2,
    2.0
  ],
  "rounds_vals": [
    60,
    120
  ],
  "steps_vals": [
    1,
    2
  ],
  "hidden_vals": [
    32,
    64
  ],
  "layers_vals": [
    1,
    2
  ],
  "dropout_vals": [
    0.0,
    0.2
  ],
  "lr_vals": [
    0.001,
    0.002
  ],
  "win_vals": [
    32,
    64,
    128
  ],
  "feature_sets": [
    "price",
    "aug"
  ],
  "assets_max_vals": [
    3,
    5
  ],
  "downsample_fracs": [
    1.0,
    0.5,
    0.25
  ]
}
[clean]