<a href="https://colab.research.google.com/github/brendonhuynhbp-hub/gt-markets/blob/main/notebooks/GoogleTrends_Financial_Modeling_Backtest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup: Mount Drive + Paths

In [15]:
# =========================================================
# SETUP
# - Mount Drive, resolve project paths, seed everything
# - Load merged dataset & keyword list (correct file)
# - Auto-detect/fix missing trend columns (fill with 0)
# - Optional weekly aggregation
# - Utilities (target, keyword mapping, lags, logging)
# - Structured output folders for this run
# =========================================================

# ---- 0) Global switches ---------------------------------
DEBUG = False        # True = faster dev loop; False = full run
FREQ  = "D"         # "D" daily, "W" weekly aggregation

# ---- 1) Drive + paths -----------------------------------
from google.colab import drive
drive.mount('/content/drive')

from pathlib import Path
import os, sys, warnings, random, json, re
import numpy as np
import pandas as pd

CANDIDATE_PROJECT_DIRS = [
    Path("/content/drive/MyDrive/gt-markets"),
    Path("/content/drive/Shareddrives/gt-markets"),
]
PROJECT_DIR = next((p for p in CANDIDATE_PROJECT_DIRS if p.exists()), None)
assert PROJECT_DIR is not None, "Project directory not found in Drive."

DATA_DIR = PROJECT_DIR / "data" / "processed"
KW_DIR   = PROJECT_DIR / "data" / "Keyword Selection"   # ← exact folder name (with space)
OUT_DIR  = PROJECT_DIR / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Correct files from older code
RAW_FILE = DATA_DIR / "merged_financial_trends_data_2025-09-07.csv"
ENG_FILE = DATA_DIR / "merged_financial_trends_engineered_2025-09-07.csv"
KW_CSV   = KW_DIR  / "combined_significant_lagged_correlations.csv"

assert RAW_FILE.exists(), f"Missing dataset: {RAW_FILE}"
assert KW_CSV.exists(),   f"Missing keyword file: {KW_CSV}"

print(f"PROJECT_DIR: {PROJECT_DIR}")
print(f"DATA_DIR:    {DATA_DIR}")
print(f"KW_DIR:      {KW_DIR}")
print(f"OUT_DIR:     {OUT_DIR}")

# ---- 2) Reproducibility + warnings ----------------------
SEED = 42
random.seed(SEED); np.random.seed(SEED)
warnings.filterwarnings("ignore")

# ---- 3) ML/DL stack ------------------------------------
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

import tensorflow as tf
from tensorflow import keras
tf.keras.utils.set_random_seed(SEED)

# Keep TF from grabbing all GPU memory (prevents OOM)
try:
    gpus = tf.config.list_physical_devices('GPU')
    for g in gpus:
        tf.config.experimental.set_memory_growth(g, True)
    print(f"TF GPU devices: {len(gpus)} (mem growth on)" if gpus else "TF on CPU")
except Exception as e:
    print("TF GPU setup note:", e)
print("TF version:", tf.__version__)

# ---- 4) Asset registry ---------------------------------
ASSETS = [
    {"PAIR_ID": "GC=F",      "price_col": "GC=F Close",      "label": "Gold"},
    {"PAIR_ID": "BTC-USD",   "price_col": "BTC-USD Close",   "label": "BTC"},
    {"PAIR_ID": "CL=F",      "price_col": "CL=F Close",      "label": "Oil"},
    {"PAIR_ID": "USDCNY=X",  "price_col": "USDCNY=X Close",  "label": "USDCNY"},
]
asset_by_label = {a["label"].lower(): a for a in ASSETS}

# ---- 5) Load merged table + fix trend NaNs --------------
# Start with RAW by default; ENG will be selectable in Runner
df0 = pd.read_csv(RAW_FILE, parse_dates=["Date"]).set_index("Date").sort_index()

# Auto-detect trend columns and fill any missing values with zero.
trend_cols_all = [c for c in df0.columns if "trend" in c.lower()]
bad_trend_cols = [c for c in trend_cols_all if df0[c].isna().any()]
if bad_trend_cols:
    print("Trend columns with missing values (auto-filled to 0):",
          bad_trend_cols[:10], "..." if len(bad_trend_cols)>10 else "")
    df0[bad_trend_cols] = df0[bad_trend_cols].fillna(0.0)

# Optional: debug slice AFTER filling so lags won’t reintroduce NaNs
if DEBUG:
    df0 = df0.tail(1000)
    print("DEBUG: using last 1000 rows only.")

# ---- 6) Optional weekly aggregation ---------------------
def to_frequency(df_in: pd.DataFrame, freq: str = "D") -> pd.DataFrame:
    """
    Resample to end-of-week for prices (last), and mean for non-price columns.
    """
    if freq.upper() == "D":
        return df_in
    assert freq.upper() == "W", "Supported frequencies: 'D' or 'W'."
    out = pd.DataFrame(index=df_in.resample("W").last().index)
    # prices: last of week
    for a in ASSETS:
        out[a["price_col"]] = df_in[a["price_col"]].resample("W").last()
    # other columns: mean of week
    price_cols = {a["price_col"] for a in ASSETS}
    other_cols = [c for c in df_in.columns if c not in price_cols]
    for c in other_cols:
        out[c] = df_in[c].resample("W").mean()
    return out

df_basefreq = to_frequency(df0, FREQ)

# ---- 7) Utilities ---------------------------------------
from functools import lru_cache
from datetime import datetime

@lru_cache(maxsize=None)
def load_keywords_for_pair(csv_path: Path, pair_id: str) -> list:
    d = pd.read_csv(csv_path)
    assert {"Pair","Keyword"}.issubset(d.columns), "Keyword CSV must have: Pair, Keyword"
    # tolerate USDCNY aliasing
    aliases = {pair_id}
    if pair_id == "USDCNY=X": aliases |= {"CNY=X"}
    if pair_id == "CNY=X":    aliases |= {"USDCNY=X"}
    kws = (d.loc[d["Pair"].isin(aliases), "Keyword"]
             .dropna().astype(str).str.strip().str.lower().unique().tolist())
    return kws

def map_keywords_to_trend_cols(all_cols: pd.Index, keywords: list) -> list:
    # keyword "gold price" -> "gold_price_trend"
    norm = lambda s: str(s).lower().strip().replace(" ", "_")
    desired = {f"{norm(k)}_trend" for k in keywords}
    return [c for c in all_cols if str(c).lower() in desired]

def make_target(df: pd.DataFrame, price_col: str) -> pd.DataFrame:
    out = df.copy()
    out["ret1"] = out[price_col].pct_change()
    out["y_up"] = (out[price_col].shift(-1) > out[price_col]).astype(int)
    return out.dropna(subset=[price_col]).dropna()

def build_trend_lag_features(df_in: pd.DataFrame, sel_cols: list, lag_steps=(7, 14, 21)) -> pd.DataFrame:
    """
    Build percentage-change features over given lags, then shift(+1) for causality.
    Handles cases where previous value is 0 to avoid +/-inf; sanitizes NaN/inf and clips outliers.
    """
    out = {}
    for c in sel_cols:
        s = df_in[c].astype(float)
        for L in lag_steps:
            chg = s.pct_change(L).shift(1)
            chg = chg.replace([np.inf, -np.inf], np.nan).fillna(0.0)
            # clip absurd pct changes to stabilize scaling (optional but recommended)
            chg = chg.clip(lower=-10.0, upper=10.0)
            out[f"{c}__chg{L}"] = chg
    return pd.DataFrame(out, index=df_in.index)

def pick_start_index(n: int, frac: float = 0.6) -> int:
    return max(50 if DEBUG else 200, int(n * (0.8 if DEBUG else frac)))

# ---- 8) Structured output folders for this run ----------
RUN_MODE  = "debug" if DEBUG else "prod"
RUN_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S")
RUN_ID    = f"{FREQ.lower()}_{RUN_MODE}_{RUN_STAMP}"

RUN_ROOT = OUT_DIR / "runs" / RUN_ID
STAGES = {
    "data":        RUN_ROOT / "00_data",
    "logs":        RUN_ROOT / "10_logs",
    "preds_val":   RUN_ROOT / "20_preds" / "val",
    "preds_test":  RUN_ROOT / "20_preds" / "test",
    "backtests":   RUN_ROOT / "30_backtests",
    "figs":        RUN_ROOT / "40_figs",
    "leaderboard": RUN_ROOT / "50_leaderboards",
}
for p in STAGES.values():
    p.mkdir(parents=True, exist_ok=True)

# Snapshot (for report reproducibility)
(df0.head(1)
 .assign(_start=df_basefreq.index.min(), _end=df_basefreq.index.max())
 .to_csv(STAGES["data"] / f"dataset_snapshot_{RAW_FILE.name}.head1.csv"))

def _slug(s: str) -> str:
    return re.sub(r"[^a-z0-9]+", "_", s.lower()).strip("_")

def _pred_name(asset_label: str, dataset: str, model: str, *, window: int | None = None, split="test") -> str:
    bits = [_slug(asset_label), dataset, "dl", model.lower()]
    if window: bits.append(f"w{window}")
    bits.append("extended")
    return ".".join(["_".join(bits), split, "csv"])

def _log_name(asset_label: str, dataset: str, run_name: str) -> str:
    return f"{_slug(asset_label)}_{dataset}_{_slug(run_name)}.{RUN_STAMP}.txt"

def _fig_name(asset_label: str, tail: str) -> str:
    return f"{_slug(asset_label)}_{tail}.png"

def save_txt_log(asset_label: str, dataset: str, run_name: str, text_lines: list[str]) -> Path:
    path = STAGES["logs"] / _log_name(asset_label, dataset, run_name)
    with open(path, "w") as f:
        f.write("\n".join(text_lines))
    return path

def save_preds_df(df_pred: pd.DataFrame, asset_label: str, dataset: str, model: str, *, window: int | None, split: str):
    stage = STAGES["preds_val"] if split == "val" else STAGES["preds_test"]
    outp = stage / _pred_name(asset_label, dataset, model, window=window, split=split)
    df_pred.to_csv(outp)
    return outp

def save_leaderboard(df_leader: pd.DataFrame, tag: str = "metrics") -> Path:
    path = STAGES["leaderboard"] / f"leaderboard_{tag}.csv"
    df_leader.to_csv(path, index=False)
    return path

def save_backtest_table(df_bt: pd.DataFrame, asset_label: str, model_tag: str) -> Path:
    path = STAGES["backtests"] / f"{_slug(asset_label)}_{_slug(model_tag)}_backtest.csv"
    df_bt.to_csv(path)
    return path

def save_figure(fig, asset_label: str, tail: str) -> Path:
    path = STAGES["figs"] / _fig_name(asset_label, tail)
    fig.savefig(path, dpi=150, bbox_inches="tight")
    return path

print(f"Output run folder: {RUN_ROOT}")
for name, path in STAGES.items():
    print(f"{name:>12}: {path}")
print("Setup complete → base frame:", df_basefreq.index.min().date(), "→", df_basefreq.index.max().date(), "| rows:", len(df_basefreq))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
PROJECT_DIR: /content/drive/MyDrive/gt-markets
DATA_DIR:    /content/drive/MyDrive/gt-markets/data/processed
KW_DIR:      /content/drive/MyDrive/gt-markets/data/Keyword Selection
OUT_DIR:     /content/drive/MyDrive/gt-markets/outputs
TF on CPU
TF version: 2.19.0
Trend columns with missing values (auto-filled to 0): ['entrepreneurial_trend', 'cryptocurrency_trend'] 
DEBUG: using last 1000 rows only.
Output run folder: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527
        data: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/00_data
        logs: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/10_logs
   preds_val: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/20_preds/val
  preds_test: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/20_preds/t

In [16]:
# =========================================================
# RUNNER
# - Build extended features (no blanket dropna)
# - Purged/embargoed splits
# - LSTM (primary) + MLP (ablation)
# - Save VAL + TEST predictions to structured folders
# - TXT logs to /10_logs
# =========================================================

# ---- 1) Split helpers -----------------------------------
def make_purged_splits(n: int, train=0.70, valid=0.15, embargo: int = 5):
    """
    Purged + embargoed indices to reduce leakage:
      train: [0 : i_tr)
      gap1:  [i_tr : i_tr+embargo)
      val:   [i_tr+embargo : i_va)
      gap2:  [i_va : i_va+embargo)
      test:  [i_va+embargo : n)
    """
    i_tr = int(n * train)
    i_va = int(n * (train + valid))
    tr = slice(0, i_tr)
    va = slice(min(i_tr + embargo, i_va), i_va)
    te = slice(min(i_va + embargo, n), n)
    return tr, va, te

# ---- 2) DL builders -------------------------------------
def make_mlp(input_dim: int):
    tf.keras.utils.set_random_seed(SEED)
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.30),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.30),
        keras.layers.Dense(1, activation="sigmoid")
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(name="auc"), keras.metrics.BinaryAccuracy(name="acc")]
    )
    return model

def make_lstm(input_shape, units=64, dropout=0.2):
    tf.keras.utils.set_random_seed(SEED)
    model = keras.Sequential([
        keras.layers.Input(shape=input_shape),          # (window, n_features)
        keras.layers.LSTM(units, return_sequences=True),
        keras.layers.Dropout(dropout),
        keras.layers.LSTM(units//2),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(name="auc"), keras.metrics.BinaryAccuracy(name="acc")]
    )
    return model

def build_sequences_from_extended(df_ext: pd.DataFrame, feature_cols: list, y_col="y_up", window=30):
    X = df_ext[feature_cols].values
    y = df_ext[y_col].astype(int).values
    idx = df_ext.index
    xs, ys, dates = [], [], []
    for t in range(window, len(df_ext)):
        xs.append(X[t-window:t, :]); ys.append(y[t]); dates.append(idx[t])
    return np.asarray(xs), np.asarray(ys), pd.DatetimeIndex(dates)

def split_scale_sequences(X_seq, y_seq, train=0.70, valid=0.15):
    # time split first (70/15/15)
    n = len(X_seq); i_tr = int(n*train); i_va = int(n*(train+valid))
    X_tr, X_va, X_te = X_seq[:i_tr], X_seq[i_tr:i_va], X_seq[i_va:]
    y_tr, y_va, y_te = y_seq[:i_tr], y_seq[i_tr:i_va], y_seq[i_va:]
    # fit scaler on train only, apply to all splits
    if len(X_tr)==0: return (X_tr,y_tr),(X_va,y_va),(X_te,y_te)
    T,W,F = X_tr.shape
    scaler = StandardScaler().fit(X_tr.reshape(T*W, F))
    def _tf(x):
        if len(x)==0: return x
        TT,WW,FF = x.shape
        return scaler.transform(x.reshape(TT*WW,FF)).reshape(TT,WW,FF)
    return (_tf(X_tr),y_tr),(_tf(X_va),y_va),(_tf(X_te),y_te)

# ---- 3) Main per-asset runner ---------------------------
def run_asset(asset: dict, dataset_version: str = "raw", use_dl: bool = True, use_dl_mode: str = "lstm"):
    """
    - dataset_version: "raw" or "eng" (tag only; switch df source here if needed)
    - use_dl_mode: "mlp" | "lstm" | "both"
    """
    label      = asset["label"]
    price_col  = asset["price_col"]
    label_tag  = f"{label} [{dataset_version}]"

    # If wanting to run engineered file too, swap df_basefreq source here:
    base_source = df_basefreq  # replace with engineered resample if using ENG_FILE externally

    # Base target frame (on chosen frequency)
    base = make_target(base_source[[price_col]], price_col).dropna(subset=[price_col, "y_up"])

    # Keyword selection for this pair
    kws = load_keywords_for_pair(KW_CSV, asset["PAIR_ID"])
    trend_cols = [c for c in map_keywords_to_trend_cols(base_source.columns, kws) if c in base_source.columns]

    # Lag features
    lag_df = build_trend_lag_features(base_source, trend_cols, lag_steps=(7,14,21))

    # Extended frame: join without blanket dropna; fill safe defaults for trend/lag features
    ext = base.join(base_source[trend_cols], how="left").join(lag_df, how="left")
    if trend_cols: ext[trend_cols] = ext[trend_cols].fillna(0.0)
    lag_cols = [c for c in ext.columns if "__chg" in c]
    if lag_cols:  ext[lag_cols]  = ext[lag_cols].fillna(0.0)

    # Final sanitization: no infinities; clip extreme outliers for stability
    num_cols = [c for c in ext.columns if c not in {price_col, "ret1", "y_up"}]
    if num_cols:
        ext[num_cols] = ext[num_cols].replace([np.inf, -np.inf], 0.0)
        ext[num_cols] = ext[num_cols].clip(lower=-10.0, upper=10.0)

    ext = ext.dropna(subset=[price_col, "y_up"]).copy()

    # Feature set for DL
    exclude = {price_col, "ret1", "y_up"}
    extended_cols = [c for c in ext.columns if c not in exclude]
    rows_used = len(ext)

    results = {}
    if use_dl:
        modes = [use_dl_mode] if use_dl_mode in {"mlp","lstm"} else ["mlp","lstm"]

        for mode in modes:
            if mode == "mlp":
                # ------- MLP on tabular extended features -------
                X = ext[extended_cols].values
                y = ext["y_up"].astype(int).values

                tr, va, te = make_purged_splits(len(ext), train=0.70, valid=0.15, embargo=5)
                X_tr, X_va, X_te = X[tr], X[va], X[te]
                y_tr, y_va, y_te = y[tr], y[va], y[te]

                scaler = StandardScaler().fit(X_tr)
                X_tr, X_va, X_te = scaler.transform(X_tr), scaler.transform(X_va), scaler.transform(X_te)

                MAX_EPOCHS = 3 if DEBUG else 30
                BATCH      = 32 if DEBUG else 64
                es = keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=5, restore_best_weights=True)

                mlp = make_mlp(X_tr.shape[1])
                mlp.fit(X_tr, y_tr, validation_data=(X_va, y_va), epochs=MAX_EPOCHS, batch_size=BATCH, callbacks=[es], verbose=0)

                p_va = mlp.predict(X_va, verbose=0).ravel()
                h_va = (p_va >= 0.5).astype(int)
                p_te = mlp.predict(X_te, verbose=0).ravel()
                h_te = (p_te >= 0.5).astype(int)

                m = {
                    "acc": accuracy_score(y_te, h_te) if len(y_te) else float("nan"),
                    "f1":  f1_score(y_te, h_te) if len(y_te) and len(np.unique(y_te))>1 else float("nan"),
                    "auc": roc_auc_score(y_te, p_te) if len(y_te) and len(np.unique(y_te))>1 else float("nan"),
                }
                results["MLP"] = (None, m)

                # Save VAL + TEST predictions
                df_val  = pd.DataFrame({"date": ext.index[va], "y_true": y_va, "y_pred": h_va, "prob_up": p_va}).set_index("date")
                df_test = pd.DataFrame({"date": ext.index[te], "y_true": y_te, "y_pred": h_te, "prob_up": p_te}).set_index("date")
                save_preds_df(df_val,  label, dataset_version, "mlp", window=None, split="val")
                save_preds_df(df_test, label, dataset_version, "mlp", window=None, split="test")

            elif mode == "lstm":
                # ------- LSTM on sequences of extended features -------
                WINDOW     = 30
                MAX_EPOCHS = 3 if DEBUG else 50
                BATCH      = 32 if DEBUG else 64

                Xseq, yseq, idx = build_sequences_from_extended(ext, extended_cols, "y_up", window=WINDOW)
                nseq = len(Xseq)
                if nseq < 50:
                    print(f"[WARN] Not enough samples for {label_tag} (LSTM).")
                else:
                    # Purged indices for sequences
                    tr, va, te = make_purged_splits(nseq, train=0.70, valid=0.15, embargo=5)
                    (X_tr_raw,y_tr_raw), (X_va_raw,y_va_raw), (X_te_raw,y_te_raw) = split_scale_sequences(Xseq, yseq, train=0.70, valid=0.15)

                    # Re-slice after scaling to align with purged indices
                    i_tr_end = int(nseq*0.70)
                    i_va_end = int(nseq*0.85)
                    X_tr, y_tr = X_tr_raw[tr], y_tr_raw[tr]
                    X_va, y_va = X_va_raw[va.start - i_tr_end: va.stop - i_tr_end], y_va_raw[va.start - i_tr_end: va.stop - i_tr_end]
                    X_te, y_te = X_te_raw[te.start - i_va_end:],                y_te_raw[te.start - i_va_end:]

                    if len(X_va)==0 or len(X_te)==0:
                        print(f"[WARN] Not enough samples after purge for {label_tag} (LSTM).")
                    else:
                        es = keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=6, restore_best_weights=True)
                        lstm = make_lstm(input_shape=X_tr.shape[1:])
                        lstm.fit(X_tr, y_tr, validation_data=(X_va, y_va), epochs=MAX_EPOCHS, batch_size=BATCH, callbacks=[es], verbose=0)

                        p_va = lstm.predict(X_va, verbose=0).ravel()
                        h_va = (p_va >= 0.5).astype(int)
                        p_te = lstm.predict(X_te, verbose=0).ravel()
                        h_te = (p_te >= 0.5).astype(int)

                        m = {
                            "acc": accuracy_score(y_te, h_te) if len(y_te) else float("nan"),
                            "f1":  f1_score(y_te, h_te) if len(y_te) and len(np.unique(y_te))>1 else float("nan"),
                            "auc": roc_auc_score(y_te, p_te) if len(y_te) and len(np.unique(y_te))>1 else float("nan"),
                        }
                        results["LSTM"] = (None, m)

                        # Map sequence indices back to dates
                        val_idx  = idx[va]
                        test_idx = idx[te]
                        df_val  = pd.DataFrame({"date": val_idx,  "y_true": y_va, "y_pred": h_va, "prob_up": p_va}).set_index("date")
                        df_test = pd.DataFrame({"date": test_idx, "y_true": y_te, "y_pred": h_te, "prob_up": p_te}).set_index("date")
                        save_preds_df(df_val,  label, dataset_version, "lstm", window=WINDOW, split="val")
                        save_preds_df(df_test, label, dataset_version, "lstm", window=WINDOW, split="test")

    # ---- TXT log into /10_logs ---------------------------
    if results:
        lines = [
            f"{label} [{dataset_version}] — DL {','.join(results.keys())} (extended)",
            f"[Run] FREQ={FREQ} | DEBUG={DEBUG} | RUN_ID={RUN_ID}",
            f"[Data] Rows used: {rows_used} | Features: {len(extended_cols)}",
            f"[Extended] Keywords used: {len(kws)}",
        ]
        for name, (_x, met) in results.items():
            lines.append(f"{name}: ACC={met.get('acc', float('nan')):.3f} F1={met.get('f1', float('nan')):.3f} AUC={met.get('auc', float('nan')):.3f}")
        save_txt_log(label, dataset_version, f"DL_{','.join(results.keys())}_extended", lines)

    print(f"✅ Finished {label_tag} | rows: {rows_used} | features: {len(extended_cols)}")


In [17]:
# =========================================================
# CONVENIENCE
# - Batch runners for all assets
# - Leaderboard builder (from logs)
# - Backtest helpers (figures + summaries)
# =========================================================

def run_all_pairs(use_dl_mode="lstm", dataset_version="raw"):
    for lbl, asset in asset_by_label.items():
        print("="*70, f"\n{lbl.upper()} — {use_dl_mode.upper()} [{dataset_version}] FREQ={FREQ} DEBUG={DEBUG}")
        run_asset(asset, dataset_version=dataset_version, use_dl=True, use_dl_mode=use_dl_mode)

def run_all_pairs_lstm(dataset_version="raw"):
    run_all_pairs(use_dl_mode="lstm", dataset_version=dataset_version)

def run_all_pairs_mlp(dataset_version="raw"):
    run_all_pairs(use_dl_mode="mlp", dataset_version=dataset_version)

# ---- leaderboard from /10_logs ---------------------------
def build_leaderboard_from_logs(run_root: Path = RUN_ROOT):
    pat_hdr   = re.compile(r"^(?P<label>.+?) \[(?P<dataset>.+?)\] — (?P<run>.+)$")
    pat_model = re.compile(r"^(?P<model>LR|RF|XGB|MLP|LSTM): ACC=(?P<acc>[\d\.]+) F1=(?P<f1>[\d\.NaN]+) AUC=(?P<auc>[\d\.NaN]+)")
    rows = []
    for fp in sorted((run_root / "10_logs").glob("*.txt")):
        label = dataset = run = None
        for ln in open(fp, "r"):
            ln = ln.strip()
            m1 = pat_hdr.match(ln)
            if m1:
                label, dataset, run = m1.group("label"), m1.group("dataset"), m1.group("run"); continue
            m2 = pat_model.match(ln)
            if m2 and label and run:
                rows.append({
                    "file": fp.name,
                    "asset": label,
                    "dataset": dataset,
                    "run_type": run,
                    "model": m2.group("model"),
                    "ACC": float(m2.group("acc")),
                    "F1":  float("nan") if m2.group("f1")=="NaN" else float(m2.group("f1")),
                    "AUC": float("nan") if m2.group("auc")=="NaN" else float(m2.group("auc")),
                })
    df_leader = (pd.DataFrame(rows)
                 .sort_values(["asset","dataset","AUC","ACC"], ascending=[True,True,False,False]))
    path = save_leaderboard(df_leader, tag="metrics")
    print("Leaderboard saved:", path)
    return df_leader

# ---- backtest helpers -----------------------------------
import matplotlib.pyplot as plt

def backtest_pred_file(pred_csv_path: Path, price_csv_path: Path, asset_label: str,
                       up_thr=0.55, down_thr=0.45, fee_bps=0.0005):
    PRICE_COL_MAP = {a["label"]: a["price_col"] for a in ASSETS}
    px = pd.read_csv(price_csv_path, parse_dates=["Date"]).set_index("Date")
    price_col = PRICE_COL_MAP[asset_label]
    px = px[[price_col]].rename(columns={price_col:"Close"})
    px["ret1"] = px["Close"].pct_change()

    preds = pd.read_csv(pred_csv_path, parse_dates=["date"]).set_index("date")
    dfb = preds.join(px, how="inner").dropna()
    dfb["pos"] = np.where(dfb["prob_up"] >= up_thr, 1, np.where(dfb["prob_up"] <= down_thr, -1, 0))
    dfb["pos_shift"] = dfb["pos"].shift(1).fillna(0)
    dfb["turnover"] = (dfb["pos"] != dfb["pos_shift"]).astype(int)
    dfb["pnl"] = dfb["pos_shift"] * dfb["ret1"] - dfb["turnover"] * fee_bps
    dfb["cum_pnl"] = (1 + dfb["pnl"]).cumprod()
    return dfb

def plot_and_save_equity(df_bt: pd.DataFrame, asset_label: str, model_tag: str):
    fig = plt.figure()
    df_bt["cum_pnl"].plot()
    plt.title(f"{asset_label} – {model_tag}")
    plt.xlabel("Date"); plt.ylabel("Equity")
    path = save_figure(fig, asset_label, f"{_slug(model_tag)}_equity")
    plt.close(fig)
    print("Figure saved:", path)
    return path

def select_best_models(df_leader):
    # top by AUC then ACC per asset/dataset
    return (df_leader.sort_values(["asset","dataset","AUC","ACC"], ascending=[True,True,False,False])
                     .groupby(["asset","dataset"]).head(1).reset_index(drop=True))

def backtest_best_from_leaderboard(df_leader: pd.DataFrame, *, only_model=("LSTM","MLP")):
    best = (df_leader[df_leader["model"].isin(only_model)]
            .sort_values(["asset","dataset","AUC","ACC"], ascending=[True,True,False,False])
            .groupby(["asset","dataset"]).head(1))

    summaries = []
    for _, row in best.iterrows():
        asset, dataset, model = row["asset"], row["dataset"], row["model"]
        # Guess test filename based on convention
        guess = _pred_name(asset, dataset, "lstm", window=30, split="test") if model=="LSTM" \
                else _pred_name(asset, dataset, "mlp",  window=None, split="test")
        pred_path = STAGES["preds_test"] / guess
        if not pred_path.exists():
            print("Skip (no preds):", pred_path.name); continue

        bt = backtest_pred_file(pred_path, RAW_FILE, asset, up_thr=0.55, down_thr=0.45, fee_bps=0.0005)
        model_tag = f"{model} ({dataset})"
        save_backtest_table(bt, asset, model_tag)
        plot_and_save_equity(bt, asset, model_tag)

        summaries.append({
            "asset": asset,
            "dataset": dataset,
            "model": model,
            "final_equity": float(bt["cum_pnl"].iloc[-1]),
            "obs": int(bt["cum_pnl"].shape[0])
        })

    if summaries:
        df_sum = pd.DataFrame(summaries).sort_values(["asset","final_equity"], ascending=[True,False])
        path = STAGES["leaderboard"] / "backtest_summary.csv"
        df_sum.to_csv(path, index=False)
        print("Backtest summary saved:", path)
        print(df_sum)
    else:
        print("No summaries produced (did any TEST predictions save?).")


In [18]:
# =========================================================
# RUNS
# - Print config
# - Train/evaluate (LSTM primary)
# - Build leaderboard
# - Backtest best per asset
# =========================================================

print("=== RUN CONFIG ===")
print("FREQ:", FREQ, "| DEBUG:", DEBUG, "| RUN_ID:", RUN_ID)
print("OUTPUT ROOT:", RUN_ROOT)

# 1) Train/evaluate (LSTM; switch to 'both' or 'mlp' if needed)
run_all_pairs_lstm(dataset_version="raw")

# 2) (Optional) also compare engineered dataset if available:
# If using engineered features file externally, you can construct df_basefreq_eng and call:
# run_all_pairs_lstm(dataset_version="eng")

# 3) Leaderboard (metrics)
df_leader = build_leaderboard_from_logs(RUN_ROOT)

# 4) Backtest top models (saves tables + equity charts)
backtest_best_from_leaderboard(df_leader, only_model=("LSTM","MLP"))

print("\n=== ARTIFACTS ===")
for name, path in STAGES.items():
    num = len(list(path.glob("*")))
    print(f"{name:>12}: {path}  ({num} files)")


=== RUN CONFIG ===
FREQ: D | DEBUG: True | RUN_ID: d_debug_20250912-062527
OUTPUT ROOT: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527
GOLD — LSTM [raw] FREQ=D DEBUG=True
✅ Finished Gold [raw] | rows: 999 | features: 60
BTC — LSTM [raw] FREQ=D DEBUG=True
✅ Finished BTC [raw] | rows: 999 | features: 88
OIL — LSTM [raw] FREQ=D DEBUG=True
✅ Finished Oil [raw] | rows: 999 | features: 96
USDCNY — LSTM [raw] FREQ=D DEBUG=True
✅ Finished USDCNY [raw] | rows: 999 | features: 112
Leaderboard saved: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/50_leaderboards/leaderboard_metrics.csv
Figure saved: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/40_figs/btc_lstm_raw_equity.png
Figure saved: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/40_figs/gold_lstm_raw_equity.png
Figure saved: /content/drive/MyDrive/gt-markets/outputs/runs/d_debug_20250912-062527/40_figs/oil_lstm_raw_equity.png
Figure saved: /con