# tradingagent — Walk-Forward Backtest (Colab GPU)

Run the full 4-layer screener backtest with **GPU-accelerated XGBoost** on Colab.

**Prerequisites:**
- Upload `data/ohlcv_all_a.pkl`, `data/benchmark_000905.pkl`, and `data/industry_mapping.pkl` to your Google Drive under `MyDrive/kronos/data/` (the Config cell below points at all three files).
- Or adjust the paths in the Config cell below.

**Runtime:** Select *Runtime → Change runtime type → T4 GPU*.

In [None]:
# Cell 1: Setup — mount Google Drive and create the project root folder
import os

from google.colab import drive

DRIVE_ROOT = '/content/drive/MyDrive/kronos'

# Mount before creating the folder: DRIVE_ROOT sits under the mount point.
drive.mount('/content/drive')
os.makedirs(DRIVE_ROOT, exist_ok=True)
print(f'Drive root: {DRIVE_ROOT}')

In [None]:
# Cell 2: Clone/update repo, install deps, add to path
import sys

REPO_DIR = '/content/tradingagent'

if not os.path.exists(REPO_DIR):
    !git clone https://github.com/Yuxiaoliu12/tradingagent.git {REPO_DIR}
else:
    !cd {REPO_DIR} && git pull origin main

!pip install -q -r {REPO_DIR}/requirements.txt

sys.path.insert(0, REPO_DIR)
print(f'Repo at: {REPO_DIR}')
print(f'sys.path[0]: {sys.path[0]}')

In [None]:
# Cell 3: Config — GPU XGBoost override
from screener.config import ScreenerConfig

# Input pickles are read from <DRIVE_ROOT>/data; the screener's own root
# is <DRIVE_ROOT>/output/screener.
cfg = ScreenerConfig(
    ohlcv_pickle_path=os.path.join(DRIVE_ROOT, 'data/ohlcv_all_a.pkl'),
    benchmark_pickle_path=os.path.join(DRIVE_ROOT, 'data/benchmark_000905.pkl'),
    industry_pickle_path=os.path.join(DRIVE_ROOT, 'data/industry_mapping.pkl'),
    drive_root=os.path.join(DRIVE_ROOT, 'output/screener'),
)

# Enable GPU for XGBoost (Colab T4) on both layers' parameter dicts.
for xgb_params in (cfg.layer1_xgb_params, cfg.layer2_xgb_params):
    xgb_params['device'] = 'cuda'

# Echo the effective settings so a misconfigured run is spotted early.
print('XGBoost device (Layer 1):', cfg.layer1_xgb_params.get('device'))
print('XGBoost device (Layer 2):', cfg.layer2_xgb_params.get('device'))
print(f'Train: {cfg.train_start} → {cfg.train_end}')
print(f'Backtest: {cfg.backtest_start} → {cfg.backtest_end}')
print(f'Run ID: {cfg.run_id}')
print(f'Run dir: {cfg.run_dir}')
print(f'Cache dir: {cfg.cache_dir}')

## RL Portfolio Agent (Layer 4)

Train a MaskablePPO agent on the same walk-forward windows. Uses L1+L2 signals as observations, manages a 3-stock portfolio with discrete position sizing, open/close execution timing, and action masking for A-share constraints.

**Workflow:** Run Cells 1-3 first (setup + config), then precompute L1+L2 signals below, then train/ablate.

In [None]:
# Cell 4: Precompute L1+L2 signals (run once, cached to Drive)
# Once this has run, run_rl() loads the cached signals and skips L1+L2.
import time
from screener.backtester import WalkForwardBacktester

bt = WalkForwardBacktester(cfg)

# Time only the precompute call, not the backtester construction.
t0 = time.time()
bt.precompute_signals(verbose=True)
precompute_minutes = (time.time() - t0) / 60
print(f'\nPrecompute wall time: {precompute_minutes:.1f} min')

In [None]:
# Cell 6: RL Smoke Test — verify env + PPO work before full run
# Uses synthetic signals (no L1+L2 needed). Should finish in <30 seconds.
#
# This first part checks the action table and builds the synthetic inputs
# (OHLCV frames, benchmark, daily signals) that the later sections feed
# into PortfolioEnv.

import numpy as np
import pandas as pd

# ── 1. Action table ──────────────────────────────────────────────
from screener.portfolio_env import build_action_table, PortfolioEnv
actions = build_action_table(3, 3)
# 63 is the table size this notebook expects for build_action_table(3, 3);
# the same number is reused by the mask-shape check further down.
assert len(actions) == 63, f"Expected 63 actions, got {len(actions)}"
for a in actions:
    # The first three entries of an action are summed as weights; the 1e-9
    # slack absorbs floating-point rounding in that sum.
    assert a[0] + a[1] + a[2] <= 1.0 + 1e-9, f"Weights exceed 1: {a}"
print(f"[OK] Action table: {len(actions)} actions, all weights sum <= 1")

# ── 2. Build synthetic env ───────────────────────────────────────
# 100 trading days, 50 fake stocks with random OHLCV
# Fixed seed: everything below draws from the global NumPy RNG, so the
# generated data depends on the exact order of the np.random calls.
np.random.seed(42)
n_days, n_stocks = 100, 50
dates = pd.bdate_range("2020-01-01", periods=n_days)

ohlcv_dict = {}
for i in range(n_stocks):
    sym = f"sh.{600000+i}"
    # Close follows a geometric random walk: 10 * exp(cumulative N(0, 0.02) steps).
    close = 10.0 * np.exp(np.cumsum(np.random.randn(n_days) * 0.02))
    # NOTE(review): open is perturbed independently of high/low, so a bar's
    # open can fall outside [low, high] — confirm the env tolerates that.
    df = pd.DataFrame({
        "open": close * (1 + np.random.randn(n_days) * 0.005),
        "high": close * (1 + abs(np.random.randn(n_days) * 0.01)),
        "low":  close * (1 - abs(np.random.randn(n_days) * 0.01)),
        "close": close,
        "volume": np.random.randint(1000, 100000, n_days).astype(float),
    }, index=dates)
    ohlcv_dict[sym] = df

symbols = list(ohlcv_dict.keys())
# Benchmark: a straight line from 100 to 110 over the same dates.
bench_df = pd.DataFrame({"close": np.linspace(100, 110, n_days)}, index=dates)

# Fake daily signals: random L2 scores + upside/downside + features
daily_signals = []
for d in dates:
    # upside is non-negative and downside non-positive by construction;
    # their sum is the score used for ranking.
    upside = pd.Series(np.abs(np.random.randn(n_stocks)) * 0.05, index=symbols)
    downside = pd.Series(-np.abs(np.random.randn(n_stocks)) * 0.05, index=symbols)
    combined = upside + downside
    feats = pd.DataFrame(
        np.random.randn(n_stocks, 16),
        index=symbols,
        columns=[
            "macd", "macd_signal", "macd_hist", "rsi_14", "rsi_5",
            "ma5_slope", "ma20_slope", "ma60_slope", "bb_position",
            "volume_trend", "mom_5", "mom_10", "mom_20", "atr_14", "obv_slope",
            "industry_code",
        ],
    )
    # Industry code should be integers, not floats
    feats["industry_code"] = np.random.randint(0, 20, n_stocks)
    # One dict per trading day; l2_scores and l2_ranking are both ordered
    # best-first (descending combined score).
    daily_signals.append({
        "date": d,
        "l2_scores": combined.sort_values(ascending=False),
        "l2_upside": upside,
        "l2_downside": downside,
        "l2_ranking": list(combined.sort_values(ascending=False).index),
        "l2_features": feats,
    })

from screener.config import ScreenerConfig
# Default configuration: no Drive paths are passed for the smoke test.
smoke_cfg = ScreenerConfig()

# ── 3. Env smoke test: random steps ─────────────────────────────
# Drive the environment with uniformly sampled actions and check that every
# observation keeps the expected shape (40,) and contains no NaN.
env = PortfolioEnv(smoke_cfg, daily_signals, ohlcv_dict, bench_df, training_mode=True)
obs, info = env.reset()
assert obs.shape == (40,), f"Obs shape: {obs.shape}"
assert not np.any(np.isnan(obs)), "NaN in initial obs"

total_reward = 0.0
for step in range(n_days - 1):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    assert obs.shape == (40,), f"Step {step}: obs shape {obs.shape}"
    assert not np.any(np.isnan(obs)), f"Step {step}: NaN in obs"
    total_reward += reward
    # An episode is over when it is terminated OR truncated; the 5-tuple
    # step API expects a reset() after either, so stop stepping on both.
    if terminated or truncated:
        break

final_nav = env._nav
print(f"[OK] Random agent: {step+1} steps, final NAV={final_nav:,.0f} "
      f"(return={final_nav/smoke_cfg.initial_capital - 1:+.2%}), "
      f"total reward={total_reward:.2f}")
assert final_nav > 0, "NAV went to zero"

# ── 4. Action mask sanity ────────────────────────────────────────
# A fresh evaluation-mode env must expose a boolean mask with one entry per
# action (63) and at least one legal action on the first day.
env2 = PortfolioEnv(smoke_cfg, daily_signals, ohlcv_dict, bench_df, training_mode=False)
obs2, _ = env2.reset()
mask = env2.action_masks()
assert mask.shape == (63,) and mask.dtype == bool and mask.any()
print(f"[OK] Action masks: {mask.sum()}/63 legal, {(~mask).sum()} masked (day 0)")

# ── 5. PPO smoke test: train 1000 steps ─────────────────────────
# Trains a tiny policy on the synthetic env, runs a short masked inference
# rollout, then checks that a saved model reloads to the same decision.
import os
import tempfile

from screener.rl_trader import RLTrader
from sb3_contrib.common.wrappers import ActionMasker
from sb3_contrib.common.maskable.utils import get_action_masks

# Deliberately small budget so the whole section stays a quick smoke test.
smoke_cfg_ppo = ScreenerConfig(
    rl_total_timesteps=1000,
    rl_batch_size=32,
    rl_n_steps=128,
)
train_env = PortfolioEnv(smoke_cfg_ppo, daily_signals, ohlcv_dict, bench_df, training_mode=True)
trader = RLTrader(smoke_cfg_ppo)
model = trader.train(train_env)

# Test inference with masks
test_env = PortfolioEnv(smoke_cfg_ppo, daily_signals, ohlcv_dict, bench_df, training_mode=False)
masked_test_env = ActionMasker(test_env, lambda e: e.action_masks())
obs, _ = masked_test_env.reset()
blocked = 0
steps_run = 0  # actual number of steps taken; may be < 20 if the episode ends
for _ in range(20):
    masks = get_action_masks(masked_test_env)
    action, _ = model.predict(obs, deterministic=True, action_masks=masks)
    obs, reward, terminated, truncated, info = masked_test_env.step(int(action))
    steps_run += 1
    blocked += len(info.get("blocked_trades", []))
    # Stop on either end-of-episode signal rather than stepping a finished env.
    if terminated or truncated:
        break
# Report the real step count instead of a hard-coded "20".
print(f"[OK] PPO inference: {steps_run} steps, NAV={test_env._nav:,.0f}, blocked={blocked}")

# Save/load round-trip
# The temporary directory (and the saved model in it) is removed on exit.
with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, "smoke_model")
    trader.save(model, path)
    loaded = trader.load(path)
    obs_test, _ = masked_test_env.reset()
    masks_test = get_action_masks(masked_test_env)
    # Same observation + same mask + deterministic=True must give the same action.
    a1, _ = model.predict(obs_test, deterministic=True, action_masks=masks_test)
    a2, _ = loaded.predict(obs_test, deterministic=True, action_masks=masks_test)
    assert a1 == a2, "Loaded model gives different action"
print("[OK] Model save/load round-trip")

print("\n=== All smoke tests passed ===")

In [None]:
# Cell 6b: Illegal-Action & Buy-Fallback Tests
# Verifies A-share constraints and L2 fallback behaviour with synthetic OHLCV.
# Should finish in <5 seconds.

import numpy as np
import pandas as pd
from screener.config import ScreenerConfig
from screener.portfolio_env import PortfolioEnv, build_action_table

# Default configuration shared by every scenario in this cell.
test_cfg = ScreenerConfig()
# 10 business days; the scenario price/volume lists below are sized to match.
# NOTE: this rebinds the notebook-level `dates` used by the smoke-test cell.
dates = pd.bdate_range("2020-01-01", periods=10)

def _make_ohlcv(dates, close_vals, open_vals=None, volume_vals=None):
    """Helper: build a simple OHLCV DataFrame."""
    n = len(dates)
    close = np.array(close_vals, dtype=float)
    opn = np.array(open_vals if open_vals else close_vals, dtype=float)
    vol = np.array(volume_vals if volume_vals else [10000.0] * n, dtype=float)
    return pd.DataFrame({
        "open": opn, "high": np.maximum(opn, close) * 1.001,
        "low": np.minimum(opn, close) * 0.999,
        "close": close, "volume": vol,
    }, index=dates)

# ── Shared L2 feature columns ────────────────────────────────────
_feat_cols = [
    "macd", "macd_signal", "macd_hist", "rsi_14", "rsi_5",
    "ma5_slope", "ma20_slope", "ma60_slope", "bb_position",
    "volume_trend", "mom_5", "mom_10", "mom_20", "atr_14", "obv_slope",
    "industry_code",
]

def _make_signals(dates, symbols, ranking=None):
    """Build daily signals with fixed ranking order."""
    sigs = []
    for d in dates:
        scores = pd.Series(
            np.linspace(1, 0, len(symbols)), index=symbols
        )
        if ranking:
            scores = pd.Series(
                np.linspace(1, 0, len(ranking)), index=ranking
            )
        upside = pd.Series(
            np.linspace(0.5, 0, len(scores)), index=scores.index
        )
        downside = pd.Series(
            np.linspace(-0.1, -0.5, len(scores)), index=scores.index
        )
        feats = pd.DataFrame(
            np.zeros((len(scores), len(_feat_cols))),
            index=scores.index, columns=_feat_cols,
        )
        sigs.append({
            "date": d,
            "l2_scores": scores.sort_values(ascending=False),
            "l2_upside": upside,
            "l2_downside": downside,
            "l2_ranking": list(scores.sort_values(ascending=False).index),
            "l2_features": feats,
        })
    return sigs

# Benchmark shared by all scenarios in this cell: a straight line 100 → 110.
bench_df = pd.DataFrame({"close": np.linspace(100, 110, len(dates))}, index=dates)

# ══════════════════════════════════════════════════════════════════
# BUY-SIDE TESTS: fallback when primary stock is blocked
# ══════════════════════════════════════════════════════════════════
# Each scenario uses two stocks: the top-ranked one cannot be bought from
# day 1 on, and the test asserts the env reports exactly one substitution
# in favour of the second-ranked stock.
print("=" * 60)
print("BUY-SIDE FALLBACK TESTS")
print("=" * 60)

# Stock A: limit-up at open on day 1 → blocked
# Stock B: normal → fallback target
# Ranking: A first, B second
prev_close_a = 10.0
limit_up_open_a = prev_close_a * 1.10  # +10% over the day-0 close
ohlcv_a = _make_ohlcv(
    dates,
    close_vals=[prev_close_a] + [limit_up_open_a] * 9,
    open_vals=[prev_close_a] + [limit_up_open_a] * 9,
)
ohlcv_b = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_dict_buy1 = {"sh.600001": ohlcv_a, "sh.600002": ohlcv_b}
ranking = ["sh.600001", "sh.600002"]
sigs = _make_signals(dates, ranking, ranking=ranking)

env = PortfolioEnv(test_cfg, sigs, ohlcv_dict_buy1, bench_df, training_mode=False)
obs, _ = env.reset()

# Action: put 1/3 weight on slot 0 (stock A), open timing
# Slot 0 should be sh.600001 (top ranked), slot 1 = sh.600002
action_table = build_action_table(3, 3)
# Find action: (1/3, 0, 0, "open", None, None)
# NOTE(review): the match uses exact float equality with 1/3, so it relies
# on build_action_table producing that exact value — confirm.
# `act_idx` is reused by every later scenario in this cell.
act_idx = None
for i, a in enumerate(action_table):
    if a[:3] == (1/3, 0, 0) and a[3] == "open":
        act_idx = i
        break
assert act_idx is not None, "Could not find target action"

obs, reward, term, trunc, info = env.step(act_idx)
# `blocked` is collected but not asserted on in this scenario.
blocked = info.get("blocked_trades", [])
subs = info.get("substituted_trades", [])
assert len(subs) == 1, f"Expected 1 substitution, got {len(subs)}: {subs}"
assert subs[0]["slot_symbol"] == "sh.600001", f"Wrong slot sym: {subs[0]}"
assert subs[0]["bought_symbol"] == "sh.600002", f"Wrong bought sym: {subs[0]}"
assert "sh.600002" in env._holdings, "Fallback stock not in holdings"
assert "sh.600001" not in env._holdings, "Blocked stock should not be in holdings"
print(f"[OK] Limit-up fallback: A blocked, B bought. subs={subs}")

# Stock C: suspended (volume=0) → fallback to Stock D
# Volume is normal on day 0 and zero on every later day.
ohlcv_c = _make_ohlcv(dates, close_vals=[10.0] * 10, volume_vals=[10000] + [0] * 9)
ohlcv_d = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_dict_buy2 = {"sh.600003": ohlcv_c, "sh.600004": ohlcv_d}
ranking2 = ["sh.600003", "sh.600004"]
sigs2 = _make_signals(dates, ranking2, ranking=ranking2)

env2 = PortfolioEnv(test_cfg, sigs2, ohlcv_dict_buy2, bench_df, training_mode=False)
obs2, _ = env2.reset()
obs2, _, _, _, info2 = env2.step(act_idx)
subs2 = info2.get("substituted_trades", [])
assert len(subs2) == 1, f"Expected 1 sub (suspension), got {len(subs2)}"
assert subs2[0]["bought_symbol"] == "sh.600004"
assert "sh.600004" in env2._holdings
print(f"[OK] Suspension fallback: C blocked (vol=0), D bought. subs={subs2}")

# Stock E: 一字板 at limit-up (O=H=L=C = prev_close * 1.10) → fallback to F
prev_close_e = 10.0
yizi_price = prev_close_e * 1.10
ohlcv_e = _make_ohlcv(
    dates,
    close_vals=[prev_close_e] + [yizi_price] * 9,
    open_vals=[prev_close_e] + [yizi_price] * 9,
)
# Force O=H=L=C for 一字板
# (_make_ohlcv widens high/low by ±0.1%, so the flat bar has to be written
# back explicitly for every row after day 0.)
for col in ["open", "high", "low", "close"]:
    ohlcv_e.iloc[1:, ohlcv_e.columns.get_loc(col)] = yizi_price

ohlcv_f = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_dict_buy3 = {"sh.600005": ohlcv_e, "sh.600006": ohlcv_f}
ranking3 = ["sh.600005", "sh.600006"]
sigs3 = _make_signals(dates, ranking3, ranking=ranking3)

env3 = PortfolioEnv(test_cfg, sigs3, ohlcv_dict_buy3, bench_df, training_mode=False)
obs3, _ = env3.reset()
obs3, _, _, _, info3 = env3.step(act_idx)
subs3 = info3.get("substituted_trades", [])
assert len(subs3) == 1, f"Expected 1 sub (一字板), got {len(subs3)}"
assert subs3[0]["bought_symbol"] == "sh.600006"
print(f"[OK] 一字板 limit-up fallback: E blocked, F bought. subs={subs3}")

# ══════════════════════════════════════════════════════════════════
# SELL-SIDE TESTS: blocked sells keep position
# ══════════════════════════════════════════════════════════════════
print("\n" + "=" * 60)
print("SELL-SIDE BLOCKING TESTS")
print("=" * 60)

# Locate the first action whose three slot weights are all zero; the timing
# entries of the action are not inspected.
sell_act_idx = next(
    (idx for idx, act in enumerate(action_table) if act[:3] == (0, 0, 0)),
    None,
)
assert sell_act_idx is not None
# Helper to set up an env with a held stock, then try to sell on day 2
def _sell_test(ohlcv_dict, sym, description):
    """Buy on the first step, then request a full liquidation on the next.

    Args:
        ohlcv_dict: Symbol → OHLCV frame; its key order is used as the ranking.
        sym: Symbol that must be held after the buy step.
        description: Scenario label; not used inside the helper.

    Returns:
        ``(env, info, shares_before)``: the environment after both steps, the
        info dict returned by the sell step, and the share count of ``sym``
        recorded before that step.
    """
    order = list(ohlcv_dict)
    signals = _make_signals(dates, order, ranking=order)
    sim = PortfolioEnv(test_cfg, signals, ohlcv_dict, bench_df, training_mode=False)
    sim.reset()

    # Step 1: buy with the notebook-level `act_idx` action (1/3 on slot 0).
    sim.step(act_idx)
    assert sym in sim._holdings, f"Failed to buy {sym} on step 1"
    held_before = sim._holdings[sym]["shares"]

    # Step 2: the all-zero-weight action asks the env to sell everything.
    # Per the original author's note, T+1 is already satisfied at this point
    # (hold_days goes 0 → 1 at step start), so any block observed by the
    # caller comes from the scenario's own constraint.
    _, _, _, _, sell_info = sim.step(sell_act_idx)

    return sim, sell_info, held_before

# Test: limit-down at open blocks sell
# Days 0-1 trade at 10.0 (the buy happens on the first step); from day 2 on
# the stock opens and closes at 90% of the previous close.
prev_close_g = 10.0
limit_down_price = prev_close_g * 0.90
ohlcv_g = _make_ohlcv(
    dates,
    close_vals=[prev_close_g, prev_close_g, limit_down_price] + [limit_down_price] * 7,
    open_vals=[prev_close_g, prev_close_g, limit_down_price] + [limit_down_price] * 7,
)
ohlcv_g2 = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_sell1 = {"sh.600007": ohlcv_g, "sh.600008": ohlcv_g2}
env_s1, info_s1, shares_s1 = _sell_test(ohlcv_sell1, "sh.600007", "limit-down")
# Only blocked-trade records for the stock under test are counted.
blocked_s1 = [t for t in info_s1.get("blocked_trades", []) if t["symbol"] == "sh.600007"]
assert len(blocked_s1) > 0, "Limit-down sell should be blocked"
assert "sh.600007" in env_s1._holdings, "Position should be kept"
assert env_s1._holdings["sh.600007"]["shares"] == shares_s1, "Shares should be unchanged"
print(f"[OK] Limit-down sell blocked: position unchanged, blocked={blocked_s1}")

# Test: 一字板 blocks sell (O=H=L=C, any direction)
prev_close_h = 10.0
yizi_sell_price = prev_close_h * 0.98  # not limit-down, just flat bar
ohlcv_h = _make_ohlcv(
    dates,
    close_vals=[prev_close_h, prev_close_h, yizi_sell_price] + [yizi_sell_price] * 7,
    open_vals=[prev_close_h, prev_close_h, yizi_sell_price] + [yizi_sell_price] * 7,
)
# Force O=H=L=C for 一字板 on day 2
# (only row 2 is flattened; later rows keep the ±0.1% high/low spread)
for col in ["open", "high", "low", "close"]:
    ohlcv_h.iloc[2, ohlcv_h.columns.get_loc(col)] = yizi_sell_price
ohlcv_h2 = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_sell2 = {"sh.600009": ohlcv_h, "sh.600010": ohlcv_h2}
env_s2, info_s2, shares_s2 = _sell_test(ohlcv_sell2, "sh.600009", "一字板")
blocked_s2 = [t for t in info_s2.get("blocked_trades", []) if t["symbol"] == "sh.600009"]
assert len(blocked_s2) > 0, "一字板 sell should be blocked"
assert env_s2._holdings["sh.600009"]["shares"] == shares_s2
print(f"[OK] 一字板 sell blocked: position unchanged, blocked={blocked_s2}")

# Test: suspended (volume=0) blocks sell
# Price stays flat at 10.0; volume drops to zero from day 2 onward.
ohlcv_j = _make_ohlcv(
    dates,
    close_vals=[10.0] * 10,
    volume_vals=[10000, 10000, 0] + [0] * 7,
)
ohlcv_j2 = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_sell3 = {"sh.600011": ohlcv_j, "sh.600012": ohlcv_j2}
env_s3, info_s3, shares_s3 = _sell_test(ohlcv_sell3, "sh.600011", "suspension")
blocked_s3 = [t for t in info_s3.get("blocked_trades", []) if t["symbol"] == "sh.600011"]
assert len(blocked_s3) > 0, "Suspension sell should be blocked"
assert env_s3._holdings["sh.600011"]["shares"] == shares_s3
print(f"[OK] Suspension sell blocked: position unchanged, blocked={blocked_s3}")

# ══════════════════════════════════════════════════════════════════
# T+1 MASK TEST: can't sell stock bought today
# ══════════════════════════════════════════════════════════════════
print("\n" + "=" * 60)
print("T+1 MASK TEST")
print("=" * 60)

# Three identical, flat, fully tradable stocks: nothing but the T+1 rule
# could restrict selling here.
ohlcv_k = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_l = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_m = _make_ohlcv(dates, close_vals=[10.0] * 10)
ohlcv_t1 = {"sh.600013": ohlcv_k, "sh.600014": ohlcv_l, "sh.600015": ohlcv_m}
ranking_t1 = list(ohlcv_t1.keys())
sigs_t1 = _make_signals(dates, ranking_t1, ranking=ranking_t1)

env_t1 = PortfolioEnv(test_cfg, sigs_t1, ohlcv_t1, bench_df, training_mode=False)
obs_t1, _ = env_t1.reset()

# Buy stock on slot 0
obs_t1, _, _, _, _ = env_t1.step(act_idx)
# NOTE(review): the first clause is redundant — "sh.600013" is in ranking_t1,
# so the any(...) already covers it. The check only requires that *some*
# ranked stock is held.
assert "sh.600013" in env_t1._holdings or any(
    s in env_t1._holdings for s in ranking_t1
), "Should have bought something"

# Check action masks: selling the just-bought stock should still be
# allowed in the mask (hold_days=0 at mask time is fine — after increment
# it will be 1, meeting the T+1 minimum). The mask doesn't block this
# because hold_days >= 0 passes. The actual T+1 enforcement is in _can_sell
# (hold_days < 1 after increment). Since step() increments hold_days BEFORE
# sells, a stock with hold_days=0 at mask time → 1 after increment → sellable.
# This confirms the mask is correct: it does NOT over-block.
mask_t1 = env_t1.action_masks()
assert mask_t1[sell_act_idx], "All-zero weight action should be legal"
print(f"[OK] T+1 mask: sell action remains legal (hold_days=0 at mask time "
      f"→ 1 after increment → sellable)")

# Verify: if we manually set hold_days to -1 (hypothetical), mask still
# doesn't block (mask only checks hold_days < 0, which is a safety net).
# The real T+1 enforcement is in _can_sell during step().
# NOTE(review): nothing is executed or asserted for this claim — the line
# below is informational only and prints "[OK]" unconditionally.
print(f"[OK] T+1 enforced by _can_sell (hold_days < 1), not by mask")

print("\n" + "=" * 60)
print("=== All illegal-action tests passed ===")
print("=" * 60)

In [None]:
# Cell 7: Run RL backtest
import time
from screener.backtester import WalkForwardBacktester

bt = WalkForwardBacktester(cfg)

# Time only the run itself; `rl_results` and `rl_elapsed` (seconds) stay
# available to later cells.
t0 = time.time()
rl_results = bt.run_rl(verbose=True)
rl_elapsed = time.time() - t0

rl_minutes = rl_elapsed / 60
print(f'\nRL backtest wall time: {rl_minutes:.1f} min')

In [None]:
# Cell 7b: Display RL results
# Prints metrics and per-window results, plots normalised NAV, then pickles
# `rl_results` into the run directory.
import pickle

import matplotlib.pyplot as plt

print('=== RL Backtest Metrics ===')
for k, v in rl_results['metrics'].items():
    # Floats get 4 decimals; anything else is printed as-is.
    if isinstance(v, float):
        print(f'  {k:>20}: {v:.4f}')
    else:
        print(f'  {k:>20}: {v}')

print('\n=== Per-Window Results ===')
for wr in rl_results['window_results']:
    print(f"  Window {wr['window']+1}: {wr['test_start']}→{wr['test_end']}  "
          f"return={wr['test_return']*100:+.2f}%  NAV={wr['final_nav']:,.0f}  "
          f"blocked={wr['blocked_trades']}  subs={wr.get('substituted_trades', 0)}")

# NAV curve comparison (each series is rebased to 1 at its first point)
fig, ax = plt.subplots(figsize=(14, 5))
rl_nav = rl_results['nav_series']
if len(rl_nav) > 0:
    rl_nav.div(rl_nav.iloc[0]).plot(ax=ax, label='RL Agent', linewidth=1.5)

# Overlay paper trader NAV only if an earlier cell left a `results` object
# in the notebook namespace.
if 'results' in globals() and 'nav_series' in results:
    pt_nav = results['nav_series']
    if len(pt_nav) > 0:
        pt_nav.div(pt_nav.iloc[0]).plot(ax=ax, label='Paper Trader', linewidth=1, alpha=0.7)

ax.set_title('Normalised NAV: RL Agent vs Paper Trader')
ax.set_ylabel('Growth of $1')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Save RL results
rl_path = os.path.join(cfg.run_dir, 'rl_backtest_results.pkl')
os.makedirs(os.path.dirname(rl_path), exist_ok=True)
with open(rl_path, 'wb') as out_file:
    pickle.dump(rl_results, out_file)
print(f'\nRL results saved → {rl_path}')

## RL Ablation: Candidate Mode × Action Mode

Runs 6 variants to verify that L2 screening and the RL policy each contribute value.

| Label | candidate_mode | action_mode | What it tests |
|-------|---------------|-------------|---------------|
| A (baseline) | top | policy | Current RL agent |
| B | random_l2 | policy | Does rank within top 30 matter? |
| C | bottom_l2 | policy | Does L2 ordering matter? |
| D | random_l1 | policy | Does L2 filtering matter? |
| E | top | equal_weight | Does RL policy add value over naive? |
| F | top | random | Does learned policy beat random? |

**Expected ordering (if system works):** A > B > C, A > D, A > E, A > F

In [None]:
# Cell 8: RL Ablation — candidate mode × action mode
# Reuses trained PPO models from Cell 7. For random/equal_weight action modes,
# no model is loaded. All variants use inference_only=True.

import time

import matplotlib.pyplot as plt
import pandas as pd
from screener.backtester import WalkForwardBacktester

# (label, candidate_mode, action_mode) — one row per variant.
ABLATION_VARIANTS = [
    ("A (baseline)",  "top",       "policy"),
    ("B (rand L2)",   "random_l2", "policy"),
    ("C (bottom L2)", "bottom_l2", "policy"),
    ("D (rand L1)",   "random_l1", "policy"),
    ("E (equal wt)",  "top",       "equal_weight"),
    ("F (random)",    "top",       "random"),
]

ablation_rows = []
ablation_navs = {}

for label, cand_mode, act_mode in ABLATION_VARIANTS:
    print(f"\n{'='*60}")
    print(f"  Ablation: {label}  (candidate={cand_mode}, action={act_mode})")
    print(f"{'='*60}")

    # A fresh backtester per variant; only the run_rl call is timed.
    abl_bt = WalkForwardBacktester(cfg)
    t0 = time.time()
    res = abl_bt.run_rl(
        verbose=False,
        inference_only=True,
        candidate_mode=cand_mode,
        action_mode=act_mode,
    )
    elapsed = time.time() - t0

    m = res["metrics"]
    row = {
        "variant": label,
        "candidate_mode": cand_mode,
        "action_mode": act_mode,
    }
    # Missing metrics default to 0.0 in the summary table.
    for metric in ("total_return", "sharpe", "max_drawdown"):
        row[metric] = m.get(metric, 0.0)
    row["wall_time_s"] = elapsed
    ablation_rows.append(row)

    ablation_navs[label] = res["nav_series"]
    print(f"  Return: {m.get('total_return', 0):.4f}  "
          f"Sharpe: {m.get('sharpe', 0):.4f}  "
          f"MaxDD: {m.get('max_drawdown', 0):.4f}  "
          f"({elapsed:.0f}s)")

# ── Summary table ─────────────────────────────────────────────────
print(f"\n{'='*60}")
print("ABLATION SUMMARY")
print(f"{'='*60}")
abl_df = pd.DataFrame(ablation_rows).set_index("variant")
print(abl_df[["candidate_mode", "action_mode", "total_return", "sharpe", "max_drawdown"]].to_string())

# ── NAV curves ────────────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(14, 5))
for label, nav in ablation_navs.items():
    # Empty series cannot be rebased to their first value; skip them.
    if len(nav) == 0:
        continue
    (nav / nav.iloc[0]).plot(ax=ax, label=label, linewidth=1.2)
ax.set_title("RL Ablation: Normalised NAV")
ax.set_ylabel("Growth of $1")
ax.legend(fontsize=9)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Cell 9: List past runs
# Prints every run directory under <drive_root>/runs, newest name first,
# together with the sorted names of the entries it contains.
runs_dir = os.path.join(cfg.drive_root, 'runs')
if os.path.isdir(runs_dir):
    # Keep directories only: a stray file in runs/ would otherwise make
    # os.listdir(run_path) raise NotADirectoryError.
    runs = sorted(
        (r for r in os.listdir(runs_dir) if os.path.isdir(os.path.join(runs_dir, r))),
        reverse=True,
    )
    print(f'Past runs ({len(runs)}):')
    for r in runs:
        run_path = os.path.join(runs_dir, r)
        contents = os.listdir(run_path)
        print(f'  {r}  ({", ".join(sorted(contents))})')
else:
    print('No runs yet.')