# Multi-Layer Stock Screener + Trading Agent

**Architecture:** All A-shares → Factor Timing (XGBoost) → Technical Ranker (XGBRanker) → Kronos Prediction → Paper Trading

Data: local baostock OHLCV pickles (5,165 stocks, 2015–2026). No Qlib dependency.

In [None]:
# Cell 0: Install dependencies & setup output directory
!pip install xgboost pandas_ta torch transformers akshare -q

import os
os.makedirs('output/screener/models', exist_ok=True)

In [None]:
# Cell 1: One-time download of all A-share OHLCV via baostock (~5.5 hours).
# The pickles are written under data/, so this cell can be skipped on later runs.
from screener.download_ohlcv import (
    get_all_a_shares,
    download_universe_ohlcv,
    download_benchmark,
)

SAVE_DIR = 'data'

# List the universe first so its size is reported before the long download starts.
symbols = get_all_a_shares()
print(f'Found {len(symbols)} A-share stocks')

# OHLCV for every listed symbol, saved to a single pickle inside SAVE_DIR.
ohlcv = download_universe_ohlcv(
    symbols,
    save_path=f'{SAVE_DIR}/ohlcv_all_a.pkl',
    start_date='20150101',
    end_date='20260225',
)
print(f'Done. {len(ohlcv)} stocks saved.')

# Benchmark (CSI500 index) over the same date range as the universe download.
download_benchmark(
    '000905',
    '20150101',
    '20260225',
    f'{SAVE_DIR}/benchmark_000905.pkl',
)

In [None]:
# Cell 2: Import screener modules & init config
import sys
# Put the current directory first on the module search path; this runs before
# the `screener` imports below, so keep it ahead of them.
sys.path.insert(0, '.')  # adjust to your repo path

from screener.config import ScreenerConfig
from screener.data_pipeline import (
    init_data, load_alpha158_factors, load_alpha158_labels,
    load_market_regime_features, load_raw_ohlcv, get_calendar,
)
from screener.factor_timing_model import FactorTimingModel
from screener.technical_ranker import TechnicalRanker
from screener.kronos_screener import KronosScreener
from screener.paper_trader import PaperTrader
from screener.backtester import WalkForwardBacktester

# One shared config object: the later cells pass `cfg` to every loader, model,
# trader and backtester they construct or call.
cfg = ScreenerConfig()
init_data(cfg)
print('Data loaded.')

In [None]:
# Cell 3: Load the Alpha158 factors, their labels, and the market-regime features.
# NOTE(review): the previous comment said these are "cached to Drive" — confirm
# where the pipeline actually caches them.
alpha158 = load_alpha158_factors(cfg)
labels = load_alpha158_labels(cfg)
regime = load_market_regime_features(cfg)

# Report the shape of each loaded object, one per line.
print(
    f'Alpha158 shape: {alpha158.shape}',
    f'Labels shape:   {labels.shape}',
    f'Regime shape:   {regime.shape}',
    sep='\n',
)

In [None]:
# Cell 4: Train Layer 1 — Factor Timing Model
layer1 = FactorTimingModel(cfg)
# Training inputs are built from the Alpha158 factors and the regime features
# loaded in Cell 3.
X, Y = layer1.build_training_data(alpha158, regime)
layer1.train(X, Y)

# Validate over the window [cfg.val_start, cfg.val_end].
# NOTE(review): validate() receives the same X, Y that were passed to train() —
# confirm the model restricts training/validation by date internally so the
# validation window is genuinely held out.
layer1.validate(X, Y, cfg.val_start, cfg.val_end)
layer1.save()

In [None]:
# Cell 5: Train Layer 2 — Technical Ranker
import pandas as pd

# OHLCV from the start of training through the end of the backtest window.
# Only the first 500 instruments of the Alpha158 index are loaded, for speed.
all_symbols = list(alpha158.index.get_level_values('instrument').unique())
ohlcv = load_raw_ohlcv(all_symbols[:500], cfg.train_start, cfg.backtest_end, cfg)

# Forward return: the close shifted by -cfg.layer2_forward_days rows, divided
# by the current close, minus 1 (one column per symbol).
close_dict = {ticker: bars['close'] for ticker, bars in ohlcv.items()}
close_df = pd.DataFrame(close_dict)
fwd_ret = close_df.shift(-cfg.layer2_forward_days) / close_df - 1

# Fit on every 5th calendar entry of the training window (subsampled for speed).
cal = get_calendar(cfg, cfg.train_start, cfg.train_end)
train_dates = list(cal[::5])

layer2 = TechnicalRanker(cfg)
X2, y2, g2 = layer2.build_training_data(ohlcv, train_dates, fwd_ret)
layer2.train(X2, y2, g2)

# Evaluate on every 5th entry of the validation window; validation is skipped
# when no rows could be built for those dates.
val_cal = get_calendar(cfg, cfg.val_start, cfg.val_end)
val_dates = list(val_cal[::5])
Xv, yv, gv = layer2.build_training_data(ohlcv, val_dates, fwd_ret)
if len(Xv):
    layer2.validate(Xv, yv, gv)
layer2.save()

In [None]:
# Cell 6: Load Kronos models to GPU (falls back to CPU when CUDA is unavailable)
import torch

# Query CUDA availability once and reuse the answer for reporting and for
# choosing the device below.
has_cuda = torch.cuda.is_available()
print(f'GPU available: {has_cuda}')
if has_cuda:
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    # The device-properties object exposes the size in bytes as `total_memory`;
    # the previously used `total_mem` is not an attribute and raised
    # AttributeError on any machine with a GPU.
    print(f'Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')

layer3 = KronosScreener(cfg, device='cuda:0' if has_cuda else 'cpu')
try:
    layer3.load_model()
    print('Kronos loaded successfully.')
except Exception as e:
    # Deliberate best-effort at the notebook boundary: report the failure and
    # keep going. Cell 7 checks `layer3.predictor` before using Kronos.
    print(f'Kronos load failed (update paths in config): {e}')

In [None]:
# Cell 7: Single-day inference demo — run the screening layers for one date
import pandas as pd

test_date = pd.Timestamp('2024-01-15')

# Layer 1: candidates selected from the Alpha158 factors for the test date
layer1_picks = layer1.select_top(test_date, alpha158_df=alpha158)
print(f'Layer 1: {len(layer1_picks)} stocks')

# Layer 2: rank the Layer 1 candidates (news features switched off)
# NOTE(review): `ohlcv` was loaded for a 500-symbol subset in Cell 5 — confirm
# select_top tolerates Layer 1 picks that are absent from it.
layer2_picks = layer2.select_top(ohlcv, layer1_picks, test_date, include_news=False)
print(f'Layer 2: {len(layer2_picks)} stocks')
print('Top 10:', layer2_picks[:10])

# Layer 3 (Kronos): runs only when the screener has a predictor attached
if layer3.predictor is None:
    print('\nSkipping Layer 3 (Kronos not loaded)')
else:
    scores = layer3.screen_stocks(ohlcv, layer2_picks, test_date)
    print('\nLayer 3 (Kronos) results:')
    print(scores.head(10))

In [None]:
# Cell 8: Paper trading dashboard — print every metric the trader reports
trader = PaperTrader(cfg)
metrics = trader.get_metrics()
print('Paper Trading Metrics:')
# One indented "name: value" line per metric, in the dict's own order.
for metric_name, metric_value in metrics.items():
    print(f'  {metric_name}: {metric_value}')

In [None]:
# Cell 9: Full walk-forward backtest
backtester = WalkForwardBacktester(cfg)

# Set run_kronos=False for a fast test without GPU inference
results = backtester.run(run_kronos=False, verbose=True)

# Hand the results back to the backtester for saving; `results` also stays in
# memory and is read by the visualization cell (Cell 10).
backtester.save_results(results)

In [None]:
# Cell 10: Results visualization — NAV curve (top) and drawdown (bottom)
import matplotlib.pyplot as plt

nav = results['nav_series']
metrics = results['metrics']

# Drawdown in percent: how far NAV sits below its running maximum.
cummax = nav.cummax()
dd = (nav - cummax) / cummax * 100

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8))

# Top panel: NAV against the initial-capital reference line, with the headline
# metrics in the title.
ax = axes[0]
nav.plot(label='Screener NAV', linewidth=1.5, ax=ax)
ax.axhline(cfg.initial_capital, linestyle='--', color='gray', label='Initial Capital')
ax.set_title(f'Portfolio NAV  |  Return: {metrics["total_return"]*100:.2f}%  |  '
             f'Sharpe: {metrics["sharpe"]:.2f}  |  MaxDD: {metrics["max_drawdown"]*100:.2f}%')
ax.set_ylabel('NAV (CNY)')
ax.legend()
ax.grid(True, alpha=0.3)

# Bottom panel: drawdown line with the area between it and zero shaded.
ax = axes[1]
dd.plot(label='Drawdown', color='red', alpha=0.7, ax=ax)
ax.fill_between(dd.index, dd.values, 0, color='red', alpha=0.2)
ax.set_title('Drawdown')
ax.set_ylabel('Drawdown (%)')
ax.legend()
ax.grid(True, alpha=0.3)

# Write the figure to disk before showing it.
plt.tight_layout()
plt.savefig('output/screener/backtest_results.png', dpi=150)
plt.show()

# Layer attribution bar chart: one bar per layer, showing its mean 5-day
# forward return converted to percent.
attr = results['layer_attribution']
names = list(attr.keys())
returns = [attr[n]['mean_5d_return'] * 100 for n in names]

fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(names, returns, color=['gray', 'steelblue', 'darkorange', 'green'])
ax.set_title('Layer Attribution: Avg 5-Day Forward Return (%)')
ax.set_ylabel('Return (%)')
ax.grid(axis='y', alpha=0.3)
for bar, r in zip(bars, returns):
    # Put the value label just outside the bar's end: above a non-negative bar,
    # below a negative one. Always using `height + 0.01` with va='bottom' drew
    # the label inside the bar whenever the return was negative.
    if r >= 0:
        label_offset, label_va = 0.01, 'bottom'
    else:
        label_offset, label_va = -0.01, 'top'
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + label_offset,
            f'{r:.3f}%', ha='center', va=label_va, fontsize=10)
plt.tight_layout()
plt.savefig('output/screener/layer_attribution.png', dpi=150)
plt.show()

# Trade summary: headline figures read from the backtest `metrics` dict.
# win_rate is multiplied by 100 for display as a percentage; P&L and commission
# are printed rounded to whole units with thousands separators.
print(f'\nTrade count: {metrics["trade_count"]}')
print(f'Win rate: {metrics["win_rate"]*100:.1f}%')
print(f'Total P&L: {metrics["total_pnl"]:,.0f}')
print(f'Total commission: {metrics["total_commission"]:,.0f}')