# Phase 27: Research-Driven Trading System — Colab Training

**Fixes Applied:**
- 🔴 P0: True DDQN (target net, soft-update τ=0.005)
- 🔴 P0: Corrected volatility scaling (inverse, 1/vol)
- 🟠 P1: Sequence length 240, no warm-start, trade-only Sharpe
- 🟡 P2: SAE encoder (21→12 features), LR scheduler, ε decay 0.98

**Data:** `/content/drive/MyDrive/data/{symbol}.csv`

### GPU: Runtime > Change runtime type > **T4 GPU**

In [None]:
# Mount Google Drive so files under /content/drive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')
# Notebook shell command (not Python): quietly install the third-party
# packages that the following cell imports.
!pip install pandas_ta tqdm matplotlib -q

In [None]:
import torch, torch.nn as nn, torch.optim as optim
import pandas as pd, pandas_ta as ta, numpy as np
import os, random, matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader

class Settings:
    """Static configuration shared by the model, feature and training code.

    Every value is a class attribute, read directly as ``Settings.NAME``;
    the class is never instantiated in this file.
    """

    # --- Instruments -------------------------------------------------------
    # Per-symbol parameters. Only 'scaling_factor' is read by the training
    # code in this file; the other keys are kept as-is.
    PAIR_CONFIGS = {
        'XAUUSD': dict(spread=0.20, commission=0.0, scaling_factor=5.0, contract_size=100),
        'EURUSD': dict(spread=0.0001, commission=0.0, scaling_factor=10000.0, contract_size=100000),
        'GBPUSD': dict(spread=0.0002, commission=0.0, scaling_factor=10000.0, contract_size=100000),
    }
    PAIRS = [*PAIR_CONFIGS]

    # --- Model inputs ------------------------------------------------------
    SEQUENCE_LENGTH = 240  # P1: Fischer & Krauss
    FEATURES = [
        # standardized lagged returns
        'ret_close_1',
        'ret_close_2',
        'ret_close_5',
        'ret_close_10',
        'ret_close_20',
        # candle geometry (ATR-scaled)
        'body_ratio',
        'upper_wick_ratio',
        'lower_wick_ratio',
        'range_ratio',
        # volatility
        'volatility_ratio',
        'atr_normalized',
        # momentum
        'rsi_14',
        'macd_signal_dist',
        'roc_10',
        # trend
        'dist_ema_50',
        'dist_ema_200',
        'ema_slope_50',
        # Bollinger bands
        'bb_position',
        'bb_width',
        # directional movement
        'adx_normalized',
        'di_diff',
    ]
    INPUT_DIM = len(FEATURES)  # 21

    # --- Network architecture ----------------------------------------------
    SAE_DIM = 12  # P2: width of the compressed encoder output
    HIDDEN_DIM = 64
    NUM_LAYERS = 2
    DROPOUT = 0.1

    # --- Action space ------------------------------------------------------
    # Each discrete action index maps to a signed position fraction.
    ACTION_MAP = [-1.0, -0.66, -0.33, 0.0, 0.33, 0.66, 1.0]
    OUTPUT_DIM = len(ACTION_MAP)  # 7
    MAX_LOT_SIZE = 0.05

    # --- Optimisation ------------------------------------------------------
    EPOCHS = 20
    BATCH_SIZE = 64
    LEARNING_RATE = 1e-4
    GAMMA = 0.99
    LR_SCHEDULER_PATIENCE = 3
    LR_SCHEDULER_FACTOR = 0.5
    EARLY_STOP_PATIENCE = 10

    # --- Exploration -------------------------------------------------------
    EPSILON_START = 1.0
    EPSILON_DECAY = 0.98  # P2: slower decay
    EPSILON_MIN = 0.01

    # --- Target network (P0: DDQN) -----------------------------------------
    TARGET_UPDATE_FREQ = 100
    TAU = 0.005

    # --- Prioritized experience replay -------------------------------------
    PER_ALPHA = 0.6
    PER_BETA_START = 0.4

    # --- Reward shaping / feature windows ----------------------------------
    SIGMA_TARGET = 0.02
    VOLATILITY_WINDOW = 20
    TRANSACTION_COST_BPS = 10
    STANDARDIZE_WINDOW = 60

    # --- Walk-forward split sizes (in bars) --------------------------------
    TRAIN_WINDOW = 60000
    VAL_WINDOW = 10000
    TEST_WINDOW = 10000

# Echo the configured pipeline (feature count -> encoder -> LSTM -> actions)
# and the device PyTorch will use, so a misconfigured runtime is visible early.
print(f'Config: {Settings.INPUT_DIM} feats → SAE({Settings.SAE_DIM}) → LSTM({Settings.HIDDEN_DIM}) → {Settings.OUTPUT_DIM} actions')
print(f'Device: {torch.device("cuda" if torch.cuda.is_available() else "cpu")}')

In [None]:
class QNetwork(nn.Module):
    """Q-value network: per-timestep encoder -> LSTM -> LayerNorm -> MLP head.

    The encoder (called "SAE" in this file) is a two-layer feed-forward
    stack applied to each timestep independently; it maps ``input_dim``
    features down to ``sae_dim``. The LSTM consumes the encoded sequence and
    only its final timestep feeds the output head.

    Args:
        input_dim: Number of raw features per timestep.
        sae_dim: Encoder output width (LSTM input size).
        hidden_dim: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used between LSTM layers (only when
            ``num_layers > 1``) and inside the output head.
        output_dim: Number of discrete actions (one Q-value each).
    """

    def __init__(self, input_dim=Settings.INPUT_DIM, sae_dim=Settings.SAE_DIM,
                 hidden_dim=Settings.HIDDEN_DIM, num_layers=Settings.NUM_LAYERS,
                 dropout=Settings.DROPOUT, output_dim=Settings.OUTPUT_DIM):
        super().__init__()
        # Inter-layer LSTM dropout is disabled for a single-layer LSTM.
        lstm_dropout = dropout if num_layers > 1 else 0
        head_width = hidden_dim // 2

        self.sae_encoder = nn.Sequential(
            nn.Linear(input_dim, input_dim),
            nn.ReLU(),
            nn.Linear(input_dim, sae_dim),
            nn.ReLU(),
        )
        self.lstm = nn.LSTM(
            input_size=sae_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            dropout=lstm_dropout,
            batch_first=True,
        )
        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, head_width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(head_width, output_dim),
        )

    def forward(self, x):
        """Return Q-values of shape (batch, output_dim) for a rank-3 input
        laid out as (batch, sequence, features)."""
        batch_size, seq_len, _ = x.size()
        # Collapse batch and time so the encoder sees one row per timestep,
        # then restore the sequence layout for the LSTM.
        flat_steps = x.reshape(batch_size * seq_len, -1)
        encoded = self.sae_encoder(flat_steps).reshape(batch_size, seq_len, -1)
        lstm_out, _ = self.lstm(encoded)
        final_step = lstm_out[:, -1, :]
        return self.fc(self.layer_norm(final_step))


print('QNetwork with SAE encoder loaded.')

In [None]:
class SumTree:
    """Array-backed binary sum tree used for proportional sampling.

    Leaves hold priorities; every internal node holds the sum of its two
    children, so ``tree[0]`` is the total priority. The tree array has
    ``2 * capacity - 1`` nodes and the leaf for data slot ``i`` lives at tree
    index ``i + capacity - 1``. Data slots are written as a ring buffer.

    Attributes:
        capacity: Maximum number of stored items.
        tree: Node sums (float array).
        data: Stored payloads (object array, zero-initialised).
        write: Next data slot to overwrite.
        count: Number of filled slots (saturates at ``capacity``).
    """

    def __init__(self, capacity):
        if capacity < 1:
            raise ValueError(f'SumTree capacity must be >= 1, got {capacity}')
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)
        self.data = np.zeros(capacity, dtype=object)
        self.write = 0
        self.count = 0

    def _propagate(self, idx, change):
        """Add ``change`` to every ancestor of tree node ``idx``.

        Iterative, and a no-op for the root itself: with ``capacity == 1``
        the only leaf *is* the root, and walking to a "parent" would wrap to
        index -1 and never terminate.
        """
        while idx > 0:
            idx = (idx - 1) // 2
            self.tree[idx] += change

    def _retrieve(self, idx, s):
        """Descend from node ``idx`` to the leaf whose cumulative priority
        interval contains ``s``; return that leaf's tree index."""
        node_count = len(self.tree)
        while True:
            left = 2 * idx + 1
            if left >= node_count:
                # No children: idx is a leaf.
                return idx
            left_sum = self.tree[left]
            if s <= left_sum:
                idx = left
            else:
                # Skip the left subtree's mass and continue on the right.
                s -= left_sum
                idx = left + 1

    def total(self):
        """Return the sum of all leaf priorities."""
        return self.tree[0]

    def add(self, p, data):
        """Store ``data`` with priority ``p``, overwriting the oldest slot
        once the buffer is full."""
        idx = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(idx, p)
        self.write = (self.write + 1) % self.capacity
        if self.count < self.capacity:
            self.count += 1

    def update(self, idx, p):
        """Set the priority of tree leaf ``idx`` to ``p`` and refresh the
        sums above it."""
        change = p - self.tree[idx]
        self.tree[idx] = p
        self._propagate(idx, change)

    def get(self, s):
        """Return ``(tree_index, priority, data)`` for cumulative value ``s``."""
        idx = self._retrieve(0, s)
        return (idx, self.tree[idx], self.data[idx - self.capacity + 1])

class PrioritizedReplayBuffer:
    """Proportional prioritized replay buffer backed by a ``SumTree``.

    Each stored transition is the 5-tuple passed to ``push``; ``sample``
    unpacks it as (state, next_state, curr_price, next_price, volatility).

    Args:
        capacity: Maximum number of transitions kept (oldest overwritten).
        alpha: Exponent applied to |TD error| when priorities are updated.
    """

    def __init__(self, capacity=10000, alpha=0.6):
        self.tree = SumTree(capacity)
        self.alpha = alpha
        self.capacity = capacity

    def push(self, *args):
        """Store a transition with the current maximum leaf priority
        (at least 1.0), so new samples are likely to be drawn soon."""
        leaf_priorities = self.tree.tree[-self.tree.capacity:]
        max_p = max(np.max(leaf_priorities), 1.0)
        self.tree.add(max_p, args)

    def sample(self, batch_size, beta=0.4):
        """Draw ``batch_size`` transitions by stratified priority sampling.

        Returns:
            Tuple ``(states, next_states, curr_prices, next_prices,
            volatilities, tree_indices, is_weights)``; the arrays are NumPy
            arrays, ``tree_indices`` is a list to pass back to
            ``update_priorities``, and ``is_weights`` are importance-sampling
            weights normalised so the largest is 1.

        Raises:
            ValueError: If the buffer is empty.
        """
        if len(self) == 0:
            # Fail early with a clear message; otherwise the fallback below
            # dies inside random.randint with an "empty range" error.
            raise ValueError('Cannot sample from an empty PrioritizedReplayBuffer.')

        total = self.tree.total()
        batch, idxs, priorities = [], [], []
        seg = total / batch_size
        for i in range(batch_size):
            # One uniform draw per equal-mass segment of the priority range.
            s = random.uniform(seg * i, seg * (i + 1))
            idx, p, data = self.tree.get(s)
            if data is None or (isinstance(data, (int, float)) and data == 0):
                # Landed on a never-written slot (still the zero placeholder):
                # fall back to a uniformly chosen filled slot.
                vi = random.randint(0, self.tree.count - 1)
                data = self.tree.data[vi]
                idx = vi + self.capacity - 1
                p = self.tree.tree[idx]
            priorities.append(p)
            batch.append(data)
            idxs.append(idx)

        sampling_probs = np.array(priorities) / total
        isw = np.power(self.tree.count * sampling_probs, -beta)
        isw /= isw.max()

        states, next_states, curr_prices, next_prices, vols = zip(*batch)
        return (
            np.array(states),
            np.array(next_states),
            np.array(curr_prices, dtype=np.float32),
            np.array(next_prices, dtype=np.float32),
            np.array(vols, dtype=np.float32),
            idxs,
            np.array(isw, dtype=np.float32),
        )

    def update_priorities(self, idxs, errors):
        """Set each sampled leaf's priority to ``(|error| + 1e-5) ** alpha``."""
        for idx, e in zip(idxs, errors):
            self.tree.update(idx, (abs(e) + 1e-5) ** self.alpha)

    def __len__(self):
        return self.tree.count


print('PER loaded.')

In [None]:
def prepare_features(df):
    """Build the model feature columns from an OHLC DataFrame.

    Reads the ``open``, ``high``, ``low`` and ``close`` columns and adds
    ``mid_price``, ``atr``, ``volatility``, ``ema_200`` plus the feature
    columns computed below. NaN and +/-inf values are replaced with 0, every
    column named in ``Settings.FEATURES`` is clipped to [-10, 10], and the
    first 250 rows are dropped.

    Args:
        df: Price DataFrame. An empty frame is returned unchanged.

    Returns:
        A new DataFrame (the input is not modified) with the added columns.
    """
    if df.empty:
        return df
    df = df.copy()
    df['mid_price'] = (df['high'] + df['low']) / 2.0

    # --- ATR: the scale for most ratios below ------------------------------
    df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
    # Series.bfill() replaces fillna(method='bfill'), which is deprecated in
    # pandas 2.x. Anything still missing falls back to 0.1% of close.
    df['atr'] = df['atr'].bfill().fillna(df['close'] * 0.001)
    df['volatility'] = df['atr'] / df['close']

    # --- Rolling z-scored returns at several lags --------------------------
    for lag in [1, 2, 5, 10, 20]:
        raw_ret = df['close'].pct_change(lag)
        rm = raw_ret.rolling(Settings.STANDARDIZE_WINDOW).mean()
        # A zero/undefined rolling std falls back to the whole-series std.
        rs = raw_ret.rolling(Settings.STANDARDIZE_WINDOW).std().replace(0, np.nan).fillna(raw_ret.std())
        df[f'ret_close_{lag}'] = (raw_ret - rm) / rs

    # --- Candle geometry, in ATR units -------------------------------------
    df['body_ratio'] = (df['close'] - df['open']) / df['atr']
    df['upper_wick_ratio'] = (df['high'] - df[['open', 'close']].max(axis=1)) / df['atr']
    df['lower_wick_ratio'] = (df[['open', 'close']].min(axis=1) - df['low']) / df['atr']
    df['range_ratio'] = (df['high'] - df['low']) / df['atr']

    # --- Volatility: short-window realised vol over its 100-bar mean -------
    rv = df['close'].pct_change().rolling(Settings.VOLATILITY_WINDOW).std()
    rvm = rv.rolling(100).mean().replace(0, np.nan).fillna(rv.mean())
    df['volatility_ratio'] = rv / rvm
    df['atr_normalized'] = df['atr'] / df['close']

    # --- Momentum ----------------------------------------------------------
    rsi = ta.rsi(df['close'], length=14)
    df['rsi_14'] = rsi / 100.0 if rsi is not None else 0.0
    macd_df = ta.macd(df['close'], fast=12, slow=26, signal=9)
    if macd_df is not None and not macd_df.empty:
        # mc[0] is the first column matching 'MACDh' or 'MACD_'; sc[0] is the
        # first 'MACDs' column. NOTE(review): this assumes the MACD line
        # column precedes the histogram column in pandas_ta's output - confirm.
        mc = [c for c in macd_df.columns if 'MACDh' in c or 'MACD_' in c]
        sc = [c for c in macd_df.columns if 'MACDs' in c]
        df['macd_signal_dist'] = (macd_df[mc[0]] - macd_df[sc[0]]) / df['atr'] if mc and sc else 0.0
    else:
        df['macd_signal_dist'] = 0.0
    roc = ta.roc(df['close'], length=10)
    df['roc_10'] = roc / 100.0 if roc is not None else 0.0

    # --- Trend: distance to / slope of EMAs, in ATR units ------------------
    e50 = ta.ema(df['close'], length=50)
    e200 = ta.ema(df['close'], length=200)
    df['ema_200'] = e200
    df['dist_ema_50'] = (df['close'] - e50) / df['atr']
    df['dist_ema_200'] = (df['close'] - e200) / df['atr']
    df['ema_slope_50'] = (e50 - e50.shift(5)) / df['atr']

    # --- Bollinger bands ---------------------------------------------------
    bb = ta.bbands(df['close'], length=20, std=2)
    if bb is not None and not bb.empty:
        # Columns are taken positionally as lower, middle, upper.
        bbl, bbm, bbu = bb.iloc[:, 0], bb.iloc[:, 1], bb.iloc[:, 2]
        bbr = (bbu - bbl).replace(0, np.nan).fillna(1e-8)
        df['bb_position'] = (df['close'] - bbl) / bbr
        df['bb_width'] = (bbu - bbl) / df['close']
    else:
        df['bb_position'] = 0.5
        df['bb_width'] = 0.0

    # --- Directional movement ----------------------------------------------
    adx_df = ta.adx(df['high'], df['low'], df['close'], length=14)
    if adx_df is not None and not adx_df.empty:
        # Column 0 is used as ADX; columns 1 and 2 as the two DI lines.
        df['adx_normalized'] = adx_df.iloc[:, 0] / 100.0
        df['di_diff'] = (adx_df.iloc[:, 1] - adx_df.iloc[:, 2]) / 100.0 if len(adx_df.columns) >= 3 else 0.0
    else:
        df['adx_normalized'] = 0.0
        df['di_diff'] = 0.0

    # --- Sanitise ----------------------------------------------------------
    df.fillna(0, inplace=True)
    df.replace([np.inf, -np.inf], 0, inplace=True)
    for f in Settings.FEATURES:
        if f in df.columns:
            df[f] = df[f].clip(-10, 10)
    # Drop the leading rows; presumably indicator warm-up (the longest
    # look-back above is the 200-bar EMA) - TODO confirm the choice of 250.
    df = df.iloc[250:]
    return df


print('Feature engineering loaded.')

In [None]:
class TradingDataset(Dataset):
    """Sliding-window transitions over aligned feature/price/volatility arrays.

    For a step ``i`` the sample holds the ``seq_len`` feature rows ending at
    bar ``i - 1`` (``state``), the same window shifted forward by one bar
    (``next_state``), the prices at bars ``i - 1`` and ``i``, and the
    volatility value at bar ``i - 1``.

    Args:
        feature_data: 2-D array-like indexed as [bar, feature].
        close_prices: Per-bar price series aligned with ``feature_data``.
        volatilities: Per-bar volatility series aligned with ``feature_data``.
        seq_len: Number of bars in each state window.
    """

    def __init__(self, feature_data, close_prices, volatilities, seq_len):
        self.feature_data, self.close_prices = feature_data, close_prices
        self.volatilities, self.seq_len = volatilities, seq_len
        # Steps run from seq_len up to (but excluding) the final bar.
        self.valid_indices = range(seq_len, len(feature_data) - 1)

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, idx):
        step = self.valid_indices[idx]
        window_start = step - self.seq_len
        last_bar = step - 1

        # Copy the slices so the tensors do not alias the backing array.
        state_window = self.feature_data[window_start:step].copy()
        next_window = self.feature_data[window_start + 1:step + 1].copy()

        sample = {}
        sample['state'] = torch.FloatTensor(state_window)
        sample['next_state'] = torch.FloatTensor(next_window)
        sample['curr_price'] = torch.tensor(self.close_prices[last_bar], dtype=torch.float32)
        sample['next_price'] = torch.tensor(self.close_prices[step], dtype=torch.float32)
        sample['volatility'] = torch.tensor(self.volatilities[last_bar], dtype=torch.float32)
        return sample


print('Dataset loaded.')

In [None]:
def soft_update(policy_net, target_net, tau=Settings.TAU):
    """Blend the policy weights into the target network in place.

    Each target parameter becomes ``tau * policy + (1 - tau) * target``;
    parameters are paired by iteration order of ``.parameters()``.
    """
    keep = 1.0 - tau
    for target_param, policy_param in zip(target_net.parameters(), policy_net.parameters()):
        blended = tau * policy_param.data + keep * target_param.data
        target_param.data.copy_(blended)

def train_model(symbol, csv_path):
    """Train a Double-DQN policy for one symbol and save the best weights.

    Loads the CSV, builds features, splits the bars into walk-forward
    windows, and for each window trains a fresh ``QNetwork`` from a
    prioritized replay buffer filled with that window's training transitions.
    After every epoch the policy is scored on the window's validation slice.
    The weights with the best validation Sharpe (among epochs where the
    training loss improved) are written to ``<csv dir>/<symbol>_brain.pth``.

    Args:
        symbol: Key into ``Settings.PAIR_CONFIGS``.
        csv_path: Path to a CSV with a ``time`` column plus the price columns
            ``prepare_features`` reads.

    Returns:
        None. Nothing is saved when there is no data or when no window has
        enough samples to train on.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'\nTraining {symbol} on {device}...')
    df = pd.read_csv(csv_path, parse_dates=['time'], index_col='time')
    df = prepare_features(df)
    if df.empty:
        print('No data.')
        return
    print(f'Data: {len(df)} bars')

    # --- Walk-forward windows: (start, train_end, val_end, test_end) -------
    total = len(df)
    wt = Settings.TRAIN_WINDOW + Settings.VAL_WINDOW + Settings.TEST_WINDOW
    if total < wt:
        # Too little data for one full window: single 70/15/15 split.
        te = int(total * 0.7)
        ve = int(total * 0.85)
        windows = [(0, te, ve, total)]
    else:
        windows = []
        s = 0
        while s + wt <= total:
            t_ = s + Settings.TRAIN_WINDOW
            v_ = t_ + Settings.VAL_WINDOW
            ts_ = v_ + Settings.TEST_WINDOW
            windows.append((s, t_, v_, ts_))
            s += Settings.TEST_WINDOW
        # Keep only the five most recent windows.
        if len(windows) > 5:
            windows = windows[-5:]
    print(f'Windows: {len(windows)}')

    best_val_sharpe = -np.inf
    best_model_state = None
    policy_net = None  # remains None when every window is skipped
    loss_fn = nn.MSELoss(reduction='none')
    SF = Settings.PAIR_CONFIGS[symbol]['scaling_factor']

    # The test slice end (wtse) is computed but not evaluated in this function.
    for w_idx, (ws, wte, wve, wtse) in enumerate(windows):
        print(f'\n--- Window {w_idx+1}/{len(windows)} ---')
        df_t = df.iloc[ws:wte]
        df_v = df.iloc[wte:wve]
        tf = df_t[Settings.FEATURES].values
        tp = df_t['mid_price'].values
        tv = df_t['volatility_ratio'].values if 'volatility_ratio' in df_t.columns else np.ones(len(df_t))
        tds = TradingDataset(tf, tp, tv, Settings.SEQUENCE_LENGTH)
        vf = df_v[Settings.FEATURES].values
        vp = df_v['mid_price'].values
        vv = df_v['volatility_ratio'].values if 'volatility_ratio' in df_v.columns else np.ones(len(df_v))
        vds = TradingDataset(vf, vp, vv, Settings.SEQUENCE_LENGTH)
        if len(tds) < Settings.BATCH_SIZE:
            continue

        # --- Fill the replay buffer with every training transition ---------
        mem = PrioritizedReplayBuffer(capacity=len(tds), alpha=Settings.PER_ALPHA)
        tl = DataLoader(tds, batch_size=4096, shuffle=False)
        for b in tqdm(tl, desc='Fill'):
            s_ = b['state'].numpy()
            ns_ = b['next_state'].numpy()
            cp_ = b['curr_price'].numpy()
            np__ = b['next_price'].numpy()
            v_ = b['volatility'].numpy()
            for i in range(len(s_)):
                mem.push(s_[i], ns_[i], cp_[i], np__[i], v_[i])

        # P1: No warm-start - fresh network per window
        policy_net = QNetwork().to(device)
        # P0: Target network, initialised as a copy of the policy network
        target_net = QNetwork().to(device)
        target_net.load_state_dict(policy_net.state_dict())
        target_net.eval()

        optimizer = optim.Adam(policy_net.parameters(), lr=Settings.LEARNING_RATE)
        # P2: LR scheduler, driven by the mean training loss per epoch
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min',
            patience=Settings.LR_SCHEDULER_PATIENCE, factor=Settings.LR_SCHEDULER_FACTOR)

        epsilon = Settings.EPSILON_START
        steps = len(tds) // Settings.BATCH_SIZE
        best_vl = np.inf
        pat = 0
        gs = 0
        am = torch.tensor(Settings.ACTION_MAP, device=device)

        for epoch in range(Settings.EPOCHS):
            tl_ = 0
            # Anneal the PER importance-sampling exponent towards 1.
            beta = Settings.PER_BETA_START + (1.0 - Settings.PER_BETA_START) * (epoch / Settings.EPOCHS)
            pbar = tqdm(range(steps), desc=f'W{w_idx+1} E{epoch+1}')
            for _ in pbar:
                ss, nss, cps, nps, vs, idxs, isw = mem.sample(Settings.BATCH_SIZE, beta)
                st = torch.FloatTensor(ss).to(device)
                nst = torch.FloatTensor(nss).to(device)
                cpt = torch.FloatTensor(cps).to(device)
                npt = torch.FloatTensor(nps).to(device)
                vt = torch.FloatTensor(vs).to(device)
                wt_ = torch.FloatTensor(isw).to(device)
                bs = st.size(0)

                # Epsilon-greedy, decided once per batch (not per sample).
                if random.random() < epsilon:
                    at = torch.randint(0, Settings.OUTPUT_DIM, (bs,), device=device)
                else:
                    with torch.no_grad():
                        at = torch.argmax(policy_net(st), dim=1)

                # Reward: volatility-scaled position PnL minus a cost term.
                pos = am[at]
                pc = (npt - cpt) * SF
                # P0: Corrected vol scaling (inverse of the volatility ratio)
                vs_ = vt.clamp(min=0.2)
                vsc = (1.0 / vs_).clamp(0.2, 3.0)
                # NOTE(review): the cost is bps * SF * |position| with no price
                # factor, charged every step - confirm this is intended for
                # instruments whose price is far from 1.
                cost = Settings.TRANSACTION_COST_BPS * 0.0001 * SF * torch.abs(pos)
                reward = (vsc * pos * pc - cost).clamp(-5.0, 5.0)

                cq = policy_net(st).gather(1, at.unsqueeze(1)).squeeze(1)
                with torch.no_grad():
                    # P0: True DDQN - the policy net picks the next action,
                    # the target net evaluates it.
                    na = policy_net(nst).argmax(1)
                    nq = target_net(nst).gather(1, na.unsqueeze(1)).squeeze(1)
                    tq = reward + Settings.GAMMA * nq

                loss = (loss_fn(cq, tq) * wt_).mean()
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(policy_net.parameters(), 1.0)
                optimizer.step()
                mem.update_priorities(idxs, torch.abs(tq - cq).detach().cpu().numpy())
                tl_ += loss.item()
                gs += 1
                # NOTE(review): a tau-weighted soft update applied only every
                # TARGET_UPDATE_FREQ steps moves the target very slowly -
                # confirm the cadence is intended.
                if gs % Settings.TARGET_UPDATE_FREQ == 0:
                    soft_update(policy_net, target_net)

            if epsilon > Settings.EPSILON_MIN:
                epsilon *= Settings.EPSILON_DECAY
            al = tl_ / steps
            scheduler.step(al)
            print(f'W{w_idx+1} E{epoch+1}: Loss={al:.6f} Eps={epsilon:.4f} LR={optimizer.param_groups[0]["lr"]:.2e}')

            # --- Validation (P1: trade-only Sharpe), greedy, no costs ------
            policy_net.eval()
            vl = DataLoader(vds, batch_size=Settings.BATCH_SIZE, shuffle=False)
            all_r = []
            trades = 0
            with torch.no_grad():
                for vb in vl:
                    vs_t = vb['state'].to(device).float()
                    cpv = vb['curr_price'].to(device).float()
                    npv = vb['next_price'].to(device).float()
                    a = torch.argmax(policy_net(vs_t), dim=1)
                    p = am[a]
                    r = p * (npv - cpv) * SF
                    # Action index 3 maps to a 0.0 position (no trade).
                    trades += (a != 3).sum().item()
                    all_r.extend(r.cpu().numpy().tolist())
            ra = np.array(all_r)
            tr = ra[ra != 0]  # P1: trade-only
            # Annualisation factor 252*12*24 presumably assumes 5-minute bars
            # - TODO confirm against the data frequency.
            sharpe = (np.mean(tr) / np.std(tr) * np.sqrt(252*12*24)) if len(tr) > 1 and np.std(tr) > 0 else 0
            print(f'[VAL] Trades: {trades}, PnL: {ra.sum():.5f}, Sharpe: {sharpe:.4f}')
            policy_net.train()

            # Early stopping and checkpoint gating both follow the *training*
            # loss; the Sharpe comparison spans all windows.
            if al < best_vl:
                best_vl = al
                pat = 0
                if sharpe > best_val_sharpe:
                    best_val_sharpe = sharpe
                    # Snapshot on CPU: frees GPU memory and keeps the saved
                    # checkpoint loadable on machines without CUDA.
                    best_model_state = {
                        k: v.detach().cpu().clone()
                        for k, v in policy_net.state_dict().items()
                    }
            else:
                pat += 1
                if pat >= Settings.EARLY_STOP_PATIENCE:
                    print(f'Early stop E{epoch+1}')
                    break

    if policy_net is None:
        # Every window was skipped, so there is no network to save
        # (previously this path crashed with a NameError).
        print(f'No window had enough data to train {symbol}; nothing saved.')
        return

    sp = os.path.join(os.path.dirname(csv_path), f'{symbol}_brain.pth')
    if best_model_state is not None:
        final_state = best_model_state
    else:
        final_state = {k: v.detach().cpu() for k, v in policy_net.state_dict().items()}
    torch.save(final_state, sp)
    print(f'Model saved: {sp} (Sharpe: {best_val_sharpe:.4f})')

In [None]:
# Choose which symbols to train; each needs <drive_data>/<symbol>.csv.
pairs = ['EURUSD']
drive_data = '/content/drive/MyDrive/data'

for sym in pairs:
    csv = os.path.join(drive_data, f'{sym}.csv')
    # Skip symbols whose CSV is missing instead of failing the whole run.
    if not os.path.exists(csv):
        print(f'{sym} data not found.')
        continue
    print(f'Found: {csv}')
    train_model(sym, csv)