# Deep Multi-Pair Forex Trading System - Colab Training

This notebook trains the LSTM-DQN model using data stored in your Google Drive.
**Data Location:** `/content/drive/MyDrive/data/{symbol}.csv`

### ⚡ Enable GPU
To speed up training, make sure the notebook is running on a GPU runtime:
1. In the menu, go to **Runtime** > **Change runtime type**.
2. Select **T4 GPU** (or better) under Hardware accelerator.
3. Click **Save**.

In [None]:
# Mount Google Drive
# Exposes Drive under /content/drive so the per-symbol CSVs in
# /content/drive/MyDrive/data can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Verify GPU
# Report which CUDA device torch sees, or tell the user how to enable one.
import torch
print(
    f"✅ GPU Available: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "⚠️ GPU NOT Detected! Please enable it in Runtime > Change runtime type."
)

In [None]:
# Install dependencies
# pandas_ta is imported in the next cell as `ta` and provides the technical
# indicators (EMA, RSI, ROC, ADX, ATR, Bollinger Bands) used for the features.
!pip install pandas_ta

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import pandas_ta as ta
import numpy as np
from tqdm import tqdm
import os
from torch.utils.data import Dataset, DataLoader
import random
import matplotlib.pyplot as plt

In [None]:
# --- CONFIGURATION ---
class Settings:
    """Static configuration: per-symbol profiles, network sizes and training knobs."""

    # --- TRADING PAIRS & PROFILES ---
    # Each profile carries, in this order: spread, commission, scaling_factor,
    # contract_size. Commission is zero for every pair; scaling_factor is the
    # multiplier applied to raw price differences when rewards are computed.
    PAIR_CONFIGS = {
        # Gold uses a much smaller scaling factor than the FX pairs.
        'XAUUSD': dict(spread=0.20, commission=0.0, scaling_factor=5.0, contract_size=100),
        'EURUSD': dict(spread=0.0001, commission=0.0, scaling_factor=10000.0, contract_size=100000),
        'GBPUSD': dict(spread=0.0002, commission=0.0, scaling_factor=10000.0, contract_size=100000),
    }
    # Symbol names in declaration order.
    PAIRS = [*PAIR_CONFIGS]

    # --- Core model shape ---
    # Number of consecutive bars fed to the network per sample
    # (original note: scalper reacts to the last hour only).
    SEQUENCE_LENGTH = 12
    # Feature columns, in the order they are stacked into the input tensor.
    FEATURES = ['log_ret', 'dist_ema', 'rsi', 'roc', 'volatility', 'hour', 'adx']
    INPUT_DIM = len(FEATURES)  # 7 with the list above
    HIDDEN_DIM = 128
    NUM_LAYERS = 2
    DROPOUT = 0.2
    # One Q-value per action; elsewhere in the notebook 1 is treated as buy,
    # 2 as sell and 0 as "do nothing".
    OUTPUT_DIM = 3

    # --- Training ---
    EPOCHS = 50
    BATCH_SIZE = 64
    LEARNING_RATE = 0.001
    GAMMA = 0.85           # discount factor; low value favours immediate reward
    EPSILON_START = 1.0    # initial exploration probability
    EPSILON_DECAY = 0.92   # multiplicative decay applied once per epoch
    EPSILON_MIN = 0.01     # decay stops once epsilon is at or below this

    # --- Data ---
    TRAIN_SPLIT_INDEX = 420000  # row index where the backtest slice begins
    ATR_PERIOD = 14

In [None]:
# --- MODEL (Brain) ---
class QNetwork(nn.Module):
    """LSTM encoder with a small MLP head that emits one Q-value per action.

    Input is batch-first (``batch_first=True``), i.e. ``(batch, seq, input_dim)``;
    the output of ``forward`` is ``(batch, output_dim)``.
    """

    def __init__(self, input_dim=Settings.INPUT_DIM,
                 hidden_dim=Settings.HIDDEN_DIM,
                 num_layers=Settings.NUM_LAYERS,
                 dropout=Settings.DROPOUT,
                 output_dim=Settings.OUTPUT_DIM):
        super().__init__()

        # Dropout is only passed to the LSTM when it has more than one layer.
        recurrent_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            dropout=recurrent_dropout,
        )

        # Head: hidden_dim -> hidden_dim // 2 -> output_dim with a ReLU between.
        bottleneck = hidden_dim // 2
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, bottleneck),
            nn.ReLU(),
            nn.Linear(bottleneck, output_dim),
        )

    def forward(self, x):
        """Return Q-values computed from the LSTM output at the final time step."""
        sequence_out, _ = self.lstm(x)
        return self.fc(sequence_out[:, -1, :])

In [None]:
# --- DATA PROCESSING ---
def prepare_features(df):
    """Return a copy of ``df`` extended with model features and Bollinger Bands.

    Reads the ``close``, ``high`` and ``low`` columns and the ``.hour`` of the
    index. The input frame is not modified.

    Statement order matters: features created before the ``fillna(0)`` keep
    their warm-up rows (filled with 0), while rows where ATR or the Bollinger
    Bands are still NaN are removed by the final ``dropna``.
    """
    frame = df.copy()
    close = frame['close']

    # Close-to-close log return, x1000.
    frame['log_ret'] = np.log(close / close.shift(1)) * 1000.0

    # Relative distance of close from its 50-period EMA, x1000.
    ema_50 = ta.ema(close, length=50)
    frame['dist_ema'] = ((close - ema_50) / close) * 1000.0

    # 14-period RSI divided by 100.
    frame['rsi'] = ta.rsi(close, length=14) / 100.0

    # 3-period rate of change, x10.
    frame['roc'] = ta.roc(close, length=3) * 10.0

    # 14-period ADX divided by 100; only the 'ADX_14' column of the result is kept.
    adx_frame = ta.adx(frame['high'], frame['low'], close, length=14)
    frame['adx'] = adx_frame['ADX_14'] / 100.0

    # Zero-fill every NaN present so far (indicator warm-up rows included).
    frame = frame.fillna(0)

    # Re-read close after the fill so later features see the filled values.
    close = frame['close']

    # ATR as a fraction of close, x1000.
    atr = ta.atr(frame['high'], frame['low'], close, length=Settings.ATR_PERIOD)
    frame['volatility'] = (atr / close) * 1000.0

    # Hour of day from the index, divided by 23.
    frame['hour'] = frame.index.hour / 23.0

    # Bollinger Bands (20, 2). Columns are renamed positionally to fixed names,
    # which assumes bbands returns exactly five columns in this order.
    bands = ta.bbands(close, length=20, std=2)
    bands.columns = ['BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0']
    frame = pd.concat([frame, bands], axis=1)

    # Drop rows that still contain NaN (ATR / Bollinger warm-up).
    return frame.dropna()

class TradingDataset(Dataset):
    """Sliding-window dataset yielding (state, next_state, price, next price) samples.

    For each valid row ``i`` the state is rows ``[i - seq_len, i)`` of the
    feature matrix, the next state is the same window shifted forward by one
    row, and the prices are ``close_prices[i - 1]`` and ``close_prices[i]``.
    """

    def __init__(self, feature_data, close_prices, seq_len):
        self.feature_data = feature_data
        self.close_prices = close_prices
        self.seq_len = seq_len
        # First index with a full look-back window, up to (excluding) the last row.
        self.valid_indices = range(seq_len, len(feature_data) - 1)

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, idx):
        end = self.valid_indices[idx]
        start = end - self.seq_len

        sample = {}
        sample['state'] = torch.FloatTensor(self.feature_data[start:end])
        sample['next_state'] = torch.FloatTensor(self.feature_data[start + 1:end + 1])
        sample['curr_price'] = torch.tensor(self.close_prices[end - 1], dtype=torch.float32)
        sample['next_price'] = torch.tensor(self.close_prices[end], dtype=torch.float32)
        return sample

In [None]:
# --- TRAINING ENGINE ---
def train_model(symbol, csv_path):
    """Train a QNetwork on one symbol's CSV and save the weights beside the CSV.

    Args:
        symbol: Pair name. Looked up in ``Settings.PAIR_CONFIGS``; an unknown
            symbol falls back to the XAUUSD profile.
        csv_path: Path to a CSV with a ``time`` column (used as the index) and
            the price columns read by ``prepare_features``.

    Returns:
        None. Prints a message and returns early when the CSV is missing or
        has too few usable rows to form one batch. On success the state dict
        is written to ``{symbol}_brain.pth`` in the CSV's directory and the
        per-epoch average loss is plotted.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training {symbol} on {device}...")

    # Load Profile Constants
    if symbol not in Settings.PAIR_CONFIGS:
        print(f"Pair {symbol} not in config! Defaulting to XAUUSD.")
        profile = Settings.PAIR_CONFIGS['XAUUSD']
    else:
        profile = Settings.PAIR_CONFIGS[symbol]

    SCALING_FACTOR = profile['scaling_factor']
    SPREAD = profile['spread']
    COMMISSION = profile['commission']

    # Load Data
    if not os.path.exists(csv_path):
        print(f"File {csv_path} not found.")
        return

    df = pd.read_csv(csv_path, parse_dates=['time'], index_col='time')
    # Features must exist before the dataset is built.
    df = prepare_features(df)

    # Train only on rows before TRAIN_SPLIT_INDEX. colab_backtest evaluates on
    # df.iloc[TRAIN_SPLIT_INDEX:], so training on the full frame would let the
    # model see its own test data. A shorter frame is simply used in full.
    df = df.iloc[:Settings.TRAIN_SPLIT_INDEX]

    feature_data = df[Settings.FEATURES].values
    close_prices = df['close'].values

    dataset = TradingDataset(feature_data, close_prices, Settings.SEQUENCE_LENGTH)
    # drop_last=True discards a partial batch, so fewer samples than one batch
    # would give an empty loader (and a division by zero in the loss average).
    if len(dataset) < Settings.BATCH_SIZE:
        print(f"Not enough rows in {csv_path} to form a training batch.")
        return

    dataloader = DataLoader(
        dataset,
        batch_size=Settings.BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        pin_memory=(device.type == "cuda"),  # pinning only helps host->GPU copies
    )

    policy_net = QNetwork().to(device)
    optimizer = optim.Adam(policy_net.parameters(), lr=Settings.LEARNING_RATE)
    loss_fn = nn.MSELoss()

    epsilon = Settings.EPSILON_START
    loss_history = []

    # Per-step trading cost in scaled units; constant for the whole run.
    total_cost = (SPREAD * SCALING_FACTOR) + (COMMISSION * SCALING_FACTOR)
    # Reward for action 0 (no trade): a small negative value.
    base_penalty = -0.1

    for epoch in range(Settings.EPOCHS):
        total_loss = 0

        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{Settings.EPOCHS}"):
            state_tensor = batch['state'].to(device, non_blocking=True)
            next_state_tensor = batch['next_state'].to(device, non_blocking=True)
            curr_price = batch['curr_price'].to(device, non_blocking=True)
            next_price = batch['next_price'].to(device, non_blocking=True)

            batch_size = state_tensor.size(0)

            # Epsilon-greedy, decided once per batch: either every sample in
            # the batch acts randomly or every sample acts greedily.
            if random.random() < epsilon:
                action_tensor = torch.randint(0, Settings.OUTPUT_DIM, (batch_size,), device=device)
            else:
                with torch.no_grad():
                    action_tensor = torch.argmax(policy_net(state_tensor), dim=1)

            # --- Reward ---
            # Scaled one-bar price change; buy earns +change, sell earns
            # -change, both minus cost, squashed into (-1, 1) by tanh.
            norm_pnl = (next_price - curr_price) * SCALING_FACTOR
            buy_final = torch.tanh(norm_pnl - total_cost)
            sell_final = torch.tanh(-norm_pnl - total_cost)

            is_buy = (action_tensor == 1)
            is_sell = (action_tensor == 2)

            reward_tensor = torch.full((batch_size,), base_penalty, device=device)
            reward_tensor[is_buy] = buy_final[is_buy]
            reward_tensor[is_sell] = sell_final[is_sell]

            # --- Q-learning update ---
            # The bootstrap target uses the same network (no separate target net).
            current_q = policy_net(state_tensor).gather(1, action_tensor.unsqueeze(1))
            with torch.no_grad():
                next_q = policy_net(next_state_tensor).max(1)[0]
                target_q = reward_tensor + (Settings.GAMMA * next_q)

            loss = loss_fn(current_q.squeeze(1), target_q)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Decay exploration once per epoch until the floor is reached.
        if epsilon > Settings.EPSILON_MIN:
            epsilon *= Settings.EPSILON_DECAY

        avg_loss = total_loss / len(dataloader)
        loss_history.append(avg_loss)
        print(f"Avg Loss: {avg_loss:.6f}, Epsilon: {epsilon:.4f}")

    drive_save_path = os.path.join(os.path.dirname(csv_path), f"{symbol}_brain.pth")
    torch.save(policy_net.state_dict(), drive_save_path)
    print(f"Model Saved to {drive_save_path}!")

    plt.plot(loss_history)
    plt.title(f"Training Loss - {symbol}")
    plt.show()

In [None]:
# --- EXECUTION ---
# Folder on the mounted Drive that holds one "<symbol>.csv" per pair.
drive_data_path = "/content/drive/MyDrive/data"

# ⚡ SELECT PAIRS TO TRAIN HERE:
pairs = ['EURUSD'] # Change to ['XAUUSD'] or Settings.PAIRS for all

print(f"Training Pairs: {pairs}")

# Train each selected pair whose CSV is present; report and skip the rest.
for symbol in pairs:
    csv_path = os.path.join(drive_data_path, f"{symbol}.csv")
    if not os.path.exists(csv_path):
        print(f"Data for {symbol} not found. Skipping.")
        continue
    print(f"Found data for {symbol} at: {csv_path}")
    train_model(symbol, csv_path)


In [None]:
# --- BACKTEST ENGINE (Colab Version - Optimized) ---

def colab_backtest(symbol, csv_path):
    """Replay a saved model over the rows from ``Settings.TRAIN_SPLIT_INDEX`` onward.

    Loads ``{symbol}_brain.pth`` from the CSV's directory, rebuilds the
    features, then walks the test slice bar by bar: the model picks an action
    from the previous ``SEQUENCE_LENGTH`` bars, two filters may veto it, and
    the resulting order is executed at the current bar's open with an
    ATR-based stop, a move-to-breakeven rule and a timed exit.

    Each closed trade is charged the round-trip cost ``spread_cost + txn_cost``.
    A position still open after the last bar is not closed out.

    Args:
        symbol: Pair name; its profile in ``Settings.PAIR_CONFIGS`` supplies
            contract size, spread and commission (defaults are used when the
            symbol is not configured).
        csv_path: CSV with a ``time`` column and open/high/low/close columns.

    Returns:
        None. Prints final balance, trade count and win rate, and plots the
        equity curve. Returns early (after printing why) when the model file
        is missing, its weights do not fit the network, or the CSV is too short.
    """
    print(f"\n--- Starting Backtest for {symbol} ---")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = os.path.join(os.path.dirname(csv_path), f"{symbol}_brain.pth")

    if not os.path.exists(model_path):
        print(f"Model {model_path} not found. Please train first.")
        return

    policy_net = QNetwork().to(device)
    try:
        policy_net.load_state_dict(torch.load(model_path, map_location=device))
    except RuntimeError as e:
        print("\n!!! MODEL ARCHITECTURE MISMATCH !!!")
        print(f"Error: {e}")
        print("Cause: You are trying to load an OLD model (missing features) into the NEW code.")
        print("Solution: You MUST run the 'TRAINING' cells above to retrain the model with the new features.")
        return

    policy_net.eval()
    print("Model loaded.")

    df = pd.read_csv(csv_path, parse_dates=['time'], index_col='time')
    test_start_idx = Settings.TRAIN_SPLIT_INDEX
    if len(df) < test_start_idx + 100:
        print(f"Not enough data for backtest. Total: {len(df)}, Needed: >{test_start_idx}")
        return

    print(f"Backtesting on rows {test_start_idx} to {len(df)}...")

    # Indicators are computed on the full history before slicing, so the
    # first test rows already have warmed-up indicator values.
    df = prepare_features(df)

    df_test = df.iloc[test_start_idx:].copy()

    feature_data = df_test[Settings.FEATURES].values
    opens = df_test['open'].values
    highs = df_test['high'].values
    lows = df_test['low'].values
    closes = df_test['close'].values
    times = df_test.index
    # 'volatility' is ATR/close*1000; multiplying by close gives ATR*1000.
    # NOTE(review): the x1000 factor is carried into the stop distances below
    # exactly as before - confirm this scale is intended.
    atrs = df_test['volatility'].values * closes

    # Filter inputs as positional arrays: avoids a label lookup per bar and
    # stays well-defined when the index contains duplicate timestamps.
    adx_col = Settings.FEATURES.index('adx')
    bb_width_pct = (df_test['BBU_20_2.0'].values - df_test['BBL_20_2.0'].values) / closes

    balance = 10000.0
    equity_curve = [balance]
    position = 0        # 1 = long, -1 = short, 0 = flat
    entry_price = 0.0
    stop_loss = 0.0

    # Defaults used when the symbol has no profile.
    contract_size = 100
    spread_cost = 0.20
    txn_cost = 0.0
    lot_size = 0.01

    if symbol in Settings.PAIR_CONFIGS:
        profile = Settings.PAIR_CONFIGS[symbol]
        contract_size = profile['contract_size']
        spread_cost = profile['spread'] * contract_size * lot_size
        txn_cost = profile['commission'] * contract_size * lot_size

    # Cost per closed trade. The spread used to be computed but never
    # deducted, which made the backtest cheaper than the training reward.
    round_trip_cost = spread_cost + txn_cost

    def _net_pnl(side, entry, exit_):
        """Net PnL of closing a long (side == 1) or short position."""
        if side == 1:
            gross = (exit_ - entry) * contract_size * lot_size
        else:
            gross = (entry - exit_) * contract_size * lot_size
        return gross - round_trip_cost

    trades = []

    for t in tqdm(range(Settings.SEQUENCE_LENGTH, len(df_test) - 1)):
        window = feature_data[t - Settings.SEQUENCE_LENGTH : t]
        state_tensor = torch.FloatTensor(window).unsqueeze(0).to(device)

        with torch.no_grad():
            action = torch.argmax(policy_net(state_tensor), dim=1).item()

        if action != 0:
            # --- FILTERS (both use the last bar inside the state window) ---
            # 1. ADX filter: the feature is ADX/100, so un-normalise first.
            #    Below 25 the market is treated as flat and the signal dropped.
            if feature_data[t - 1, adx_col] * 100.0 < 25:
                action = 0

            # 2. Bollinger squeeze filter: skip when band width relative to
            #    close is under 0.05%.
            if bb_width_pct[t - 1] < 0.0005:
                action = 0

        # Execution happens on bar t, the first bar after the state window.
        next_open = opens[t]
        next_high = highs[t]
        next_low = lows[t]
        next_time = times[t]
        atr = atrs[t - 1]

        trade_closed = False
        exit_price = 0.0

        if position != 0:
            # Timed exit: bars stamped exactly on the hour, from 20:00 onward.
            if next_time.hour >= 20 and next_time.minute == 0:
                exit_price = next_open
                trade_closed = True
            elif position == 1:
                if next_low <= stop_loss:
                    exit_price = stop_loss
                    trade_closed = True
                elif (next_high - entry_price) > (1.0 * atr) and stop_loss < entry_price:
                    # Price moved one ATR in favour: move the stop to breakeven.
                    stop_loss = entry_price
            elif position == -1:
                if next_high >= stop_loss:
                    exit_price = stop_loss
                    trade_closed = True
                elif (entry_price - next_low) > (1.0 * atr) and stop_loss > entry_price:
                    stop_loss = entry_price

            if trade_closed:
                pnl = _net_pnl(position, entry_price, exit_price)
                balance += pnl
                trades.append(pnl)
                position = 0

        # No new entry on a bar that already closed a trade.
        if not trade_closed and action in (1, 2):
            target = 1 if action == 1 else -1

            # Opposite signal: close the existing position at the open first.
            if position == -target:
                pnl = _net_pnl(position, entry_price, next_open)
                balance += pnl
                trades.append(pnl)
                position = 0

            # Flat (possibly just flattened): open in the signalled direction
            # with a stop 2.5 ATR away. A same-direction signal does nothing.
            if position == 0:
                position = target
                entry_price = next_open
                stop_loss = entry_price - target * (atr * 2.5)

        equity_curve.append(balance)

    win_rate = sum(1 for pnl in trades if pnl > 0) / len(trades) * 100 if trades else 0
    print(f"\nFinal Balance: ${balance:.2f}")
    print(f"Total Trades: {len(trades)}")
    print(f"Win Rate: {win_rate:.1f}%")

    plt.figure(figsize=(12,6))
    plt.plot(equity_curve)
    plt.title(f"Backtest Equity - {symbol}")
    plt.show()

# Backtest every configured pair that has a CSV in the data folder;
# pairs without data are skipped silently.
for symbol in Settings.PAIRS:
    csv_path = os.path.join(drive_data_path, f"{symbol}.csv")
    if not os.path.exists(csv_path):
        continue
    colab_backtest(symbol, csv_path)