# Deep Multi-Pair Forex Trading System - Colab Training

This notebook trains the LSTM-DQN model using data stored in your Google Drive.
**Data Location:** `/content/drive/MyDrive/data/{symbol}.csv`

### ⚡ Enable GPU
To speed up training, make sure you are using a GPU runtime:
1. In the menu, go to **Runtime** > **Change runtime type**.
2. Select **T4 GPU** (or better) under Hardware accelerator.
3. Click **Save**.

In [None]:
# Mount Google Drive at /content/drive. The data path used later in this
# notebook (/content/drive/MyDrive/data) lives under this mount point, so this
# cell has to run before any CSV is read or any model is saved.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Verify GPU: report which CUDA device PyTorch sees, or warn when there is none.
import torch
if not torch.cuda.is_available():
    print("⚠️ GPU NOT Detected! Please enable it in Runtime > Change runtime type.")
else:
    print(f"✅ GPU Available: {torch.cuda.get_device_name(0)}")

In [None]:
# Install dependencies
!pip install pandas_ta

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import pandas_ta as ta
import numpy as np
from tqdm import tqdm
import os
from torch.utils.data import Dataset, DataLoader
import random
import matplotlib.pyplot as plt

In [None]:
# --- CONFIGURATION ---
class Settings:
    """Central hyper-parameters shared by the model, training and backtest cells."""

    # Symbols handled by this notebook.
    PAIRS = [
        'XAUUSD',
        'EURUSD',
        'GBPUSD',
    ]

    # --- Core (network input/output shape) ---
    # Number of consecutive bars fed to the LSTM as one state.
    SEQUENCE_LENGTH = 60
    # Feature columns produced by prepare_features(), in model input order.
    FEATURES = [
        'log_ret',
        'dist_ema',
        'rsi',
        'volatility',
        'hour',
    ]
    INPUT_DIM = len(FEATURES)
    HIDDEN_DIM = 128
    NUM_LAYERS = 2
    DROPOUT = 0.2
    # One Q-value per action; the training/backtest code treats 1 as buy and
    # 2 as sell, with 0 meaning "do nothing".
    OUTPUT_DIM = 3

    # --- Training ---
    EPOCHS = 50  # Increased to give more time to converge
    BATCH_SIZE = 64
    LEARNING_RATE = 0.001
    GAMMA = 0.99  # Discount applied to the next-state Q-value
    EPSILON_START = 1.0
    EPSILON_DECAY = 0.92  # Faster decay so it stops acting randomly sooner
    EPSILON_MIN = 0.01

    # --- Data ---
    # Row index where the backtest's test split starts.
    TRAIN_SPLIT_INDEX = 420000
    ATR_PERIOD = 14

In [None]:
# --- MODEL (Brain) ---
class QNetwork(nn.Module):
    """LSTM encoder followed by a small MLP head that outputs one Q-value per action.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between stacked LSTM layers (forced to 0 for a
            single-layer LSTM).
        output_dim: Number of Q-values (actions) produced.
    """

    def __init__(self, input_dim=Settings.INPUT_DIM,
                 hidden_dim=Settings.HIDDEN_DIM,
                 num_layers=Settings.NUM_LAYERS,
                 dropout=Settings.DROPOUT,
                 output_dim=Settings.OUTPUT_DIM):
        super().__init__()

        # Inter-layer dropout is only meaningful with more than one layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

        # Head: hidden_dim -> hidden_dim // 2 -> output_dim.
        bottleneck = hidden_dim // 2
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, bottleneck),
            nn.ReLU(),
            nn.Linear(bottleneck, output_dim),
        )

    def forward(self, x):
        """Return Q-values for a batch-first input of sequences.

        Only the LSTM output at the final time step is passed to the head.
        """
        sequence_out, _ = self.lstm(x)
        return self.fc(sequence_out[:, -1, :])

In [None]:
# --- DATA PROCESSING ---
def prepare_features(df):
    """Return a copy of ``df`` with model features and Bollinger Bands added.

    Reads the 'close', 'high' and 'low' columns and the hour of the index.
    Adds 'log_ret', 'dist_ema', 'rsi', 'volatility' and 'hour', appends the
    columns returned by ``ta.bbands`` and drops every row containing a NaN.
    The input frame is not modified.
    """
    out = df.copy()
    close = out['close']

    # Log return of close versus the previous row.
    out['log_ret'] = np.log(close / close.shift(1))

    # Distance from the 50-period EMA, relative to price.
    out['dist_ema'] = (close - ta.ema(close, length=50)) / close

    # RSI(14) divided by 100.
    out['rsi'] = ta.rsi(close, length=14) / 100.0

    # ATR expressed as a fraction of price.
    avg_true_range = ta.atr(out['high'], out['low'], close, length=Settings.ATR_PERIOD)
    out['volatility'] = avg_true_range / close

    # Hour of day divided by 23.
    out['hour'] = out.index.hour / 23.0

    # Bollinger Bands (Step 3: Squeeze Filter); the band columns are appended as-is.
    bands = ta.bbands(close, length=20, std=2)

    return pd.concat([out, bands], axis=1).dropna()

class TradingDataset(Dataset):
    """Sliding-window dataset yielding (state, next_state, price, next price) samples.

    For a sample ending at index ``i`` the state is rows ``[i - seq_len, i)``
    of ``feature_data`` and the next state is the same window shifted forward
    by one row. ``curr_price`` is the close at ``i - 1`` (the last row inside
    the state window) and ``next_price`` is the close at ``i``.
    """

    def __init__(self, feature_data, close_prices, seq_len):
        self.feature_data = feature_data
        self.close_prices = close_prices
        self.seq_len = seq_len
        # The first usable end index needs seq_len rows of history before it.
        self.valid_indices = range(seq_len, len(feature_data) - 1)

    def __len__(self):
        return len(self.valid_indices)

    def __getitem__(self, idx):
        end = self.valid_indices[idx]
        start = end - self.seq_len

        sample = {}
        sample['state'] = torch.FloatTensor(self.feature_data[start:end])
        sample['next_state'] = torch.FloatTensor(self.feature_data[start + 1:end + 1])
        sample['curr_price'] = torch.tensor(self.close_prices[end - 1], dtype=torch.float32)
        sample['next_price'] = torch.tensor(self.close_prices[end], dtype=torch.float32)
        return sample

In [None]:
# --- TRAINING ENGINE ---
def train_model(symbol, csv_path):
    """Train a QNetwork for one symbol and save its weights next to the CSV.

    The CSV is read with a parsed 'time' index, cut off at
    ``Settings.TRAIN_SPLIT_INDEX`` and passed through ``prepare_features``.
    The network is trained with a one-step Q-learning target and an
    epsilon-greedy action choice that is drawn once per batch. After the last
    epoch the state dict is written to ``<csv folder>/<symbol>_brain.pth`` and
    the per-epoch average loss is plotted.

    Args:
        symbol: Instrument name, e.g. 'EURUSD'. Also selects the reward scaling.
        csv_path: Path to the price CSV for ``symbol``.

    Returns:
        None. Returns early (after printing a message) if the file is missing
        or there are too few rows to form a single batch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training {symbol} on {device}...")

    # Load Data
    if not os.path.exists(csv_path):
        print(f"File {csv_path} not found.")
        return

    df = pd.read_csv(csv_path, parse_dates=['time'], index_col='time')
    # Train only on rows before TRAIN_SPLIT_INDEX: the backtest evaluates on
    # rows from that index onward, so training on them would leak test data.
    # Files shorter than the split index are used in full, as before.
    df = df.iloc[:Settings.TRAIN_SPLIT_INDEX]
    df = prepare_features(df)

    feature_data = df[Settings.FEATURES].values
    close_prices = df['close'].values

    dataset = TradingDataset(feature_data, close_prices, Settings.SEQUENCE_LENGTH)
    # With drop_last=True a dataset smaller than one batch yields no batches
    # (and an empty dataset cannot be shuffled), so bail out early.
    if len(dataset) < Settings.BATCH_SIZE:
        print(f"Not enough data to train {symbol}: {len(dataset)} samples, "
              f"need at least {Settings.BATCH_SIZE}.")
        return

    # OPTIMIZATION: pin_memory for faster CPU->GPU transfer (only useful on CUDA)
    dataloader = DataLoader(
        dataset,
        batch_size=Settings.BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        pin_memory=(device.type == "cuda"),
    )

    policy_net = QNetwork().to(device)
    optimizer = optim.Adam(policy_net.parameters(), lr=Settings.LEARNING_RATE)
    loss_fn = nn.MSELoss()

    # Reward constants depend only on the symbol, so compute them once.
    # 1. Scaling Factor (Fix for EURUSD 'Decimal Dust')
    if "USD" in symbol and "XAU" not in symbol:
        scaling_factor = 10000.0  # Forex (EURUSD, GBPUSD) -> 1 pip = 1.0
        spread_cost_per_unit = 0.0001
    else:
        scaling_factor = 1.0  # Gold/Indices -> $1 = 1.0 (Approx)
        spread_cost_per_unit = 0.20  # Gold Spread

    # 2. Base Penalty (Holding Cost), scaled like the PnL so it hurts.
    base_penalty = -0.1 * (spread_cost_per_unit * scaling_factor)
    # 3. Reward Scaling (The Carrot): profitable trades are multiplied by 10.
    bias_scaler = 10.0

    epsilon = Settings.EPSILON_START
    loss_history = []

    policy_net.train()
    for epoch in range(Settings.EPOCHS):
        total_loss = 0.0
        pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{Settings.EPOCHS}")

        for batch in pbar:
            # Move to device (GPU)
            state_tensor = batch['state'].to(device, non_blocking=True)
            next_state_tensor = batch['next_state'].to(device, non_blocking=True)
            curr_price = batch['curr_price'].to(device, non_blocking=True)
            next_price = batch['next_price'].to(device, non_blocking=True)

            batch_size = state_tensor.size(0)

            # Epsilon Greedy (one draw per batch: the whole batch is either
            # random or greedy)
            if random.random() < epsilon:
                action_tensor = torch.randint(0, Settings.OUTPUT_DIM, (batch_size,), device=device)
            else:
                with torch.no_grad():
                    q_values = policy_net(state_tensor)
                    action_tensor = torch.argmax(q_values, dim=1)

            price_diff = next_price - curr_price

            # Every sample starts with the base penalty; buy/sell samples are
            # overwritten with their trade reward below.
            reward_tensor = torch.full((batch_size,), base_penalty, device=device)

            # Masks
            is_buy = (action_tensor == 1)
            is_sell = (action_tensor == 2)

            # PnL Calculation (Scaled): (Change - Cost) * Scale
            buy_pnl = (price_diff - spread_cost_per_unit) * scaling_factor
            sell_pnl = (-price_diff - spread_cost_per_unit) * scaling_factor

            # Buy Rewards
            final_buy = torch.where(buy_pnl > 0, buy_pnl * bias_scaler, buy_pnl)
            reward_tensor[is_buy] = final_buy[is_buy]

            # Sell Rewards
            final_sell = torch.where(sell_pnl > 0, sell_pnl * bias_scaler, sell_pnl)
            reward_tensor[is_sell] = final_sell[is_sell]

            # Update: one-step target bootstrapped from the same network.
            current_q = policy_net(state_tensor).gather(1, action_tensor.unsqueeze(1))
            with torch.no_grad():
                next_q = policy_net(next_state_tensor).max(1)[0]
                target_q = reward_tensor + (Settings.GAMMA * next_q)

            loss = loss_fn(current_q.squeeze(1), target_q)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Decay exploration once per epoch, never dropping below the floor.
        epsilon = max(Settings.EPSILON_MIN, epsilon * Settings.EPSILON_DECAY)

        avg_loss = total_loss / len(dataloader)
        loss_history.append(avg_loss)
        print(f"Avg Loss: {avg_loss:.6f}, Epsilon: {epsilon:.4f}")

    # Save Model to Google Drive (same folder as data)
    drive_save_path = os.path.join(os.path.dirname(csv_path), f"{symbol}_brain.pth")
    torch.save(policy_net.state_dict(), drive_save_path)
    print(f"Model Saved to {drive_save_path}!")

    plt.plot(loss_history)
    plt.title(f"Training Loss - {symbol}")
    plt.show()

In [None]:
# --- EXECUTION ---

# Folder on the mounted Google Drive that holds one CSV per symbol.
drive_data_path = "/content/drive/MyDrive/data"

pairs = ['XAUUSD', 'EURUSD', 'GBPUSD']  # Added other pairs

for symbol in pairs:
    # Each symbol is expected at <drive_data_path>/<symbol>.csv
    csv_path = os.path.join(drive_data_path, f"{symbol}.csv")

    if not os.path.exists(csv_path):
        print(f"Data for {symbol} not found. Skipping.")
        continue

    print(f"Found data for {symbol} at: {csv_path}")
    train_model(symbol, csv_path)
    


In [None]:
# --- BACKTEST ENGINE (Colab Version) ---
# This runs immediately after training to verify performance on the Test Split

def _net_trade_pnl(position, entry_price, exit_price, contract_size, lot_size, txn_cost):
    """Return the net PnL of closing a long (1) or short (-1) position."""
    if position == 1:
        gross_pnl = (exit_price - entry_price) * contract_size * lot_size
    else:
        gross_pnl = (entry_price - exit_price) * contract_size * lot_size
    return gross_pnl - txn_cost


def _first_column_with_prefix(df, prefix):
    """Return the first column of ``df`` whose name starts with ``prefix``."""
    for column in df.columns:
        if str(column).startswith(prefix):
            return column
    raise KeyError(f"No column starting with '{prefix}' found in backtest data.")


def colab_backtest(symbol, csv_path, lot_size=0.01):
    """Replay the saved model over the test split and print/plot the results.

    Loads ``<csv folder>/<symbol>_brain.pth``, takes the rows of the CSV from
    ``Settings.TRAIN_SPLIT_INDEX`` onward, and simulates one position at a
    time: signals are decided on bar ``t`` and executed at the open of bar
    ``t + 1``, with an ATR-based stop loss, a move-to-breakeven rule and a
    time-based exit. Prints final balance, trade count and win rate, and plots
    the balance after every bar.

    Args:
        symbol: Instrument name, e.g. 'EURUSD'. Selects contract size, costs
            and whether the trading-hours filter applies.
        csv_path: Path to the price CSV for ``symbol``.
        lot_size: Position size in lots used for every trade. The default of
            0.01 appears consistent with the per-trade cost constants below
            (NOTE(review): confirm the intended size).

    Returns:
        None. Returns early (after printing a message) if the model file is
        missing or the CSV has fewer than ``TRAIN_SPLIT_INDEX + 100`` rows.
    """
    print(f"\n--- Starting Backtest for {symbol} ---")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Path to the model we just trained/saved
    model_path = os.path.join(os.path.dirname(csv_path), f"{symbol}_brain.pth")

    if not os.path.exists(model_path):
        print(f"Model {model_path} not found. Please train first.")
        return

    # Load Model
    policy_net = QNetwork().to(device)
    policy_net.load_state_dict(torch.load(model_path, map_location=device))
    policy_net.eval()
    print("Model loaded.")

    # Load Data
    df = pd.read_csv(csv_path, parse_dates=['time'], index_col='time')

    # Split Test Data (Using strict index from Settings)
    test_start_idx = Settings.TRAIN_SPLIT_INDEX
    if len(df) < test_start_idx + 100:
        print(f"Not enough data for backtest. Total: {len(df)}, Needed: >{test_start_idx}")
        return

    print(f"Backtesting on rows {test_start_idx} to {len(df)}...")
    df_test = df.iloc[test_start_idx:].copy()
    df_test = prepare_features(df_test)

    # Prepare Vectors
    feature_data = df_test[Settings.FEATURES].values
    opens = df_test['open'].values
    highs = df_test['high'].values
    lows = df_test['low'].values
    closes = df_test['close'].values
    times = df_test.index
    # 'volatility' is ATR / close, so multiplying back gives ATR in price units.
    atrs = df_test['volatility'].values * closes

    # BB Columns for Filter. Looked up by prefix because the exact suffix of
    # the pandas_ta band columns differs between library versions.
    bb_u = df_test[_first_column_with_prefix(df_test, 'BBU_20_')].values
    bb_l = df_test[_first_column_with_prefix(df_test, 'BBL_20_')].values

    # Simulation State
    balance = 10000.0
    equity_curve = [balance]
    position = 0  # 0=None, 1=Long, -1=Short
    entry_price = 0.0
    stop_loss = 0.0

    # Costs (Dynamic based on Symbol)
    is_forex = "USD" in symbol and "XAU" not in symbol
    if is_forex:
        # Forex (EURUSD, GBPUSD)
        contract_size = 100000
        txn_cost = 0.15
    else:
        # Gold (XAUUSD)
        contract_size = 100
        txn_cost = 0.27

    trades = []

    for t in tqdm(range(Settings.SEQUENCE_LENGTH, len(df_test) - 1)):
        # State Input
        window = feature_data[t - Settings.SEQUENCE_LENGTH : t]
        state_tensor = torch.FloatTensor(window).unsqueeze(0).to(device)

        # Model Decision
        with torch.no_grad():
            q = policy_net(state_tensor)
            # PROBABILITY FILTER (Fix 1: Confidence > 0.60): softmax over the
            # Q-values is used as a confidence score.
            probs = torch.nn.functional.softmax(q, dim=1)
            confidence, action = torch.max(probs, dim=1)

        action = action.item()
        confidence = confidence.item()

        if action != 0 and confidence < 0.60:
            action = 0  # VETO: Low Confidence

        # --- FILTERS (Asset Personality Fix) ---
        if action != 0:  # Only filter if trying to trade
            current_time = times[t]

            # 1. TIME FILTER (EURUSD Kill Switch): block forex signals when
            # the bar's hour is before 8 or after 17.
            if is_forex and (current_time.hour < 8 or current_time.hour > 17):
                action = 0

            # 2. BOLLINGER SQUEEZE FILTER: band width below 0.05% of price
            width = bb_u[t] - bb_l[t]
            vol_pct = width / closes[t]
            if vol_pct < 0.0005:
                action = 0

        # Market Data at Next Open (Execution)
        next_open = opens[t + 1]
        next_high = highs[t + 1]
        next_low = lows[t + 1]
        next_time = times[t + 1]
        atr = atrs[t]

        # --- 1. Exits (SL / Time / Breakeven) ---
        trade_closed = False
        exit_price = 0.0

        if position != 0:
            # Time Hard Exit (20:00)
            if next_time.hour >= 20 and next_time.minute == 0:
                exit_price = next_open
                trade_closed = True

            # SL Check
            elif position == 1:
                if next_low <= stop_loss:
                    exit_price = stop_loss
                    trade_closed = True
                # Breakeven: after a 1x ATR favourable move, lift the stop to entry
                elif (next_high - entry_price) > (1.0 * atr) and stop_loss < entry_price:
                    stop_loss = entry_price

            elif position == -1:
                if next_high >= stop_loss:
                    exit_price = stop_loss
                    trade_closed = True
                # Breakeven
                elif (entry_price - next_low) > (1.0 * atr) and stop_loss > entry_price:
                    stop_loss = entry_price

            if trade_closed:
                net_pnl = _net_trade_pnl(position, entry_price, exit_price,
                                         contract_size, lot_size, txn_cost)
                balance += net_pnl
                trades.append(net_pnl)
                position = 0

        # --- 2. Entries ---
        # No new entry on a bar where a trade was just closed.
        if not trade_closed:
            if action == 1:  # Buy Signal
                if position == -1:  # Reverse: close the short at the next open first
                    net_pnl = _net_trade_pnl(position, entry_price, next_open,
                                             contract_size, lot_size, txn_cost)
                    balance += net_pnl
                    trades.append(net_pnl)
                    position = 0
                if position == 0:
                    position = 1
                    entry_price = next_open
                    stop_loss = entry_price - (atr * 2.5)

            elif action == 2:  # Sell Signal
                if position == 1:  # Reverse: close the long at the next open first
                    net_pnl = _net_trade_pnl(position, entry_price, next_open,
                                             contract_size, lot_size, txn_cost)
                    balance += net_pnl
                    trades.append(net_pnl)
                    position = 0
                if position == 0:
                    position = -1
                    entry_price = next_open
                    stop_loss = entry_price + (atr * 2.5)

        equity_curve.append(balance)

    # Stats
    wins = sum(1 for pnl in trades if pnl > 0)
    win_rate = wins / len(trades) * 100 if trades else 0
    print(f"\nFinal Balance: ${balance:.2f}")
    print(f"Total Trades: {len(trades)}")
    print(f"Win Rate: {win_rate:.1f}%")

    plt.figure(figsize=(12, 6))
    plt.plot(equity_curve)
    plt.title(f"Backtest Equity - {symbol}")
    plt.show()

# Run Backtest for all pairs; symbols without a CSV on Drive are silently skipped.
for symbol in pairs:
    csv_path = os.path.join(drive_data_path, f"{symbol}.csv")
    if not os.path.exists(csv_path):
        continue
    colab_backtest(symbol, csv_path)