In [78]:
import numpy as np
import gym
from gym import spaces
import random
import pandas as pd

In [111]:

class ForexTradingEnv(gym.Env):
    """Single-position trading environment over a table of bid/ask candles.

    The agent holds at most one position at a time. A position is opened by a
    Buy (1) or Sell (2) action at the current ``Bid_Close`` price and can only
    be closed by the environment itself, when the unrealised return reaches
    ``-stop_loss_pct`` or ``+take_profit_pct``. The reward is the raw price
    difference realised on that close and ``0`` on every other step.

    Notes:
        * ``reset()`` returns only the observation and ``step()`` returns
          ``(obs, reward, done, executed_action, info)``. The fourth element is
          the action that was actually applied (Buy/Sell are forced to Hold
          while a position is open), not gym's ``truncated`` flag.
        * ``balance`` is initialised and reset but is not modified by trades.

    Args:
        data: pandas DataFrame containing at least the columns listed in
            ``FEATURE_COLUMNS``, one row per time step.
        window_size: Number of past rows included in each observation.
        initial_balance: Starting value stored in ``balance``.
        stop_loss_pct: Fractional adverse move that closes a position.
        take_profit_pct: Fractional favourable move that closes a position.
    """

    # Price columns that make up one time step of the observation, in order.
    FEATURE_COLUMNS = ['Bid_Open', 'Bid_High', 'Bid_Low', 'Bid_Close',
                       'Ask_Open', 'Ask_High', 'Ask_Low', 'Ask_Close']

    def __init__(self, data, window_size=1000, initial_balance=1000, stop_loss_pct=0.02, take_profit_pct=0.05):
        super(ForexTradingEnv, self).__init__()

        self.data = data  # Forex price data, e.g., pandas DataFrame
        self.initial_balance = initial_balance
        self.window_size = window_size  # Number of past steps to include
        self.stop_loss_pct = stop_loss_pct
        self.take_profit_pct = take_profit_pct
        self.current_step = 0
        self.done = False
        self.balance = initial_balance
        self.position = None  # None, 'long' or 'short'
        self.entry_price = 0.0

        # Define action and observation space
        self.action_space = spaces.Discrete(3)  # 0 = Hold, 1 = Buy, 2 = Sell
        # Observation is the flattened (window_size, n_features) price window.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.window_size * len(self.FEATURE_COLUMNS),),
            dtype=np.float32,
        )

    def reset(self):
        """Rewind to the first row, clear any open position and return the first observation."""
        self.balance = self.initial_balance
        self.current_step = 0
        self.done = False  # was left stale from the previous episode
        self.position = None
        self.entry_price = 0.0
        return self._next_observation()

    def _next_observation(self):
        """Return the last ``window_size`` rows before ``current_step`` as a flat float32 vector.

        The row at ``current_step`` itself is not included. While fewer than
        ``window_size`` rows are available, the front of the window is padded
        with zeros so the observation always has the same length.
        """
        start_index = max(0, self.current_step - self.window_size)
        window = self.data.iloc[start_index:self.current_step][self.FEATURE_COLUMNS].values

        # Pad with zero rows at the beginning when the history is too short.
        missing_rows = self.window_size - len(window)
        if missing_rows > 0:
            padding = np.zeros((missing_rows, len(self.FEATURE_COLUMNS)))
            window = np.vstack((padding, window))

        # Flatten the array to make it a 1D observation
        return window.flatten().astype(np.float32)

    def _take_action(self, action):
        """Apply ``action`` at the current step and return the realised reward.

        With no open position, Buy/Sell opens a long/short at the current
        ``Bid_Close`` and the reward is 0. With an open position the action is
        ignored; the position is closed only if the stop-loss or take-profit
        threshold is hit, in which case the reward is the price difference in
        the position's favour (negative for a loss).
        """
        # Scalar column lookup: avoids materialising a whole row every step.
        current_price = float(self.data['Bid_Close'].iloc[self.current_step])
        reward = 0

        if self.position is None:
            # Only take Buy or Sell actions if there is no open position
            if action == 1:  # Buy
                self.position = 'long'
                self.entry_price = current_price
            elif action == 2:  # Sell
                self.position = 'short'
                self.entry_price = current_price
            # No reward as we are just entering a position
            return reward

        # Signed price move in favour of the open position.
        if self.position == 'long':
            price_move = current_price - self.entry_price
        elif self.position == 'short':
            price_move = self.entry_price - current_price
        else:
            return reward

        profit_pct = price_move / self.entry_price
        hit_stop_loss = profit_pct <= -self.stop_loss_pct
        hit_take_profit = profit_pct >= self.take_profit_pct
        if hit_stop_loss or hit_take_profit:
            reward = price_move
            self.position = None  # Exit position

        return reward

    def step(self, action):
        """Advance one time step.

        Returns:
            Tuple ``(obs, reward, done, executed_action, info)`` where
            ``executed_action`` is ``action`` after Buy/Sell has been forced
            to Hold (0) because a position was already open.
        """
        if self.position is not None and action in [1, 2]:
            # Ignore Buy or Sell actions if we already have an open position
            action = 0  # Force Hold action
        reward = self._take_action(action)
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        self.done = done  # keep the public attribute in sync with the returned flag
        obs = self._next_observation()
        return obs, reward, done, action, {}

In [114]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
import copy
# Run the network and all training tensors on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class DQN(nn.Module):
    """Fully connected Q-network: two 128-unit ReLU hidden layers and a linear output head.

    Args:
        input_dim: Length of the flattened observation vector.
        output_dim: Number of outputs (one Q-value per action).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, output_dim)

    def forward(self, x):
        """Map a batch of observations to a batch of per-action Q-values."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = hidden_layer(activations).relu()
        # Output layer is linear: Q-values are unbounded.
        return self.fc3(activations)

def train_agent(env, num_episodes=5, gamma=0.99, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995, batch_size=32):
    """Train a DQN on ``env`` with epsilon-greedy exploration and experience replay.

    Only steps with a non-zero reward (i.e. steps on which the environment
    closed a position) are stored in the replay memory. Each stored transition
    pairs the state and action that *opened* the position with the reward and
    the observation seen when it was closed.

    Args:
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(obs, reward, done, executed_action, info)``.
        num_episodes: Number of passes over ``env.data``.
        gamma: Discount factor for the bootstrap target.
        epsilon: Initial probability of taking a random action.
        epsilon_min: Lower bound for ``epsilon``.
        epsilon_decay: Multiplicative decay applied to ``epsilon`` once per episode.
        batch_size: Number of transitions sampled per gradient step.

    Returns:
        The trained ``DQN`` model (on ``device``).
    """
    memory = deque(maxlen=2000)
    model = DQN(env.observation_space.shape[0], env.action_space.n).to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        total_reward = 0.0
        # State/action that opened the most recent position; placeholders until
        # the first Buy/Sell is executed.
        historic_state = state.clone()
        with torch.no_grad():  # placeholder only; no gradient needed
            historic_action = torch.argmax(model(state)).item()
        for t in range(len(env.data)):
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                with torch.no_grad():
                    action = torch.argmax(model(state)).item()

            # The env returns the action it actually executed (Buy/Sell are
            # forced to Hold while a position is open).
            next_state, reward, done, action, _ = env.step(action)
            if action != 0:
                # A position was opened on this step: remember how it was opened.
                historic_state = state.clone()
                historic_action = action
            next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
            if reward != 0:
                total_reward += reward
                memory.append((historic_state, historic_action, reward, next_state, done))
                print(f"Episode {episode}, Step {t}, Action: {historic_action}, Reward: {reward:.4f}, Total Reward: {total_reward:.4f}")
            state = next_state

            if done:
                print(f"Episode {episode}, Total Reward: {total_reward}")
                break

            # Replay and update
            if len(memory) >= batch_size:
                batch = random.sample(memory, batch_size)
                state_batch, action_batch, reward_batch, next_state_batch, done_batch = zip(*batch)

                state_batch = torch.cat(state_batch).to(device)
                action_batch = torch.tensor(action_batch, device=device)
                reward_batch = torch.tensor(reward_batch, dtype=torch.float32, device=device)
                next_state_batch = torch.cat(next_state_batch).to(device)
                done_batch = torch.tensor(done_batch, dtype=torch.float32, device=device)

                # squeeze(1) keeps the batch dimension even when batch_size == 1,
                # so prediction and target always have matching shapes.
                current_q = model(state_batch).gather(1, action_batch.unsqueeze(1)).squeeze(1)
                # The target is a constant for the update: skip building its graph.
                with torch.no_grad():
                    max_next_q = model(next_state_batch).max(1)[0]
                    expected_q = reward_batch + (1 - done_batch) * gamma * max_next_q

                loss = criterion(current_q, expected_q)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Exploration rate is decayed once per episode, not per step.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

    return model

In [115]:
# Load the hourly GBP/USD bid/ask candles. A forward slash is used as the path
# separator: it works on Windows and POSIX alike and avoids the invalid "\G"
# escape sequence that the backslash form produced.
data = pd.read_csv("HistoricData/GBP_USD_2010_2017_hourly_bid_ask.csv")
# Build the trading environment over the full history and train the DQN on it.
env = ForexTradingEnv(data)
model = train_agent(env)

Episode 0, Step 962, Action: 2, Reward: 0.0809, Total Reward: 0.0809
Episode 0, Step 2355, Action: 2, Reward: 0.0769, Total Reward: 0.1579
Episode 0, Step 3004, Action: 2, Reward: -0.0301, Total Reward: 0.1278
Episode 0, Step 3730, Action: 1, Reward: 0.0798, Total Reward: 0.2076
Episode 0, Step 4273, Action: 1, Reward: -0.0314, Total Reward: 0.1762
Episode 0, Step 4589, Action: 2, Reward: -0.0324, Total Reward: 0.1438
Episode 0, Step 5063, Action: 2, Reward: -0.0314, Total Reward: 0.1124
Episode 0, Step 5145, Action: 1, Reward: -0.0335, Total Reward: 0.0789
Episode 0, Step 6297, Action: 1, Reward: -0.0338, Total Reward: 0.0451
Episode 0, Step 6673, Action: 2, Reward: -0.0316, Total Reward: 0.0134
Episode 0, Step 7051, Action: 2, Reward: -0.0315, Total Reward: -0.0181
Episode 0, Step 11693, Action: 1, Reward: -0.0331, Total Reward: -0.0512
Episode 0, Step 12580, Action: 2, Reward: -0.0318, Total Reward: -0.0830
Episode 0, Step 13992, Action: 2, Reward: 0.0807, Total Reward: -0.0024
Epis

In [29]:
# Display the loaded price DataFrame (pandas shows a truncated head/tail view).
data

Unnamed: 0,Time,Bid_Open,Bid_Low,Bid_High,Bid_Close,Ask_Open,Ask_Low,Ask_High,Ask_Close
0,2010-01-03T18:00:00.000000000Z,1.61542,1.61464,1.61635,1.61511,1.61642,1.61564,1.61735,1.61611
1,2010-01-03T19:00:00.000000000Z,1.61500,1.61390,1.61588,1.61488,1.61600,1.61490,1.61688,1.61588
2,2010-01-03T20:00:00.000000000Z,1.61485,1.61422,1.61488,1.61426,1.61585,1.61522,1.61588,1.61526
3,2010-01-03T21:00:00.000000000Z,1.61426,1.61024,1.61426,1.61082,1.61526,1.61122,1.61562,1.61162
4,2010-01-03T22:00:00.000000000Z,1.61075,1.61051,1.61334,1.61237,1.61155,1.61128,1.61394,1.61297
...,...,...,...,...,...,...,...,...,...
51210,2017-12-29T17:00:00.000000000Z,1.35270,1.35215,1.35354,1.35231,1.35286,1.35233,1.35371,1.35250
51211,2017-12-29T18:00:00.000000000Z,1.35231,1.35170,1.35245,1.35238,1.35253,1.35191,1.35265,1.35258
51212,2017-12-29T19:00:00.000000000Z,1.35230,1.35147,1.35239,1.35169,1.35251,1.35169,1.35258,1.35187
51213,2017-12-29T20:00:00.000000000Z,1.35168,1.34997,1.35172,1.35000,1.35187,1.35018,1.35191,1.35020


<__main__.ForexTradingEnv at 0x254736c3130>