In [56]:
import pandas as pd
import numpy as np

In [57]:
features = pd.read_csv("workflow/data/features.csv", index_col=0, parse_dates=True)

features.head(), features.shape

(                MTCH        AZO        PEP        XOM        COF       RVTY  \
 Date                                                                          
 2005-01-04  6.262746  89.699997  30.812344  26.729958  62.349834  18.821409   
 2005-01-05  6.213877  90.290001  30.848204  26.590258  61.728230  18.260733   
 2005-01-06  6.196771  89.550003  31.069269  26.928753  62.501431  18.441881   
 2005-01-07  6.257859  89.730003  31.338142  26.751440  62.129990  18.433252   
 2005-01-10  6.270078  90.739998  32.120846  26.853527  62.395287  18.459126   
 
                     GE   CDNS        FCX        EQR  ...   AZO_RET   PEP_RET  \
 Date                                                 ...                       
 2005-01-04  131.925140  13.30  11.094222  14.954946  ... -0.011638 -0.007150   
 2005-01-05  131.122208  13.31  10.966949  14.475262  ...  0.006556  0.001163   
 2005-01-06  132.180527  13.34  10.951435  14.475262  ... -0.008230  0.007141   
 2005-01-07  131.377762  13.28  1

In [58]:
lookback = 30
n_samples = len(features) - lookback
n_features = len(features.columns)

n_samples, lookback, n_features

(4702, 30, 22)

In [59]:
X = np.array([features.iloc[i-lookback:i] for i in range(lookback, len(features))])
X.shape

(4702, 30, 22)

In [71]:
import gym
from gym import spaces

class TradeEnv(gym.Env):
    def __init__(self, data, tc=0.002):
        super().__init__()
        self.data = data
        self.n_assets = self.data.shape[-1] // 2
        self.portfolio = np.zeros(self.n_assets)
        self.current_step = 0
        self.action_space = spaces.MultiDiscrete([3] * self.n_assets)
        self.done = False
        self.portfolio_returns = [0, ]
        self.tc = tc
    
    def step(self, action_vector):
        self.current_step += 1

        if self.current_step >= len(self.data) - 1:
            self.done = True

        next_state = self.data[self.current_step]
        total_price = sum(next_state[0][:self.n_assets])

        step_return = 0  # Initialize step return
        for i in range(self.n_assets):
            action = action_vector[i]
            returns = next_state[0][i + self.n_assets]
            price_proportion = next_state[0][i] / total_price if total_price > 0 else 0

            if action == 1:
                step_return += (returns * price_proportion) - self.tc  # Transaction cost applied
                self.portfolio[i] += 1
            elif action == 2:
                step_return -= (returns * price_proportion) + self.tc  # Transaction cost applied
                self.portfolio[i] -= 1


        self.portfolio_returns.append(step_return)  # Append step return to portfolio returns
        volatility = np.std(self.portfolio_returns)
        mean_portfolio_return = np.mean(self.portfolio_returns)
        sharpe = mean_portfolio_return / volatility if volatility != 0 else 0  # Check for division by zero

        if self.done:
            self.current_step = 0
            self.portfolio = np.zeros(self.n_assets)
            self.portfolio_returns = [0, ]  # Reset to empty list

        return next_state, sharpe, self.done, {}

    def reset(self):
        self.portfolio = np.zeros(self.n_assets)
        self.current_step = 0
        self.done = False
        self.portfolio_returns = [0, ]
        return self.data[self.current_step]

In [117]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class LSTMDQN(nn.Module):
    def __init__(self, input_dim, n_assets, hidden_dim=64, n_layers=1):
        super(LSTMDQN, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 64)
        self.fc2 = nn.Linear(64, n_assets * 3)  # Three actions per asset: hold, buy, sell

    def forward(self, x):
        h_0 = torch.zeros(1, x.size(0), 64).to(x.device)
        c_0 = torch.zeros(1, x.size(0), 64).to(x.device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc1(out[:, -1, :])  # Use the last LSTM output only
        out = self.fc2(out)
        return out.view(out.size(0), -1, 3)  # Reshape to (batch_size, n_assets, 3)

In [118]:
# Initialize your environment
env = TradeEnv(X[:(len(X)//2)])  # Replace 'data' with your actual data array
n_assets = env.n_assets

# Initialize the DQN model
input_dim = 22  # Number of features
model = LSTMDQN(input_dim, n_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [119]:
from tqdm.auto import tqdm
n_episodes = 10
lookback = 50  # Define your lookback window
epsilon = 0.1  # For ε-greedy action selection

for episode in tqdm(range(n_episodes)):
    state = env.reset()
    state = state[:lookback]  # Consider only 'lookback' recent states
    done = False
    episode_reward = 0

    while not done:
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = model(state_tensor)

        if np.random.rand() < epsilon:
            action = np.random.randint(3, size=n_assets)  # Random action
        else:
            action = torch.argmax(q_values, dim=2).cpu().numpy()[0]  # Greedy action

        next_state, reward, done, _ = env.step(action)
        next_state = next_state[:lookback]
        episode_reward += reward

        # Q-learning update
        # Q-learning update
        # Q-learning update
        with torch.no_grad():
            next_q_values = model(torch.FloatTensor(next_state).unsqueeze(0))
            max_next_q_values, _ = torch.max(next_q_values, dim=2)
            target = (reward + 0.99 * max_next_q_values).detach()  # Detach target from computation graph

                # Expanding dimensions of action_indices to match q_values
        action_indices_expanded = torch.LongTensor(action).unsqueeze(0).unsqueeze(2)

        # Gathering chosen q_values based on actions
        chosen_q_values = torch.gather(q_values, 2, action_indices_expanded).squeeze(2)

        # Compute loss
        loss = criterion(chosen_q_values, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        state = next_state

    print(f"Episode {episode+1}, Episode Reward: {episode_reward}")

  0%|          | 0/10 [00:00<?, ?it/s]

Episode 1, Episode Reward: -5144.673392375755
Episode 2, Episode Reward: -5333.034206299664
Episode 3, Episode Reward: -5389.174351476372
Episode 4, Episode Reward: -5589.210034088686
Episode 5, Episode Reward: -5463.713089841311
Episode 6, Episode Reward: -5523.735496703524
Episode 7, Episode Reward: -5636.696202021589
Episode 8, Episode Reward: -5180.571737226846
Episode 9, Episode Reward: -4449.08069387108
Episode 10, Episode Reward: -4834.179559749112


In [126]:
def test_model(model, env, n_episodes=10, lookback=50):
    rewards = []
    sharpe_ratios = [0, ]

    for episode in tqdm(range(n_episodes)):
        state = env.reset()
        state = state[:lookback]  # Consider only 'lookback' recent states
        done = False
        episode_reward = 0

        while not done:
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            q_values = model(state_tensor)
            action = torch.argmax(q_values, dim=2).cpu().numpy()[0]  # Greedy action

            next_state, reward, done, _ = env.step(action)
            next_state = next_state[:lookback]
            episode_reward += reward

            state = next_state

        sharpe_ratio = episode_reward / np.std(env.portfolio_returns) if np.std(env.portfolio_returns) != 0 else 0
        sharpe_ratios.append(sharpe_ratio)
        rewards.append(episode_reward)
        print(env.portfolio_returns)

        print(f"Test Episode {episode+1}, Episode Reward: {episode_reward}, Sharpe Ratio: {sharpe_ratio}")
        print(sharpe_ratios)

    avg_reward = np.mean(rewards)
    avg_sharpe_ratio = np.mean(sharpe_ratios)
    print(f"Average Test Reward: {avg_reward}, Average Test Sharpe Ratio: {avg_sharpe_ratio}")

# Test the model after training
test_model(model, env)

  0%|          | 0/10 [00:00<?, ?it/s]

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


[]
Test Episode 1, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan]
[]
Test Episode 2, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan]
[]
Test Episode 3, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan]
[]
Test Episode 4, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan]
[]
Test Episode 5, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan, nan]
[]
Test Episode 6, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan, nan, nan]
[]
Test Episode 7, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan, nan, nan, nan]
[]
Test Episode 8, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan, nan, nan, nan, nan]
[]
Test Episode 9, Episode Reward: -8033.0742275396415, Sharpe Ratio: nan
[0, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[]
Test Episode 10, Episode Reward: -8033.0742275396415, Sharpe Ratio: na

In [122]:
rewards

NameError: name 'rewards' is not defined