In [1]:
import os
import pandas as pd
import numpy as np
import torch.utils.data as data
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

from utils.dataset_utils import concatenate_prices_returns, create_rolling_window_ts
from loss_functions.SharpeLoss import SharpeLoss
from models.DLPO import DLPO



In [2]:
# neural network hyperparameters
input_size = 4 * 2
output_size = 4
hidden_size = 64
num_layers = 1

# optimization hyperparameters
learning_rate = 1e-4

# training hyperparameters
device = torch.device('cpu')
epochs = 10
batch_size = 10
shuffle = True
drop_last = True
num_timesteps_in = 50
num_timesteps_out = 1
train_ratio = 0.7

# relevant paths
source_path = os.getcwd()
inputs_path = os.path.join(source_path, "data", "inputs")

# prepare dataset
prices = pd.read_excel(os.path.join(inputs_path, "etfs-zhang-zohren-roberts.xlsx"))
prices.set_index("date", inplace=True)
returns = np.log(prices).diff().dropna()
prices = prices.loc[returns.index]
features = concatenate_prices_returns(prices=prices, returns=returns)
idx = features.index
returns = returns.loc[idx].values.astype('float32')
prices = prices.loc[idx].values.astype('float32')
features = features.loc[idx].values.astype('float32')  

X, prices = create_rolling_window_ts(features=features, 
                                     target=prices,
                                     num_timesteps_in=num_timesteps_in,
                                     num_timesteps_out=num_timesteps_out)

# define train and test datasets
train_size = int(prices.shape[0] * train_ratio)
X_train, prices_train = X[0:train_size], prices[0:train_size]
X_test, prices_test = X[train_size:], prices[train_size:]

# define data loaders
train_loader = data.DataLoader(data.TensorDataset(X_train, prices_train), shuffle=shuffle, batch_size=batch_size, drop_last=drop_last)
test_loader = data.DataLoader(data.TensorDataset(X_test, prices_test), shuffle=False, batch_size=batch_size, drop_last=drop_last)

In [3]:
# (1) model
model = DLPO(input_size=input_size,
             output_size=output_size,
             hidden_size=hidden_size,
             num_layers=num_layers,
             batch_first=True).to(device)

# (2) loss fucntion
lossfn = SharpeLoss()

# (3) optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# (4) training procedure
training_loss_values = []
model.train()
for epoch in range(epochs + 1):

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, (X_batch, prices_batch) in pbar:
                
        # compute forward propagation
        weights_pred = model.forward(X_batch)

        # compute loss
        loss = lossfn(prices_batch, weights_pred, ascent=True)

        # compute gradients and backpropagate
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        pbar.set_description("Epoch: %d, sharpe (loss): %1.5f" % (epoch, loss.item() * -1))
        
    training_loss_values.append(loss.item() * -1)

training_loss_df = pd.DataFrame(training_loss_values, columns=["sharpe_ratio"])

Epoch: 0, sharpe (loss): 0.21059: 100%|██████████| 307/307 [00:02<00:00, 149.60it/s]
Epoch: 1, sharpe (loss): 0.23540: 100%|██████████| 307/307 [00:01<00:00, 175.03it/s]
Epoch: 2, sharpe (loss): 0.17361: 100%|██████████| 307/307 [00:01<00:00, 169.80it/s]
Epoch: 3, sharpe (loss): 0.18576: 100%|██████████| 307/307 [00:01<00:00, 175.89it/s]
Epoch: 4, sharpe (loss): 0.13040: 100%|██████████| 307/307 [00:02<00:00, 135.39it/s]
Epoch: 5, sharpe (loss): 0.10906: 100%|██████████| 307/307 [00:03<00:00, 88.71it/s] 
Epoch: 6, sharpe (loss): 0.16837: 100%|██████████| 307/307 [00:03<00:00, 84.73it/s] 
Epoch: 7, sharpe (loss): 0.20554: 100%|██████████| 307/307 [00:03<00:00, 89.33it/s] 
Epoch: 8, sharpe (loss): 0.18178: 100%|██████████| 307/307 [00:03<00:00, 102.15it/s] 
Epoch: 9, sharpe (loss): 0.13710: 100%|██████████| 307/307 [00:03<00:00, 83.71it/s] 
Epoch: 10, sharpe (loss): 0.23579: 100%|██████████| 307/307 [00:03<00:00, 94.80it/s] 


In [4]:
model.eval()

# Store for analysis
weights = []
prices = []

pbar = tqdm(enumerate(test_loader), total=len(test_loader))
for i, (X_batch, prices_batch) in pbar:
    
    optimizer.zero_grad()
    
    # compute forward propagation
    weights_pred = model.forward(X_batch)

    # compute loss
    loss = lossfn(prices_batch, weights_pred, ascent=True)
    
    # compute gradients and backpropagate
    loss.backward()
    optimizer.step()
    pbar.set_description("Test sharpe (loss): %1.5f" % (loss.item() * -1))

    # store predictions and true values
    prices.append(prices_batch)
    weights.append(weights_pred)

Test: sharpe (loss): 0.03370: 100%|██████████| 131/131 [00:00<00:00, 148.13it/s] 
