In [1]:
from tqdm import tqdm
from torch_geometric_temporal.signal import temporal_signal_split
import torch
import numpy as np

from data.ETFsZZR import ETFsZZR
from data.CRSPLoader import CRSPLoader
from loss_functions.SharpeLoss import SharpeLoss
from models.TGNNPO import TGNNPO

In [2]:
# Build the CRSP temporal dataset for a small ETF universe and wrap it in
# PyTorch DataLoaders.
# NOTE: this cell reads num_timesteps_in, num_timesteps_out, train_ratio, device,
# batch_size, shuffle and drop_last; they are assigned in the hyperparameter cell,
# which therefore has to be executed before this one.
def _float_tensor_on_device(array):
    """Convert a numpy array into a float32 tensor moved to `device`."""
    return torch.from_numpy(array).float().to(device)


etf_tickers = ['SPY', 'XLF', 'XLB', 'XLK', 'XLV']
loader = CRSPLoader(load_data=True)
loader._update_ticker_index(ticker_list=etf_tickers)
dataset = loader.get_dataset(
    data=loader.select_tickers(tickers=etf_tickers),
    num_timesteps_in=num_timesteps_in,
    num_timesteps_out=num_timesteps_out,
)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=train_ratio)

# Training split: features/targets -> tensors -> TensorDataset -> DataLoader
train_input = np.array(train_dataset.features)
train_target = np.array(train_dataset.targets)
train_x_tensor = _float_tensor_on_device(train_input)
train_target_tensor = _float_tensor_on_device(train_target)
train_dataset_new = torch.utils.data.TensorDataset(train_x_tensor, train_target_tensor)
train_loader = torch.utils.data.DataLoader(
    train_dataset_new,
    batch_size=batch_size,
    shuffle=shuffle,
    drop_last=drop_last,
)

# Test split: same pipeline, but the batch order is never shuffled
test_input = np.array(test_dataset.features)
test_target = np.array(test_dataset.targets)
test_x_tensor = _float_tensor_on_device(test_input)
test_target_tensor = _float_tensor_on_device(test_target)
test_dataset_new = torch.utils.data.TensorDataset(test_x_tensor, test_target_tensor)
test_loader = torch.utils.data.DataLoader(
    test_dataset_new,
    batch_size=batch_size,
    shuffle=False,
    drop_last=drop_last,
)

# Graph structure is assumed static: the edge_index of the first training
# snapshot is taken once and passed to the model for every batch.
first_train_snapshot = next(iter(train_dataset))
static_edge_index = first_train_snapshot.edge_index.to(device)

Loading in saved CRSP data...


  self._load_data(self.load_path)


Generating CRSP dataset...
Generating feature matrix...


100%|██████████| 5531/5531 [00:08<00:00, 622.50it/s]


Generating target matrix...


100%|██████████| 5531/5531 [00:02<00:00, 1986.75it/s]


In [6]:
# ---- Run configuration: tunable values read by the other cells ----

# Execution and data-pipeline settings
device = torch.device('cpu')   # tensors, edge index and model are moved here
train_ratio = 0.7              # passed to temporal_signal_split(train_ratio=...)
num_timesteps_in = 12          # passed to loader.get_dataset(num_timesteps_in=...)
num_timesteps_out = 12         # passed to loader.get_dataset(num_timesteps_out=...)
batch_size = 10                # DataLoader batch size (train and test)
shuffle = False                # shuffle flag of the training DataLoader only
drop_last = True               # DataLoader drop_last flag (train and test)

# Optimisation settings
epochs = 10                    # epoch count read by the training loop
learning_rate = 1e-3           # lr of the Adam optimizer

# Model (TGNNPO) constructor arguments
node_features = 985            # TGNNPO(node_features=...)
# NOTE(review): 5531 equals the iteration count of the dataset-generation
# progress bars -- confirm this is the value TGNNPO expects for `periods`.
periods = 5531                 # TGNNPO(periods=...)
# NOTE(review): differs from the DataLoader batch_size (10) -- confirm intended.
nn_batch_size = 2              # TGNNPO(batch_size=...)

In [7]:
# Train TGNNPO on the training batches using the Sharpe-ratio loss.
#
# Reads from earlier cells: node_features, periods, nn_batch_size, device,
# learning_rate, epochs, train_loader, static_edge_index.
# Produces: model, lossfn, optimizer, and loss_mean (one loss value per
# optimisation step, accumulated over all epochs).
loss_mean = []
loss_total = 0.0  # running sum of loss_mean, avoids re-summing the list each step

# (1) model
model = TGNNPO(node_features=node_features, periods=periods, batch_size=nn_batch_size).to(device)

# (2) loss function
lossfn = SharpeLoss()

# (3) optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# (4) training procedure
model.train()
# range(epochs), not range(epochs + 1): `epochs` is the number of passes to run.
for epoch in range(epochs):

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for steps, (X_batch, prices_batch) in pbar:

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # predict portfolio weights
        weights_pred = model(X_batch, static_edge_index)

        # sharpe ratio loss
        loss = lossfn(prices_batch, weights_pred, ascent=True)
        step_loss = loss.item()
        loss_mean.append(step_loss)
        loss_total += step_loss

        # Running mean over every step so far (it is not reset between epochs).
        # The sign is flipped for display; presumably ascent=True makes the loss
        # the negated Sharpe ratio -- confirm in SharpeLoss.
        cur_mean = loss_total / len(loss_mean)
        pbar.set_description("Epoch: %d, sharpe (mean loss): %1.5f" % (epoch, cur_mean * -1))

        # backpropagate and update the parameters
        loss.backward()
        optimizer.step()

  0%|          | 0/383 [00:00<?, ?it/s]


IndexError: index 50 is out of bounds for dimension 3 with size 50

In [None]:
# Evaluate the trained model on the held-out test batches.
#
# This cell only runs inference: it must not call loss.backward() or
# optimizer.step(), otherwise the model would be fitted on the test set while
# it is being scored and the reported test Sharpe would be contaminated.
model.eval()

# Store for analysis
weights = []
prices = []

pbar = tqdm(enumerate(test_loader), total=len(test_loader))

# no_grad() disables autograd tracking, so no graph is built and the stored
# predictions do not keep computation graphs alive.
with torch.no_grad():
    for steps, (X_batch, prices_batch) in pbar:

        # predict portfolio weights
        weights_pred = model(X_batch, static_edge_index)

        # sharpe ratio loss (reported only, never optimised here)
        loss = lossfn(prices_batch, weights_pred, ascent=True)

        pbar.set_description("Test sharpe (loss): %1.5f" % (loss.item() * -1))

        # store predictions and true values
        prices.append(prices_batch)
        weights.append(weights_pred)