In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from utils.finance_utils import get_financial_data
from utils.torch_utils import TransformerModel

# Load financial data

In [2]:
df = get_financial_data()
df.head()

  df = yf.download(ticker, period='max') # Download data
[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,Daily Return,Lagged Return,Log Return,SMA 14,ATR 14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-02,24.261047,24.72927,23.821672,24.718174,212818400,-0.018494,-0.019019,-0.009558,24.666026,0.555242
2015-01-05,23.577572,24.110148,23.391171,24.030261,257142000,-0.018838,-0.009513,-0.028576,24.610867,0.581079
2015-01-06,23.579792,23.839422,23.218083,23.641926,263188400,-0.002628,-0.028172,9.4e-05,24.579641,0.542245
2015-01-07,23.910431,24.010288,23.677428,23.788382,160423600,0.005131,9.4e-05,0.013925,24.595492,0.511178
2015-01-08,24.82913,24.886826,24.121248,24.238859,237458000,0.024352,0.014022,0.037703,24.634801,0.531942


# Converting Pandas DataFrame to NumPy ndarray

In [3]:
X = df.to_numpy()
X.shape

(2688, 10)

# Split and scale

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test = train_test_split(X, test_size=0.2, shuffle=False)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f'Train: {X_train_scaled.shape}, Test: {X_test_scaled.shape}')

Train: (2150, 10), Test: (538, 10)


# Create sequences

In [5]:
from utils.torch_utils import create_sequence

X_train_seq, y_train_seq = create_sequence(X_train_scaled, input_seq_len=10)
X_test_seq, y_test_seq = create_sequence(X_test_scaled, input_seq_len=7)

print(f'X_train, y_train: {X_train_seq.shape} {y_train_seq.shape}')
print(f'X_test, y_test: {X_test_seq.shape} {y_test_seq.shape}')

X_train, y_train: (2139, 10, 10) (2139, 10)
X_test, y_test: (530, 7, 10) (530, 10)


# Data preparation

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Convert ndarray to torch Tensor
X_train_seq, X_test_seq, y_train_seq, y_test_seq = torch.Tensor(X_train_seq), torch.Tensor(X_test_seq), torch.Tensor(y_train_seq), torch.Tensor(y_test_seq)
# Put tensors to available device
X_train_seq, X_test_seq, y_train_seq, y_test_seq = X_train_seq.to(device), X_test_seq.to(device), y_train_seq.to(device), y_test_seq.to(device)

cuda


In [7]:
from utils.torch_utils import StockDataset

batch_size = 64

dataset_train = StockDataset(X_train_seq, y_train_seq)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=False)

dataset_test = StockDataset(X_test_seq, y_test_seq)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

# Bayes parameter search

In [8]:
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

space = [
    Integer(3, 6, name="num_layers"),
    Integer(2, 5, name="num_heads"),
    Integer(7, 9, name="d_model"),
    Real(1e-1, 4e-1, name="dropout")
]


@use_named_args(space)
def objective(num_layers, num_heads, d_model, dropout):

    num_heads = 2 ** num_heads
    d_model = 2 ** d_model

    if d_model % num_heads != 0:
        return 1e6

    model = TransformerModel(input_dim=10, 
                             output_dim=10, 
                             d_model=d_model, 
                             nhead=num_heads, 
                             dropout=dropout, 
                             num_layers=num_layers)
    model = model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    model.train()

    epochs = 50

    for epoch in range(epochs+1):

        epoch_loss = 0

        for (data, target) in dataloader_train:

            output = model(data)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        
    print(f"Avg Loss: {epoch_loss / len(dataloader_train)}, num_layers: {num_layers}, num_heads: {num_heads}, d_model: {d_model}, dropout: {dropout}")
    return epoch_loss / len(dataloader_train)

res = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=20
)

print("Best loss:", res.fun)
print(f"Best num_layers: {res.x[0]}")
print(f"Best num_heads: {2 ** res.x[1]}")
print(f"Best d_model: {2 ** res.x[2]}")
print(f"Best dropout: {res.x[3]}")
print(f'Best params list: {res.x}')

Avg Loss: 0.30128904651193056, num_layers: 5, num_heads: 4, d_model: 512, dropout: 0.3577383020598617
Avg Loss: 0.2665409848970525, num_layers: 4, num_heads: 16, d_model: 256, dropout: 0.31120549264701747
Avg Loss: 0.27598462823559256, num_layers: 5, num_heads: 32, d_model: 256, dropout: 0.32780333993504474
Avg Loss: 0.29985421720673056, num_layers: 5, num_heads: 16, d_model: 256, dropout: 0.11743771276420756
Avg Loss: 0.23773201007176847, num_layers: 3, num_heads: 4, d_model: 256, dropout: 0.13034580589698735
Avg Loss: 0.32713597311693077, num_layers: 6, num_heads: 32, d_model: 256, dropout: 0.1303294386469343
Avg Loss: 0.28603340707281055, num_layers: 5, num_heads: 32, d_model: 128, dropout: 0.10782806448130296
Avg Loss: 0.2594958061681074, num_layers: 5, num_heads: 16, d_model: 256, dropout: 0.34447069370980077
Avg Loss: 0.2531027848667958, num_layers: 5, num_heads: 32, d_model: 128, dropout: 0.12065325485280821
Avg Loss: 0.23821182373692007, num_layers: 3, num_heads: 8, d_model: 25