In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as utils_data

import time as time

from train_deep import run_epoch, train, infer_model
from sota_models.deeplob import DeepLOB
from sota_models.deepreslob import DeepResLOB

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
import wandb

# Never paste the API key into the notebook: it is stored with the file and its outputs.
# `wandb.login()` picks the key up from the WANDB_API_KEY environment variable or the
# local netrc file, and asks for it interactively when neither is available.
# NOTE(review): the key that used to be hard-coded in this cell is exposed and should be revoked.
wandb.login()

Successfully logged in to Weights & Biases!


wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\asang/.netrc


In [43]:
def load_weights(model, dataset, k, token_name=None, weights_root='./weights'):
    """
    Load pre-trained weights into `model` in place.

    The checkpoint is read from
        <weights_root>/<ModelClass>/<dataset>/[<token>/]<prefix>_[<token>_]<k>.pth
    where <prefix> is the lower-cased class name ("reslob" for DeepResLOB) and the
    <token> parts are only present for "Tokens-setup1".

    Parameters:
        model - the neural network model, should be either DeepLOB or DeepResLOB
        dataset - "FI-2010" for the benchmark dataset;
                  "Tokens-setup1" for the first experimental setup for tokens;
                  "Tokens-setup2" for the second experimental setup for tokens;
        k - desired prediction horizon, available:
            FI-2010 : k = 1, 5, 10;
            Crypto : k = 1, 5, 10, 20
        token_name - is only used in case "Tokens-setup1" is used, viable values:
                     "BTC", "ETH", "LTC", "XRP"
        weights_root - directory that contains the per-model weight folders

    Raises:
        ValueError - if dataset is "Tokens-setup1" and token_name is not given
    """
    import os  # local import: the notebook's import cell does not bring in `os`

    per_token = dataset == 'Tokens-setup1'
    if per_token and not token_name:
        raise ValueError('token_name is required when dataset is "Tokens-setup1"')

    class_name = model.__class__.__name__
    # DeepResLOB checkpoints are stored under the shorter "reslob" file prefix.
    prefix = 'reslob' if class_name == 'DeepResLOB' else class_name.lower()

    folders = [weights_root, class_name, dataset]
    stem = [prefix]
    if per_token:
        folders.append(token_name.lower())
        stem.append(token_name.lower())
    stem.append(str(k))
    path = os.path.join(*folders, '_'.join(stem) + '.pth')

    # torch.load unpickles the file, so only point this at trusted checkpoints.
    # The tensors are mapped to the CPU first; the caller moves the model afterwards.
    model.load_state_dict(torch.load(path, map_location='cpu'))

# FI-2010 dataset

In [36]:
from pathlib import Path

# Are we running inside Google Colaboratory? A successful `import google.colab`
# is taken as the signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# `AUX_DATA_ROOT` is the folder the FI-2010 train / val / test `.npy` files are loaded from.
if not IN_COLAB:
    AUX_DATA_ROOT = Path("./FI-2010")
else:
    google.colab.drive.mount("/content/drive")

    # Change this if you created the shortcut in a different location
    AUX_DATA_ROOT = Path("/content/drive/My Drive/Project Data")

In [37]:
# Load the FI-2010 train / validation / test arrays (features, then labels) from AUX_DATA_ROOT.
train_X = np.load(AUX_DATA_ROOT / 'train_data.npy')
train_Y = np.load(AUX_DATA_ROOT / 'train_labels.npy')
val_X = np.load(AUX_DATA_ROOT / 'val_data.npy')
val_Y = np.load(AUX_DATA_ROOT / 'val_labels.npy')
test_X = np.load(AUX_DATA_ROOT / 'test_data.npy')
test_Y = np.load(AUX_DATA_ROOT / 'test_labels.npy')

In [38]:
from dataset_wrapper import FI_Dataset

# Prediction horizon index: 0 := (k = 1), 1 := (k = 2), 2 := (k = 3), 3 := (k = 5), 4 := (k = 10)
k = 0
# Sliding time window size
T = 100

# Wrap every split into a PyTorch dataset
train_dset = FI_Dataset(train_X, train_Y, T, k)
val_dset = FI_Dataset(val_X, val_Y, T, k)
test_dset = FI_Dataset(test_X, test_Y, T, k)

In [8]:
# Switch for the FI-2010 training cell: it only runs when this is True.
DO_TRAIN = False

In [56]:
if DO_TRAIN:
    # Only the training loader is shuffled; all other loader settings are shared.
    train_loader = utils_data.DataLoader(dataset=train_dset, batch_size=64, shuffle=True,
                                         num_workers=4, pin_memory=True)
    val_loader = utils_data.DataLoader(dataset=val_dset, batch_size=64, shuffle=False,
                                       num_workers=4, pin_memory=True)

    # DeepLOB and its optimizer
    deep_lob = DeepLOB().to(device)
    dl_optimizer = optim.Adam(deep_lob.parameters(), lr=0.01, eps=1)

    # DeepResLOB and its optimizer (weight_decay: 1e-3 for k = 10)
    deep_res_lob = DeepResLOB().to(device)
    rl_optimizer = optim.Adam(deep_res_lob.parameters(), lr=0.01, eps=1, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()

    # Train DeepLOB first, then DeepResLOB, with identical settings apart from the
    # optimizer and the checkpoint file name.
    for net, opt, checkpoint in ((deep_lob, dl_optimizer, './deeplob_best.pth'),
                                 (deep_res_lob, rl_optimizer, './reslob_best.pth')):
        train(net, opt, criterion, train_loader, val_loader, device, n_epochs=100,
              ch_name=checkpoint, early_stopping=True, es_patience=20, metric='accuracy', freq=1)

In [41]:
# Evaluation loader: batches of 64, samples kept in their stored order.
test_loader = utils_data.DataLoader(dataset=test_dset, batch_size=64, shuffle=False)

In [11]:
# Restore the stored DeepLOB weights for FI-2010, k = 1, then move the model to `device`.
deep_lob = DeepLOB()
load_weights(model=deep_lob, dataset='FI-2010', k=1)
deep_lob = deep_lob.to(device)

# (acc, f1, precision, recall) - the order in which `setup2` unpacks this tuple
infer_model(deep_lob, test_loader, device)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


(81.79593373493977, 80.88149782728132, 83.02023544848606, 81.79593373493977)

In [42]:
# Restore the stored DeepResLOB weights for FI-2010, k = 1, then move the model to `device`.
deep_res_lob = DeepResLOB()
load_weights(model=deep_res_lob, dataset='FI-2010', k=1)
deep_res_lob = deep_res_lob.to(device)

# (acc, f1, precision, recall) - the order in which `setup2` unpacks this tuple
infer_model(deep_res_lob, test_loader, device)

./weights/DeepResLOB/FI-2010/reslob_1


(82.43913152610442, 81.28611724970958, 83.9808184022695, 82.43913152610442)

# Cryptocurrency - setup 1

In [44]:
from pathlib import Path

# Are we running inside Google Colaboratory? A successful `import google.colab`
# is taken as the signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# `AUX_DATA_ROOT` is the folder the per-token `input_*.npy` / `labels_*.npy` files are loaded from.
if not IN_COLAB:
    AUX_DATA_ROOT = Path("./crypto_data")
else:
    google.colab.drive.mount("/content/drive")

    # Change this if you created the shortcut in a different location
    AUX_DATA_ROOT = Path("/content/drive/My Drive/Processed_datasets")

In [45]:
# Load data
def load_token(token_name, k):
    """
    Read the input and label arrays of a single token from AUX_DATA_ROOT.

    Parameters:
        token_name - available tokens: "BTC", "ETH", "LTC", "XRP" (upper-cased before use),
        k - prediction horizon, available: 1, 5, 10, 20

    Returns:
        (data, labels) - arrays loaded from 'input_<TOKEN>USDT_<k>.npy' and
                         'labels_<TOKEN>USDT_<k>.npy'
    """
    suffix = '{}USDT_{}.npy'.format(token_name.upper(), k)
    # Inputs are read first, labels second.
    data, labels = (np.load(AUX_DATA_ROOT / (prefix + suffix)) for prefix in ('input_', 'labels_'))
    return data, labels

In [46]:
# BTC arrays for prediction horizon k = 1
data, labels = load_token(token_name="BTC", k=1)

In [47]:
from dataset_wrapper import CryptoDataset

def train_val_test_split(data, labels, splits=None, T=60):
    """
    Cut `data` / `labels` into three consecutive, non-overlapping segments and wrap
    each one into a CryptoDataset.

    Parameters:
        data, labels - input data and labels,
        splits - tuple of integers (train_size, val_size); defaults to 70% / 15% of
                 the number of rows, the test segment is whatever remains,
        T - sliding time window size

    Returns:
        train_dset, val_dset, test_dset - the three CryptoDataset objects,
        weights - per-class weights for labels 0, 1, 2: the largest training class
                  count divided by each class count
    """
    if splits is None:
        n_rows = data.shape[0]
        splits = (int(0.7 * n_rows), int(0.15 * n_rows))
    train_end = splits[0]
    val_end = splits[0] + splits[1]

    segments = (
        (data[:train_end], labels[:train_end]),
        (data[train_end:val_end], labels[train_end:val_end]),
        (data[val_end:], labels[val_end:]),
    )

    # Class balance is measured on the training labels only.
    train_Y = segments[0][1]
    class_counts = []
    for cls in range(3):
        class_counts.append((train_Y == cls).sum())
    largest = max(class_counts)
    weights = [largest / count for count in class_counts]

    train_dset, val_dset, test_dset = [CryptoDataset(X, Y, T) for X, Y in segments]

    return train_dset, val_dset, test_dset, weights

In [48]:
# Default split sizes and window length from `train_val_test_split`
train_dset, val_dset, test_dset, weights = train_val_test_split(data=data, labels=labels)

In [49]:
# Switch for the setup 1 training cell: it only runs when this is True.
DO_TRAIN = False

In [54]:
if DO_TRAIN:
    # Only the training loader is shuffled; all other loader settings are shared.
    train_loader = utils_data.DataLoader(dataset=train_dset, batch_size=64, shuffle=True,
                                         num_workers=4, pin_memory=True)
    val_loader = utils_data.DataLoader(dataset=val_dset, batch_size=64, shuffle=False,
                                       num_workers=4, pin_memory=True)

    # DeepLOB and its optimizer
    deep_lob = DeepLOB().to(device)
    dl_optimizer = optim.Adam(deep_lob.parameters(), lr=0.01, eps=1)

    # DeepResLOB and its optimizer
    deep_res_lob = DeepResLOB().to(device)
    rl_optimizer = optim.Adam(deep_res_lob.parameters(), lr=0.01, eps=1, weight_decay=1e-4)

    # Cross-entropy weighted with the per-class weights computed from the training labels
    class_weights = torch.tensor(weights, dtype=torch.float, device=device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Train DeepLOB first, then DeepResLOB, with identical settings apart from the
    # optimizer and the checkpoint file name.
    for net, opt, checkpoint in ((deep_lob, dl_optimizer, './deeplob_crypto_best.pth'),
                                 (deep_res_lob, rl_optimizer, './reslob_crypto_best.pth')):
        train(net, opt, criterion, train_loader, val_loader, device, n_epochs=100,
              ch_name=checkpoint, early_stopping=True, es_patience=20, metric='f1 score', freq=1)

In [51]:
# Evaluation loader: batches of 64, samples kept in their stored order.
test_loader = utils_data.DataLoader(dataset=test_dset, batch_size=64, shuffle=False)

In [30]:
# Restore the stored DeepLOB weights for setup 1, BTC, k = 1, then move the model to `device`.
deep_lob = DeepLOB()
load_weights(model=deep_lob, dataset='Tokens-setup1', k=1, token_name='BTC')
deep_lob = deep_lob.to(device)

# (acc, f1, precision, recall) - the order in which `setup2` unpacks this tuple
infer_model(deep_lob, test_loader, device)

(81.0210905447909, 81.88823049930238, 89.20778206023384, 81.02109053497942)

In [52]:
# Restore the stored DeepResLOB weights for setup 1, BTC, k = 1, then move the model to `device`.
deep_res_lob = DeepResLOB()
load_weights(model=deep_res_lob, dataset='Tokens-setup1', k=1, token_name='BTC')
deep_res_lob = deep_res_lob.to(device)

# (acc, f1, precision, recall) - the order in which `setup2` unpacks this tuple
infer_model(deep_res_lob, test_loader, device)

(84.83539094895492, 84.32400780474309, 89.11055882230482, 84.83539094650206)

# Cryptocurrency - setup 2

In [53]:
from pathlib import Path

# Are we running inside Google Colaboratory? A successful `import google.colab`
# is taken as the signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# `AUX_DATA_ROOT` is the folder the per-token `input_*.npy` / `labels_*.npy` files are loaded from.
if not IN_COLAB:
    AUX_DATA_ROOT = Path("./crypto_data")
else:
    google.colab.drive.mount("/content/drive")

    # Change this if you created the shortcut in a different location
    AUX_DATA_ROOT = Path("/content/drive/My Drive/Processed_datasets")

In [58]:
# Load data
def load_data(k, splits=None, T=60):
    """
    Build pooled BTC + LTC + ETH train / validation datasets plus one test set per token.

    Parameters:
        k - prediction horizon, available: 1, 5, 10, 20,
        splits - pair of (train_size, val_size) pairs: splits[0] is applied to both
                 BTC and LTC, splits[1] to ETH; defaults to 80% / 10% of the BTC and
                 of the ETH row counts respectively,
        T - sliding time window size

    Returns:
        train_dset, val_dset - CryptoDataset objects over the concatenated
                               (BTC, LTC, ETH) train / validation segments,
        test_sets - [BTC, LTC, ETH] CryptoDataset objects over the remainder of each token,
        weights - per-class weights for labels 0, 1, 2: the largest training class
                  count divided by each class count
    """
    def read(token):
        # Inputs are read first, labels second.
        stem = '{}USDT_{}.npy'.format(token, k)
        return np.load(AUX_DATA_ROOT / ('input_' + stem)), np.load(AUX_DATA_ROOT / ('labels_' + stem))

    btc = read('BTC')
    ltc = read('LTC')
    eth = read('ETH')

    if splits is None:
        n_btc, n_eth = btc[0].shape[0], eth[0].shape[0]
        splits = [(int(0.8 * n_btc), int(0.1 * n_btc)),
                  (int(0.8 * n_eth), int(0.1 * n_eth))]

    # (end of train segment, end of validation segment) for each token.
    # NOTE(review): LTC is cut at the BTC boundaries, so this presumably relies on
    # the BTC and LTC arrays having the same length - confirm.
    btc_cut = (splits[0][0], splits[0][0] + splits[0][1])
    eth_cut = (splits[1][0], splits[1][0] + splits[1][1])
    tokens = [(btc, btc_cut), (ltc, btc_cut), (eth, eth_cut)]

    # Train and validation sets, pooled in BTC, LTC, ETH order
    train_X = np.concatenate([arrs[0][:cut[0]] for arrs, cut in tokens], axis=0)
    train_Y = np.concatenate([arrs[1][:cut[0]] for arrs, cut in tokens], axis=0)
    val_X = np.concatenate([arrs[0][cut[0]:cut[1]] for arrs, cut in tokens], axis=0)
    val_Y = np.concatenate([arrs[1][cut[0]:cut[1]] for arrs, cut in tokens], axis=0)

    # Class balance is measured on the pooled training labels only.
    class_counts = [(train_Y == cls).sum() for cls in range(3)]
    most_common = max(class_counts)
    weights = [most_common / count for count in class_counts]

    train_dset = CryptoDataset(train_X, train_Y, T)
    val_dset = CryptoDataset(val_X, val_Y, T)

    # Separate test sets: everything after each token's validation segment
    test_sets = [CryptoDataset(arrs[0][cut[1]:], arrs[1][cut[1]:], T) for arrs, cut in tokens]

    return train_dset, val_dset, test_sets, weights

In [59]:
# Pooled datasets for prediction horizon k = 1, default split sizes and window length
train_dset, val_dset, test_sets, weights = load_data(k=1)

In [72]:
# Switch for the setup 2 training cell: it only runs when this is True.
DO_TRAIN = False

In [64]:
if DO_TRAIN:
    # Only the training loader is shuffled; all other loader settings are shared.
    train_loader = utils_data.DataLoader(dataset=train_dset, batch_size=64, shuffle=True,
                                         num_workers=4, pin_memory=True)
    val_loader = utils_data.DataLoader(dataset=val_dset, batch_size=64, shuffle=False,
                                       num_workers=4, pin_memory=True)

    # DeepLOB and its optimizer
    deep_lob = DeepLOB().to(device)
    dl_optimizer = optim.Adam(deep_lob.parameters(), lr=0.01, eps=1)

    # DeepResLOB and its optimizer
    deep_res_lob = DeepResLOB().to(device)
    rl_optimizer = optim.Adam(deep_res_lob.parameters(), lr=0.01, eps=1, weight_decay=1e-4)

    # Cross-entropy weighted with the per-class weights computed from the training labels
    class_weights = torch.tensor(weights, dtype=torch.float, device=device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Train DeepLOB first, then DeepResLOB, with identical settings apart from the
    # optimizer and the checkpoint file name.
    # NOTE(review): these checkpoint names are the same ones the setup 1 training cell
    # passes as `ch_name`; if `train` writes to that path, the files get overwritten.
    for net, opt, checkpoint in ((deep_lob, dl_optimizer, './deeplob_crypto_best.pth'),
                                 (deep_res_lob, rl_optimizer, './reslob_crypto_best.pth')):
        train(net, opt, criterion, train_loader, val_loader, device, n_epochs=100,
              ch_name=checkpoint, early_stopping=True, es_patience=20, metric='f1 score', freq=1)

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
wandb: Wandb version 0.9.0 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


KeyboardInterrupt: 

In [71]:
def setup2(model, test_sets, device, k=1):
    """
    Evaluate `model` on the BTC, LTC and ETH test sets and on the full XRP data,
    printing the four metrics returned by `infer_model` for each of them.

    Parameters:
        model - network to evaluate,
        test_sets - test datasets indexed in BTC, LTC, ETH order,
        device - device passed through to `infer_model`,
        k - prediction horizon used to load the XRP arrays
    """
    def report(header, dataset):
        # One evaluation pass over `dataset`, then the header and the four metric lines.
        loader = utils_data.DataLoader(dataset, shuffle=False, batch_size=64)
        acc, f1, prec, rec = infer_model(model, loader, device)
        print(header)
        print("Test accuracy: {} %".format(acc))
        print("Test F1 score: {} %".format(f1))
        print("Test precision score: {} %".format(prec))
        print("Test recall score: {} %".format(rec))

    for idx, name in enumerate(["BTC", "LTC", "ETH"]):
        report(name + ":", test_sets[idx])

    # XRP is loaded in full and wrapped with a window of 60.
    xrp_X, xrp_Y = load_token("XRP", k)
    report("XRP transfer learning:", CryptoDataset(xrp_X, xrp_Y, T=60))

In [69]:
# Restore the stored DeepLOB weights for setup 2, k = 1, then move the model to `device`.
deep_lob = DeepLOB()
load_weights(model=deep_lob, dataset='Tokens-setup2', k=1)
deep_lob = deep_lob.to(device)

# Per-token test metrics plus the XRP transfer-learning run
setup2(deep_lob, test_sets, device, k=1)

BTC:
Test accuracy: 86.69367285422337 %
Test F1 score: 87.43848472278508 %
Test precision score: 91.66555230522587 %
Test recall score: 86.69367283950618 %
LTC:
Test accuracy: 85.5150463036549 %
Test F1 score: 85.60711868002161 %
Test precision score: 90.3459292213265 %
Test recall score: 85.51504629629629 %
ETH:
Test accuracy: 88.4331983590276 %
Test F1 score: 88.41068679439317 %
Test precision score: 91.25881901563436 %
Test recall score: 88.43319835053993 %
XRP transfer learning:
Test accuracy: 83.91782407407408 %
Test F1 score: 84.21731384536011 %
Test precision score: 90.04753871511154 %
Test recall score: 83.91782407407408 %


In [70]:
# Restore the stored DeepResLOB weights for setup 2, k = 1.
deep_res_lob = DeepResLOB()
load_weights(deep_res_lob, 'Tokens-setup2', 1)
# Keep the moved model bound to `deep_res_lob`; assigning the result to `deep_lob`
# would overwrite the DeepLOB model with the DeepResLOB one.
deep_res_lob = deep_res_lob.to(device)

# Per-token test metrics plus the XRP transfer-learning run
setup2(deep_res_lob, test_sets, device, 1)

BTC:
Test accuracy: 89.90354939007464 %
Test F1 score: 90.40081105959054 %
Test precision score: 93.143718531075 %
Test recall score: 89.90354938271605 %
LTC:
Test accuracy: 85.5054012419265 %
Test F1 score: 85.62350985419458 %
Test precision score: 90.22510157121391 %
Test recall score: 85.5054012345679 %
ETH:
Test accuracy: 88.72189094435494 %
Test F1 score: 88.66901986114077 %
Test precision score: 91.55992096511478 %
Test recall score: 88.72189094576956 %
XRP transfer learning:
Test accuracy: 86.19695216049382 %
Test F1 score: 86.49951240356641 %
Test precision score: 91.20760804953395 %
Test recall score: 86.19695216049382 %
