In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter
from IPython.display import HTML
from collections import OrderedDict
%matplotlib inline
# Seed PyTorch's global RNG so runs are repeatable.
torch.manual_seed(1)

# Render DataFrame floats with 8 digits after the decimal point.
pd.set_option("display.precision", 8)

# Use the first CUDA GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Preprocessing

In [None]:
# Load Data: one minute-resolution CSV per trading pair
SFX_BTC_df = pd.read_csv('data/SFX_BTC_minute.csv')
BTC_RSD_df = pd.read_csv('data/BTC_RSD_minute.csv')
ETH_BTC_df = pd.read_csv('data/ETH_BTC_minute.csv')
ETH_RSD_df = pd.read_csv('data/ETH_RSD_minute.csv')
SFT_RSD_df = pd.read_csv('data/SFT_RSD_minute.csv')
SFX_RSD_df = pd.read_csv('data/SFX_RSD_minute.csv')

# Reference frame with exactly one row per minute of the study period; every
# pair is aligned against its index below.
range_start, range_end = '2019-06-26 00:00', '2020-05-25 00:00'
# 'min' is the minute alias; the older 'T' spelling is deprecated in recent pandas.
empty_daterange = pd.date_range(start=range_start, end=range_end, freq='min')
df = pd.DataFrame(index=empty_daterange, columns=['A'])
display(df)

# Load data into dictionary to make it easier to create the master dataframe later
df_dict = {'SFX_BTC': SFX_BTC_df,
           'BTC_RSD': BTC_RSD_df,
           'ETH_BTC': ETH_BTC_df,
           'ETH_RSD': ETH_RSD_df,
           'SFT_RSD': SFT_RSD_df,
           'SFX_RSD': SFX_RSD_df}

# Sort by longest frame first. The key must measure the DataFrame: each item is
# a (name, frame) tuple, so key=len would always see length 2 and never reorder.
df_dict = OrderedDict(sorted(df_dict.items(),
                             key=lambda item: len(item[1]),
                             reverse=True))

for k in df_dict:
    raw = df_dict[k]
    # Set index to timezone-naive datetimes. The three steps on `raw` are done
    # in place on purpose: `raw` is the same object as the module-level *_df
    # frame, and a later cell reads SFX_BTC_df.columns expecting 'timestamp'
    # to be gone.
    raw.index = pd.to_datetime(raw['timestamp']).rename('').dt.tz_localize(None)
    # Drop the timestamp column since it is now the index
    raw.drop(columns=['timestamp'], inplace=True)
    # Fill NA values with the previous row
    raw.ffill(inplace=True)
    # Normalize DF length to the study period
    windowed = raw.loc[range_start:range_end]
    # Align dates: keep only timestamps that also exist in the reference frame
    _, aligned = df.align(windowed, join='inner', axis=0)
    # Forward-fill again after aligning (previously requested through the
    # now-deprecated `method='ffill'` argument of align)
    aligned = aligned.ffill()
    # Remove duplicate indices, keeping the last row for each timestamp
    aligned = aligned.loc[~aligned.index.duplicated(keep='last')]
    # Fill the remaining NA quantity values with 0. `assign` returns a new
    # frame, avoiding chained in-place assignment on a column selection.
    df_dict[k] = aligned.assign(quantity=aligned['quantity'].fillna(0))
    display(df_dict[k])


In [None]:
# Master table: one row per trading pair, one column per field of the cleaned
# frames. Every populated cell holds that pair's whole series as a NumPy array.
pair_names = ['SFX_BTC', 'BTC_RSD', 'ETH_BTC',
              'ETH_RSD', 'SFT_RSD', 'SFX_RSD']
field_names = ('open', 'high', 'low', 'close', 'count', 'volume', 'quantity')

dataset_df = pd.DataFrame(index=pair_names, columns=SFX_BTC_df.columns)

for k, pair_frame in df_dict.items():
    # Collect each field's values as an array, then write them into the row
    # labelled with the pair name.
    row_arrays = {field: pair_frame[field].to_numpy() for field in field_names}
    dataset_df.loc[k] = pd.Series(row_arrays)


In [None]:
# Show the assembled master table (rows are indexed by trading-pair name) as this cell's output.
dataset_df

In [None]:
def create_x(input_df, window_size):
    """Build normalised price tensors and cut them into equal consecutive chunks.

    Each cell of the ``open``/``high``/``low``/``close`` columns of ``input_df``
    is expected to hold a 1-D price array, one row per trading pair, all of the
    same length ``T``. The final time step is dropped and every price is
    divided by the pair's close at the last remaining step.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame indexed by pair whose cells contain per-pair price arrays.
    window_size : int
        Despite the name, this is the NUMBER of equal sections the time axis
        is divided into (``numpy.split`` semantics), not the length of each
        section. ``T - 1`` must be divisible by it.

    Returns
    -------
    list of torch.Tensor
        ``window_size`` tensors, each shaped
        ``(pairs, (T - 1) // window_size, 3)``, with the last axis ordered
        (open, high, low).

    Raises
    ------
    ValueError
        If ``window_size`` is an integer below 1, or the time axis cannot be
        divided into ``window_size`` equal sections.
    """
    if isinstance(window_size, (int, np.integer)) and window_size < 1:
        raise ValueError("window_size must be a positive number of sections")

    def _as_matrix(column):
        # One array per pair stacked side by side -> (time, pairs); the last
        # time step is discarded.
        return np.stack(input_df[column].to_numpy(), axis=1)[:-1]

    close_value = _as_matrix('close')
    open_value = _as_matrix('open')
    high_value = _as_matrix('high')
    low_value = _as_matrix('low')

    # Latest retained close of every pair; broadcasts over the time axis.
    reference_close = close_value[-1]
    # NOTE(review): the first feature is built from OPEN prices, not closes —
    # confirm this is the intended price feature.
    V_t = open_value / reference_close
    V_t_high = high_value / reference_close
    V_t_low = low_value / reference_close

    # (feature, time, pair) reversed by .T into (pair, time, feature).
    features = np.stack([V_t, V_t_high, V_t_low], axis=0).T

    # Split while the data is still a NumPy array (np.split on a torch.Tensor
    # only works through duck typing), then wrap each chunk as a tensor.
    chunks = np.split(features, window_size, axis=1)
    return [torch.from_numpy(chunk) for chunk in chunks]

In [None]:
# Create our input: a list of tensors, one per time window. create_x lays each
# one out as (assets, time, features).
X = create_x(dataset_df, 167)

# Chronological split: the final 20% of windows are held out for testing.
# An explicit split index keeps the split correct even when the hold-out count
# rounds down to 0 (X[-0:] would otherwise select every window).
n_test = int(len(X) * 0.20)
split_at = len(X) - n_test
X_train = X[:split_at]
X_test = X[split_at:]

# Create our initial weights: one entry per asset, read from the windows
# themselves (the name X_t only exists inside create_x).
# NOTE(review): axis 0 of each window is the asset axis as create_x builds it;
# confirm that a per-asset vector is what the model should receive.
n_assets = X[0].shape[0]
# np.zeros(n) allocates n zeros; np.zeros_like([n]) would give a single-element
# integer array instead.
w_0 = np.zeros(n_assets, dtype=np.float32)
# Set first value to 1 as described in the paper
w_0[0] = 1
W_0 = torch.from_numpy(w_0)

# Create our cash bias: a single constant 1.
# NOTE(review): shaped (1, 1) on the assumption that the model appends it as
# one extra column per sample — confirm against the model's forward pass.
cash_bias = torch.ones(1, 1)

# Create our dataloaders (no shuffling, so windows stay in time order)
test_loader = DataLoader(X_test, batch_size=8)
train_loader = DataLoader(X_train, batch_size=8)

## Model

In [None]:
class DPM_CNN(torch.nn.Module):
    """Three-layer convolutional policy that outputs a softmax weight vector.

    Expected input is a 4-D batch ``(batch, 3, assets, window_length)``. The
    kernels are all of height 1, so the asset axis is never mixed by a
    convolution:

    * ``conv1``: 3 -> 2 channels, kernel ``(1, 3)``
    * ``conv2``: 2 -> 20 channels, kernel ``(1, window_length - 2)``, which
      collapses the time axis to width 1
    * the previous weights are appended as a 21st channel
    * ``conv3``: 21 -> 1 channel, kernel ``(1, 1)``
    * the cash bias is appended as a final column and softmax is taken per
      sample.

    NOTE(review): ``conv3`` takes 21 input channels (the original declared 20)
    so the previous weights can be concatenated as a channel — confirm this is
    the intended topology.

    Parameters
    ----------
    w_last : torch.Tensor
        Default previous-weights vector, used when ``forward`` is called
        without one. Must hold ``assets`` (or ``batch * assets``) values.
    cash_bias : torch.Tensor
        Constant appended after the last convolution. Must hold 1 value
        (shared by every sample) or ``batch`` values.
    window_length : int, optional
        Width (time steps) of the input. The default 2880 reproduces the
        original hard-coded ``conv2`` kernel width of 2878.
    """

    def __init__(self, w_last, cash_bias, window_length=2880):
        super().__init__()
        if window_length < 3:
            raise ValueError("window_length must be at least 3")
        self.w_last = w_last
        self.cash_bias = cash_bias
        self.conv1 = nn.Conv2d(3, 2, kernel_size=(1, 3))
        # conv1 shortens the time axis by 2; this kernel spans what is left.
        self.conv2 = nn.Conv2d(2, 20, kernel_size=(1, window_length - 2))
        # 20 feature maps plus 1 channel carrying the previous weights.
        self.conv3 = nn.Conv2d(21, 1, kernel_size=(1, 1))
        # Explicit dim: normalise across each sample's outputs, not the batch.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, w_last=None):
        """Return per-sample weights of shape ``(batch, assets + 1)``.

        Parameters
        ----------
        x : torch.Tensor
            Input of shape ``(batch, 3, assets, window_length)``.
        w_last : torch.Tensor, optional
            Previous weights; falls back to the vector given at construction.

        Raises
        ------
        ValueError
            If the input width does not match ``window_length``, or if
            ``w_last`` / ``cash_bias`` have an incompatible number of elements.
        """
        if w_last is None:
            w_last = self.w_last

        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))

        batch, _, assets, width = out.shape
        if width != 1:
            raise ValueError(
                "input width does not match the window_length the model was built with"
            )
        if w_last.numel() not in (assets, batch * assets):
            raise ValueError(
                "w_last must contain one value per asset (optionally per sample)"
            )

        # Shape the weights as one extra channel, (batch, 1, assets, 1), on the
        # same dtype/device as the feature maps. torch.cat takes a sequence.
        prev = w_last.to(out).reshape(-1, 1, assets, 1).expand(batch, -1, -1, -1)
        out = torch.cat((out, prev), dim=1)

        out = F.relu(self.conv3(out))
        # Keep the batch axis: (batch, 1, assets, 1) -> (batch, assets).
        out = torch.flatten(out, start_dim=1)

        if self.cash_bias.numel() not in (1, batch):
            raise ValueError("cash_bias must contain 1 value or one value per sample")
        cash = self.cash_bias.to(out).reshape(-1, 1).expand(batch, 1)
        out = torch.cat((out, cash), dim=-1)
        return self.softmax(out)

In [None]:
# Build the network from the initial weights and cash bias prepared earlier.
model = DPM_CNN(w_last=W_0, cash_bias=cash_bias)

# Adagrad over all of the model's parameters, with the optimizer's defaults.
opt = optim.Adagrad(params=model.parameters())

# Binary cross-entropy loss.
criterion = nn.BCELoss()

# Print the module's layer summary.
print(model)

In [None]:
epochs = 5
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    n_batches = 0
    for data in train_loader:
        # Batches arrive as (batch, assets, time, features), the layout
        # create_x produces. Conv2d wants channels first and float32, so move
        # the feature axis to position 1 and cast.
        inputs = data.permute(0, 3, 1, 2).float()

        # Clear gradients left over from the previous step; backward()
        # accumulates into .grad otherwise.
        opt.zero_grad()

        # NOTE(review): the same initial weights W_0 are passed on every step;
        # the weights returned here are not fed back into the next call.
        w_last = model(inputs, W_0)

        # NOTE(review): `labels` is not defined in any visible cell and the
        # loader yields inputs only, so this line raises NameError until a
        # target (or a label-free objective) is supplied.
        loss = criterion(w_last, labels)
        loss.backward()
        # The optimizer created for the model is named `opt`.
        opt.step()

        running_loss += loss.item()
        n_batches += 1

    # One summary line per epoch instead of one tensor repr per batch.
    print(f"epoch {epoch + 1}/{epochs} - mean loss: {running_loss / max(n_batches, 1):.6f}")
