In [301]:
import requests
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
import torch
import architecture
from IPython.display import display
import torch.optim as optim
from architecture import LSTM
from pathlib import Path
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import mean_squared_error
import math
import matplotlib.animation as animation 
import logging
import itertools
from IPython.display import HTML
from collections import OrderedDict
%matplotlib inline
# Seed PyTorch's random number generator so repeated runs start from the same state.
torch.manual_seed(1)

# Render floating-point values in pandas output with 8 digits of precision.
pd.set_option("display.precision", 8)

# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [328]:
# Load the minute-level data for every trading pair.
SFX_BTC_df = pd.read_csv('data/SFX_BTC_minute.csv')
BTC_RSD_df = pd.read_csv('data/BTC_RSD_minute.csv')
ETH_BTC_df = pd.read_csv('data/ETH_BTC_minute.csv')
ETH_RSD_df = pd.read_csv('data/ETH_RSD_minute.csv')
SFT_RSD_df = pd.read_csv('data/SFT_RSD_minute.csv')
SFX_RSD_df = pd.read_csv('data/SFX_RSD_minute.csv')

# Bounds of the study window; used for the reference index and for slicing each pair.
WINDOW_START = '2019-06-26 00:00'
WINDOW_END = '2020-05-25 00:00'

# Reference frame with one row per minute of the window. Every pair is aligned
# against this index below so that all pairs share the same timestamps.
# ('min' is the non-deprecated spelling of the minute frequency alias 'T'.)
empty_daterange = pd.date_range(start=WINDOW_START, end=WINDOW_END, freq='min')
df = pd.DataFrame(index=empty_daterange, columns=['A'])
display(df)

# Collect the frames by pair name to make it easier to build the master dataframe later.
df_dict = {'SFX_BTC': SFX_BTC_df,
           'BTC_RSD': BTC_RSD_df,
           'ETH_BTC': ETH_BTC_df,
           'ETH_RSD': ETH_RSD_df,
           'SFT_RSD': SFT_RSD_df,
           'SFX_RSD': SFX_RSD_df}

# Order the pairs from most rows to fewest. The key must measure the frame
# (item[1]); len() of the (name, frame) item itself is always 2, which would
# leave the order unchanged.
df_dict = OrderedDict(sorted(df_dict.items(), key=lambda item: len(item[1]), reverse=True))

for k in df_dict:
    pair_df = df_dict[k]

    # Use the (timezone-naive) timestamp as the index.
    pair_df.index = pd.to_datetime(pair_df['timestamp']).rename('').dt.tz_localize(None)
    # Drop the timestamp column now that it is the index. This is done in place
    # on the loaded frame on purpose: a later cell reads SFX_BTC_df.columns and
    # expects 'timestamp' to be gone.
    pair_df.drop(['timestamp'], axis=1, inplace=True)
    # Fill missing values with the previous row.
    pair_df.ffill(inplace=True)

    # Restrict the pair to the study window.
    windowed = pair_df[WINDOW_START:WINDOW_END]
    # Keep only the timestamps that also exist in the reference index, then
    # forward-fill explicitly (align's own `method=` argument is deprecated).
    _, windowed = df.align(windowed, join='inner', axis=0)
    windowed = windowed.ffill()
    # Any quantity still missing (nothing earlier to fill from) becomes 0.
    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection, which is not guaranteed to modify the frame.
    windowed['quantity'] = windowed['quantity'].fillna(0)
    # Remove duplicate timestamps, keeping the last occurrence.
    windowed = windowed.loc[~windowed.index.duplicated(keep='last')]

    df_dict[k] = windowed
    display(windowed)


Unnamed: 0,A
2019-06-26 00:00:00,
2019-06-26 00:01:00,
2019-06-26 00:02:00,
2019-06-26 00:03:00,
2019-06-26 00:04:00,
...,...
2020-05-24 23:56:00,
2020-05-24 23:57:00,
2020-05-24 23:58:00,
2020-05-24 23:59:00,


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,0.00000400,0.00000400,0.00000400,0.00000400,0,0.0,0.0
2019-06-26 00:01:00,0.00000400,0.00000400,0.00000400,0.00000400,0,0.0,0.0
2019-06-26 00:02:00,0.00000400,0.00000400,0.00000400,0.00000400,0,0.0,0.0
2019-06-26 00:03:00,0.00000400,0.00000400,0.00000400,0.00000400,0,0.0,0.0
2019-06-26 00:04:00,0.00000400,0.00000400,0.00000400,0.00000400,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,0.00000137,0.00000137,0.00000137,0.00000137,0,0.0,0.0
2020-05-24 23:57:00,0.00000137,0.00000137,0.00000137,0.00000137,0,0.0,0.0
2020-05-24 23:58:00,0.00000137,0.00000137,0.00000137,0.00000137,0,0.0,0.0
2020-05-24 23:59:00,0.00000137,0.00000137,0.00000137,0.00000137,0,0.0,0.0


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,9.64459735e+05,9.64459735e+05,9.64459735e+05,9.64459735e+05,0,0.0,0.0
2019-06-26 00:01:00,9.64459735e+05,9.64459735e+05,9.64459735e+05,9.64459735e+05,0,0.0,0.0
2019-06-26 00:02:00,9.64459735e+05,9.64459735e+05,9.64459735e+05,9.64459735e+05,0,0.0,0.0
2019-06-26 00:03:00,9.64459735e+05,9.64459735e+05,9.64459735e+05,9.64459735e+05,0,0.0,0.0
2019-06-26 00:04:00,9.64459735e+05,9.64459735e+05,9.64459735e+05,9.64459735e+05,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,1.19999800e+06,1.19999800e+06,1.19999800e+06,1.19999800e+06,0,0.0,0.0
2020-05-24 23:57:00,1.19999800e+06,1.19999800e+06,1.19999800e+06,1.19999800e+06,0,0.0,0.0
2020-05-24 23:58:00,1.19999800e+06,1.19999800e+06,1.19999800e+06,1.19999800e+06,0,0.0,0.0
2020-05-24 23:59:00,1.19999800e+06,1.19999800e+06,1.19999800e+06,1.19999800e+06,0,0.0,0.0


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,0.03020000,0.03020000,0.03020000,0.03020000,0,0.0,0.0
2019-06-26 00:01:00,0.03020000,0.03020000,0.03020000,0.03020000,0,0.0,0.0
2019-06-26 00:02:00,0.03020000,0.03020000,0.03020000,0.03020000,0,0.0,0.0
2019-06-26 00:03:00,0.03020000,0.03020000,0.03020000,0.03020000,0,0.0,0.0
2019-06-26 00:04:00,0.03020000,0.03020000,0.03020000,0.03020000,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,0.02894139,0.02894139,0.02894139,0.02894139,0,0.0,0.0
2020-05-24 23:57:00,0.02894139,0.02894139,0.02894139,0.02894139,0,0.0,0.0
2020-05-24 23:58:00,0.02894139,0.02894139,0.02894139,0.02894139,0,0.0,0.0
2020-05-24 23:59:00,0.02894139,0.02894139,0.02894139,0.02894139,0,0.0,0.0


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,29000.00000000,29000.00000000,29000.00000000,29000.00000000,0,0.0,0.0
2019-06-26 00:01:00,29000.00000000,29000.00000000,29000.00000000,29000.00000000,0,0.0,0.0
2019-06-26 00:02:00,29000.00000000,29000.00000000,29000.00000000,29000.00000000,0,0.0,0.0
2019-06-26 00:03:00,29000.00000000,29000.00000000,29000.00000000,29000.00000000,0,0.0,0.0
2019-06-26 00:04:00,29000.00000000,29000.00000000,29000.00000000,29000.00000000,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,26999.98999999,26999.98999999,26999.98999999,26999.98999999,0,0.0,0.0
2020-05-24 23:57:00,26999.98999999,26999.98999999,26999.98999999,26999.98999999,0,0.0,0.0
2020-05-24 23:58:00,26999.98999999,26999.98999999,26999.98999999,26999.98999999,0,0.0,0.0
2020-05-24 23:59:00,26999.98999999,26999.98999999,26999.98999999,26999.98999999,0,0.0,0.0


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,19.00000000,19.00000000,19.00000000,19.00000000,0,0.0,0.0
2019-06-26 00:01:00,19.00000000,19.00000000,19.00000000,19.00000000,0,0.0,0.0
2019-06-26 00:02:00,19.00000000,19.00000000,19.00000000,19.00000000,0,0.0,0.0
2019-06-26 00:03:00,19.00000000,19.00000000,19.00000000,19.00000000,0,0.0,0.0
2019-06-26 00:04:00,19.00000000,19.00000000,19.00000000,19.00000000,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,0.72499999,0.72499999,0.72499999,0.72499999,0,0.0,0.0
2020-05-24 23:57:00,0.72499999,0.72499999,0.72499999,0.72499999,0,0.0,0.0
2020-05-24 23:58:00,0.72499999,0.72499999,0.72499999,0.72499999,0,0.0,0.0
2020-05-24 23:59:00,0.72499999,0.72499999,0.72499999,0.72499999,0,0.0,0.0


Unnamed: 0,open,high,low,close,count,volume,quantity
2019-06-26 00:00:00,36.90000000,36.90000000,36.90000000,36.90000000,0,0.0,0.0
2019-06-26 00:01:00,36.90000000,36.90000000,36.90000000,36.90000000,0,0.0,0.0
2019-06-26 00:02:00,36.90000000,36.90000000,36.90000000,36.90000000,0,0.0,0.0
2019-06-26 00:03:00,36.90000000,36.90000000,36.90000000,36.90000000,0,0.0,0.0
2019-06-26 00:04:00,36.90000000,36.90000000,36.90000000,36.90000000,0,0.0,0.0
...,...,...,...,...,...,...,...
2020-05-24 23:56:00,1.74999986,1.74999986,1.74999986,1.74999986,0,0.0,0.0
2020-05-24 23:57:00,1.74999986,1.74999986,1.74999986,1.74999986,0,0.0,0.0
2020-05-24 23:58:00,1.74999986,1.74999986,1.74999986,1.74999986,0,0.0,0.0
2020-05-24 23:59:00,1.74999986,1.74999986,1.74999986,1.74999986,0,0.0,0.0


In [540]:
# Build a table with one row per instrument and one column per field, where
# each cell stores that field's whole series as a NumPy array.
instrument_rows = ['BTC', 'SFX_BTC', 'BTC_RSD', 'ETH_BTC',
                   'ETH_RSD', 'SFT_RSD', 'SFX_RSD']
series_fields = ('open', 'high', 'low', 'close', 'count', 'volume', 'quantity')

dataset_df = pd.DataFrame(index=instrument_rows, columns=SFX_BTC_df.columns)

# The BTC row uses the same all-ones array (shaped like the SFX_BTC close
# series) for every field.
BTC = np.ones_like(df_dict['SFX_BTC']['close'])
dataset_df.loc['BTC'] = pd.Series({field: BTC for field in series_fields})

# Every loaded pair contributes its own columns, converted to NumPy arrays.
for pair_name, pair_frame in df_dict.items():
    dataset_df.loc[pair_name] = pd.Series(
        {field: pair_frame[field].to_numpy() for field in series_fields}
    )


In [541]:
# Show the assembled table: one row per instrument, one column per field.
dataset_df

Unnamed: 0,open,high,low,close,count,volume,quantity
BTC,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
SFX_BTC,"[4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-...","[4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-...","[4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-...","[4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-06, 4e-...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
BTC_RSD,"[964459.73547203, 964459.73547203, 964459.7354...","[964459.73547203, 964459.73547203, 964459.7354...","[964459.73547203, 964459.73547203, 964459.7354...","[964459.73547203, 964459.73547203, 964459.7354...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
ETH_BTC,"[0.0302, 0.0302, 0.0302, 0.0302, 0.0302, 0.030...","[0.0302, 0.0302, 0.0302, 0.0302, 0.0302, 0.030...","[0.0302, 0.0302, 0.0302, 0.0302, 0.0302, 0.030...","[0.0302, 0.0302, 0.0302, 0.0302, 0.0302, 0.030...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
ETH_RSD,"[29000.0, 29000.0, 29000.0, 29000.0, 29000.0, ...","[29000.0, 29000.0, 29000.0, 29000.0, 29000.0, ...","[29000.0, 29000.0, 29000.0, 29000.0, 29000.0, ...","[29000.0, 29000.0, 29000.0, 29000.0, 29000.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
SFT_RSD,"[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....","[19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
SFX_RSD,"[36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36....","[36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36....","[36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36....","[36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36.9, 36....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [542]:
# For each price field, stack the per-instrument arrays stored in dataset_df
# along axis 1, so each instrument becomes one column of the result.
close_value, open_value, high_value, low_value = (
    np.stack(dataset_df[field].to_numpy(), axis=1)
    for field in ('close', 'open', 'high', 'low')
)

In [543]:
# Divide every price matrix by the last row of close_value (broadcast across rows).
# NOTE(review): V_t is built from open_value, while close_value only supplies
# the divisor — confirm that open (rather than close) prices are intended here.
final_close_row = close_value[-1]
V_t = open_value / final_close_row
V_t_high = high_value / final_close_row
V_t_low = low_value / final_close_row

In [546]:
# Stack the three normalised matrices along a new leading axis, then reverse
# the axis order so the stacked axis ends up last.
stacked_features = np.stack((V_t, V_t_high, V_t_low))
X_t = np.transpose(stacked_features)

In [547]:
# Inspect the dimensions of the stacked feature array.
X_t.shape

(7, 480961, 3)