# Crypto Data
## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [2]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

cols = ["Datetime", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
num_features = len(cols) - 1


dfs = []
for ticker in tickers:
    df = download_data(ticker, cols, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [3]:
df.head()

Unnamed: 0_level_0,Open BTC,High BTC,Low BTC,Close BTC,Adj Close BTC,Volume BTC,Open ETH,High ETH,Low ETH,Close ETH,...,Low BCH,Close BCH,Adj Close BCH,Volume BCH,Open ADA,High ADA,Low ADA,Close ADA,Adj Close ADA,Volume ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-02-19 00:00:00+00:00,95607.867188,95709.445312,95262.734375,95269.75,95269.75,0,2669.900879,2676.345215,2666.410645,2668.153564,...,315.277252,315.277252,315.277252,0,0.752889,0.756509,0.751133,0.751133,0.751133,0
2025-02-19 01:00:00+00:00,95275.335938,95403.398438,95015.710938,95403.398438,95403.398438,380502016,2669.362061,2669.362061,2657.042725,2667.289551,...,313.380249,313.945831,313.945831,0,0.750549,0.750549,0.741542,0.741853,0.741853,11671360
2025-02-19 02:00:00+00:00,95429.359375,95891.914062,95429.359375,95735.453125,95735.453125,1045356544,2669.238525,2697.389404,2669.238525,2690.184082,...,313.996826,315.845337,315.845337,1668976,0.742649,0.755769,0.742649,0.755151,0.755151,17090880
2025-02-19 03:00:00+00:00,95747.742188,95747.742188,95568.375,95655.007812,95655.007812,0,2688.824463,2693.704102,2679.087646,2692.435791,...,314.006866,314.958649,314.958649,394912,0.755011,0.755011,0.749763,0.752751,0.752751,0
2025-02-19 04:00:00+00:00,95715.203125,95715.203125,95249.117188,95249.117188,95249.117188,234881024,2693.387695,2693.431885,2667.696045,2667.696045,...,312.219086,312.219086,312.219086,0,0.752589,0.752589,0.740268,0.740304,0.740304,0


In [6]:
# Save
np.save("stgan/STGAN/crypto_ohlc/data/data.npy", df)

In [7]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto_ohlc/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8592, 54])

In [8]:
data_tensor

tensor([[9.5608e+04, 9.5709e+04, 9.5263e+04,  ..., 7.5113e-01, 7.5113e-01,
         0.0000e+00],
        [9.5275e+04, 9.5403e+04, 9.5016e+04,  ..., 7.4185e-01, 7.4185e-01,
         1.1671e+07],
        [9.5429e+04, 9.5892e+04, 9.5429e+04,  ..., 7.5515e-01, 7.5515e-01,
         1.7091e+07],
        ...,
        [6.8091e+04, 6.8386e+04, 6.8054e+04,  ..., 2.8515e-01, 2.8515e-01,
         4.5951e+06],
        [6.8098e+04, 6.8243e+04, 6.7999e+04,  ..., 2.8497e-01, 2.8497e-01,
         7.4640e+05],
        [6.8133e+04, 6.8145e+04, 6.7738e+04,  ..., 2.8334e-01, 2.8334e-01,
         1.1790e+06]])

In [9]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1]//num_features, num_features, 1])
np.save("stgan/STGAN/crypto_ohlc/data/data.npy", data_tensor.numpy())

In [10]:
data_tensor.shape

torch.Size([8592, 9, 6, 1])

## Distances: Use Correlations

In [11]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.74405,0.844317,0.455932,0.882441,0.702993,0.793856,0.361153,0.682927
Adj Close ETH,0.74405,1.0,0.701123,0.805229,0.800542,0.924192,0.561707,0.73664,0.382724
Adj Close XRP,0.844317,0.701123,1.0,0.314104,0.853569,0.6108,0.891585,0.219715,0.845191
Adj Close BNB,0.455932,0.805229,0.314104,1.0,0.553168,0.699614,0.211741,0.699717,-0.000758
Adj Close SOL,0.882441,0.800542,0.853569,0.553168,1.0,0.68607,0.891736,0.312638,0.770339
Adj Close TRX,0.702993,0.924192,0.6108,0.699614,0.68607,1.0,0.437618,0.807562,0.276555
Adj Close DOG,0.793856,0.561707,0.891585,0.211741,0.891736,0.437618,1.0,-0.009509,0.924443
Adj Close BCH,0.361153,0.73664,0.219715,0.699717,0.312638,0.807562,-0.009509,1.0,-0.21352
Adj Close ADA,0.682927,0.382724,0.845191,-0.000758,0.770339,0.276555,0.924443,-0.21352,1.0


In [12]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [13]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.25595,0.155683,0.544068,0.117559,0.297007,0.206144,0.638847,0.317073
Adj Close ETH,0.25595,0.0,0.298877,0.194771,0.199458,0.075808,0.438293,0.26336,0.617276
Adj Close XRP,0.155683,0.298877,0.0,0.685896,0.146431,0.3892,0.108415,0.780285,0.154809
Adj Close BNB,0.544068,0.194771,0.685896,0.0,0.446832,0.300386,0.788259,0.300283,1.000758
Adj Close SOL,0.117559,0.199458,0.146431,0.446832,0.0,0.31393,0.108264,0.687362,0.229661
Adj Close TRX,0.297007,0.075808,0.3892,0.300386,0.31393,0.0,0.562382,0.192438,0.723445
Adj Close DOG,0.206144,0.438293,0.108415,0.788259,0.108264,0.562382,0.0,1.009509,0.075557
Adj Close BCH,0.638847,0.26336,0.780285,0.300283,0.687362,0.192438,1.009509,0.0,1.21352
Adj Close ADA,0.317073,0.617276,0.154809,1.000758,0.229661,0.723445,0.075557,1.21352,0.0


In [14]:
dists.to_csv("stgan/STGAN/crypto_ohlc/data/node_dist.txt", sep=" ", header=False, index=False)

In [15]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.25595,0.155683,0.544068,0.117559,0.297007,0.206144,0.638847,0.317073
1,0.25595,0.0,0.298877,0.194771,0.199458,0.075808,0.438293,0.26336,0.617276
2,0.155683,0.298877,0.0,0.685896,0.146431,0.3892,0.108415,0.780285,0.154809
3,0.544068,0.194771,0.685896,0.0,0.446832,0.300386,0.788259,0.300283,1.000758
4,0.117559,0.199458,0.146431,0.446832,0.0,0.31393,0.108264,0.687362,0.229661
5,0.297007,0.075808,0.3892,0.300386,0.31393,0.0,0.562382,0.192438,0.723445
6,0.206144,0.438293,0.108415,0.788259,0.108264,0.562382,0.0,1.009509,0.075557
7,0.638847,0.26336,0.780285,0.300283,0.687362,0.192438,1.009509,0.0,1.21352
8,0.317073,0.617276,0.154809,1.000758,0.229661,0.723445,0.075557,1.21352,0.0


## Adjacency Matrix

In [16]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,7,5,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,4,2,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [17]:
adj.to_csv("stgan/STGAN/crypto_ohlc/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [18]:
stdev = np.array(dists).std()
stdev

0.30301578315450106

In [19]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.489938,0.767999,0.039801,0.860265,0.382611,0.629507,0.0117381,0.3345617
1,0.489938,0.0,0.377996,0.661556,0.648376,0.939329,0.123417,0.4698301,0.01576742
2,0.767999,0.377996,0.0,0.005954,0.791737,0.1921,0.879843,0.001318915,0.7702721
3,0.039801,0.661556,0.005954,0.0,0.113666,0.374292,0.001151,0.3745437,1.831911e-05
4,0.860265,0.648376,0.791737,0.113666,0.0,0.341867,0.880158,0.005824629,0.5630208
5,0.382611,0.939329,0.1921,0.374292,0.341867,0.0,0.031919,0.6680979,0.003345733
6,0.629507,0.123417,0.879843,0.001151,0.880158,0.031919,0.0,1.512519e-05,0.9397176
7,0.011738,0.46983,0.001319,0.374544,0.005825,0.668098,1.5e-05,0.0,1.082851e-07
8,0.334562,0.015767,0.770272,1.8e-05,0.563021,0.003346,0.939718,1.082851e-07,0.0


In [20]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [21]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 9, 9])

In [22]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.6027e-01, 7.6800e-01, 6.2951e-01, 4.8994e-01, 3.8261e-01,
         3.3456e-01, 3.9801e-02, 1.1738e-02],
        [8.6027e-01, 0.0000e+00, 7.9174e-01, 8.8016e-01, 6.4838e-01, 3.4187e-01,
         5.6302e-01, 1.1367e-01, 5.8246e-03],
        [7.6800e-01, 7.9174e-01, 0.0000e+00, 8.7984e-01, 3.7800e-01, 1.9210e-01,
         7.7027e-01, 5.9538e-03, 1.3189e-03],
        [6.2951e-01, 8.8016e-01, 8.7984e-01, 0.0000e+00, 1.2342e-01, 3.1919e-02,
         9.3972e-01, 1.1509e-03, 1.5125e-05],
        [4.8994e-01, 6.4838e-01, 3.7800e-01, 1.2342e-01, 0.0000e+00, 9.3933e-01,
         1.5767e-02, 6.6156e-01, 4.6983e-01],
        [3.8261e-01, 3.4187e-01, 1.9210e-01, 3.1919e-02, 9.3933e-01, 0.0000e+00,
         3.3457e-03, 3.7429e-01, 6.6810e-01],
        [3.3456e-01, 5.6302e-01, 7.7027e-01, 9.3972e-01, 1.5767e-02, 3.3457e-03,
         0.0000e+00, 1.8319e-05, 1.0829e-07],
        [3.9801e-02, 1.1367e-01, 5.9538e-03, 1.1509e-03, 6.6156e-01, 3.7429e-01,
         1.8319e-05, 0.0000e+0

In [23]:
np.save("stgan/STGAN/crypto_ohlc/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [24]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [25]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [26]:
# Day one-hot encoding
week_length = 7
starting_day = 2 # The starting day, Feb 19 2025, is Wednesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [27]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8587,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8589,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
8590,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0


In [28]:
time_features.to_csv("stgan/STGAN/crypto_ohlc/data/time_features.txt", sep=" ", header=False, index=False)