# Crypto Data
## Setup

In [81]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [82]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"
num_features = 6 # OHLC, Adj, Vol

dfs = []
for ticker in tickers:
    df = download_data(ticker, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [83]:
df.head()

Unnamed: 0_level_0,Open BTC,High BTC,Low BTC,Close BTC,Adj Close BTC,Volume BTC,Open ETH,High ETH,Low ETH,Close ETH,...,Low BCH,Close BCH,Adj Close BCH,Volume BCH,Open ADA,High ADA,Low ADA,Close ADA,Adj Close ADA,Volume ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-02-18 00:00:00+00:00,95839.125,96014.375,95567.773438,95595.15625,95595.15625,0,2743.781494,2753.995117,2718.82373,2720.188721,...,322.281342,322.756653,322.756653,0,0.809442,0.809931,0.793655,0.793655,0.793655,0
2025-02-18 01:00:00+00:00,95613.632812,96265.679688,95613.632812,96265.679688,96265.679688,0,2720.373047,2737.973633,2717.732666,2737.535645,...,322.412567,324.24646,324.24646,743520,0.793493,0.801217,0.792209,0.801217,0.801217,0
2025-02-18 02:00:00+00:00,96261.15625,96311.9375,96037.515625,96054.390625,96054.390625,685152256,2736.202393,2736.535645,2711.477783,2712.977783,...,323.287537,323.67984,323.67984,1671104,0.801736,0.802789,0.797374,0.797382,0.797382,0
2025-02-18 03:00:00+00:00,96046.226562,96284.421875,96046.226562,96192.21875,96192.21875,0,2712.811523,2722.069336,2710.761719,2711.736328,...,323.51828,323.861359,323.861359,1832448,0.797386,0.811233,0.797386,0.808396,0.808396,9631488
2025-02-18 04:00:00+00:00,96186.234375,96287.539062,95987.539062,96056.078125,96056.078125,538640384,2711.726318,2716.675781,2693.110352,2702.031982,...,321.972565,321.980377,321.980377,0,0.808445,0.812533,0.804836,0.80544,0.80544,0


In [84]:
# Save
np.save("stgan/STGAN/crypto/data/data.npy", df)

In [85]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8605, 54])

In [86]:
data_tensor

tensor([[9.5839e+04, 9.6014e+04, 9.5568e+04,  ..., 7.9366e-01, 7.9366e-01,
         0.0000e+00],
        [9.5614e+04, 9.6266e+04, 9.5614e+04,  ..., 8.0122e-01, 8.0122e-01,
         0.0000e+00],
        [9.6261e+04, 9.6312e+04, 9.6038e+04,  ..., 7.9738e-01, 7.9738e-01,
         0.0000e+00],
        ...,
        [6.7764e+04, 6.7891e+04, 6.7510e+04,  ..., 2.8201e-01, 2.8201e-01,
         2.9485e+05],
        [6.7638e+04, 6.7736e+04, 6.7492e+04,  ..., 2.8108e-01, 2.8108e-01,
         6.3242e+05],
        [6.7510e+04, 6.7578e+04, 6.7389e+04,  ..., 2.8122e-01, 2.8122e-01,
         2.8977e+06]])

In [87]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1]//num_features, num_features, 1])
np.save("stgan/STGAN/crypto/data/data.npy", data_tensor.numpy())

In [88]:
data_tensor.shape

torch.Size([8605, 9, 6, 1])

## Distances: Use Correlations

In [89]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.74394,0.842713,0.4551,0.881189,0.704063,0.788658,0.364692,0.679902
Adj Close ETH,0.74394,1.0,0.699799,0.80493,0.799762,0.923435,0.557105,0.73725,0.380013
Adj Close XRP,0.842713,0.699799,1.0,0.311262,0.852839,0.608315,0.889742,0.219228,0.844461
Adj Close BNB,0.4551,0.80493,0.311262,1.0,0.551125,0.700393,0.205662,0.701648,-0.004814
Adj Close SOL,0.881189,0.799762,0.852839,0.551125,1.0,0.68418,0.889514,0.312717,0.769136
Adj Close TRX,0.704063,0.923435,0.608315,0.700393,0.68418,1.0,0.429468,0.80922,0.272935
Adj Close DOG,0.788658,0.557105,0.889742,0.205662,0.889514,0.429468,1.0,-0.014946,0.923462
Adj Close BCH,0.364692,0.73725,0.219228,0.701648,0.312717,0.80922,-0.014946,1.0,-0.2147
Adj Close ADA,0.679902,0.380013,0.844461,-0.004814,0.769136,0.272935,0.923462,-0.2147,1.0


In [90]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [91]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.25606,0.157287,0.5449,0.118811,0.295937,0.211342,0.635308,0.320098
Adj Close ETH,0.25606,0.0,0.300201,0.19507,0.200238,0.076565,0.442895,0.26275,0.619987
Adj Close XRP,0.157287,0.300201,0.0,0.688738,0.147161,0.391685,0.110258,0.780772,0.155539
Adj Close BNB,0.5449,0.19507,0.688738,0.0,0.448875,0.299607,0.794338,0.298352,1.004814
Adj Close SOL,0.118811,0.200238,0.147161,0.448875,0.0,0.31582,0.110486,0.687283,0.230864
Adj Close TRX,0.295937,0.076565,0.391685,0.299607,0.31582,0.0,0.570532,0.19078,0.727065
Adj Close DOG,0.211342,0.442895,0.110258,0.794338,0.110486,0.570532,0.0,1.014946,0.076538
Adj Close BCH,0.635308,0.26275,0.780772,0.298352,0.687283,0.19078,1.014946,0.0,1.2147
Adj Close ADA,0.320098,0.619987,0.155539,1.004814,0.230864,0.727065,0.076538,1.2147,0.0


In [92]:
dists.to_csv("stgan/STGAN/crypto/data/node_dist.txt", sep=" ", header=False, index=False)

In [93]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.25606,0.157287,0.5449,0.118811,0.295937,0.211342,0.635308,0.320098
1,0.25606,0.0,0.300201,0.19507,0.200238,0.076565,0.442895,0.26275,0.619987
2,0.157287,0.300201,0.0,0.688738,0.147161,0.391685,0.110258,0.780772,0.155539
3,0.5449,0.19507,0.688738,0.0,0.448875,0.299607,0.794338,0.298352,1.004814
4,0.118811,0.200238,0.147161,0.448875,0.0,0.31582,0.110486,0.687283,0.230864
5,0.295937,0.076565,0.391685,0.299607,0.31582,0.0,0.570532,0.19078,0.727065
6,0.211342,0.442895,0.110258,0.794338,0.110486,0.570532,0.0,1.014946,0.076538
7,0.635308,0.26275,0.780772,0.298352,0.687283,0.19078,1.014946,0.0,1.2147
8,0.320098,0.619987,0.155539,1.004814,0.230864,0.727065,0.076538,1.2147,0.0


## Adjacency Matrix

In [94]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,7,5,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,2,4,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [95]:
adj.to_csv("stgan/STGAN/crypto/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [96]:
stdev = np.array(dists).std()
stdev

0.303885280754123

In [97]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.491641,0.764988,0.040146,0.85825,0.387372,0.616513,0.01264253,0.3297075
1,0.491641,0.0,0.376853,0.662285,0.647793,0.938493,0.119537,0.4735035,0.01556989
2,0.764988,0.376853,0.0,0.005877,0.790956,0.189887,0.876653,0.001358605,0.7695308
3,0.040146,0.662285,0.005877,0.0,0.112829,0.37831,0.001078,0.3813957,1.785324e-05
4,0.85825,0.647793,0.790956,0.112829,0.0,0.339565,0.876176,0.006005505,0.5614932
5,0.387372,0.938493,0.189887,0.37831,0.339565,0.0,0.029456,0.6742631,0.003265418
6,0.616513,0.119537,0.876653,0.001078,0.876176,0.029456,0.0,1.430463e-05,0.9385346
7,0.012643,0.473503,0.001359,0.381396,0.006006,0.674263,1.4e-05,0.0,1.150536e-07
8,0.329708,0.01557,0.769531,1.8e-05,0.561493,0.003265,0.938535,1.150536e-07,0.0


In [98]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [99]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

torch.Size([9, 9, 9])

In [100]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.5825e-01, 7.6499e-01, 6.1651e-01, 4.9164e-01, 3.8737e-01,
         3.2971e-01, 4.0146e-02, 1.2643e-02],
        [8.5825e-01, 0.0000e+00, 7.9096e-01, 8.7618e-01, 6.4779e-01, 3.3956e-01,
         5.6149e-01, 1.1283e-01, 6.0055e-03],
        [7.6499e-01, 7.9096e-01, 0.0000e+00, 8.7665e-01, 3.7685e-01, 1.8989e-01,
         7.6953e-01, 5.8767e-03, 1.3586e-03],
        [6.1651e-01, 8.7618e-01, 8.7665e-01, 0.0000e+00, 1.1954e-01, 2.9456e-02,
         9.3853e-01, 1.0780e-03, 1.4305e-05],
        [4.9164e-01, 6.4779e-01, 3.7685e-01, 1.1954e-01, 0.0000e+00, 9.3849e-01,
         1.5570e-02, 6.6228e-01, 4.7350e-01],
        [3.8737e-01, 3.3956e-01, 1.8989e-01, 2.9456e-02, 9.3849e-01, 0.0000e+00,
         3.2654e-03, 3.7831e-01, 6.7426e-01],
        [3.2971e-01, 5.6149e-01, 7.6953e-01, 9.3853e-01, 1.5570e-02, 3.2654e-03,
         0.0000e+00, 1.7853e-05, 1.1505e-07],
        [4.0146e-02, 1.1283e-01, 5.8767e-03, 1.0780e-03, 6.6228e-01, 3.7831e-01,
         1.7853e-05, 0.0000e+0

In [101]:
np.save("stgan/STGAN/crypto/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [102]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [103]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [104]:
# Day one-hot encoding
week_length = 7
starting_day = 1 # The starting day, april 1, 2020, is Tuesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [105]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8600,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
8601,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
8602,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8603,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [106]:
time_features.to_csv("stgan/STGAN/crypto/data/time_features.txt", sep=" ", header=False, index=False)