# Crypto Data
## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [2]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

cols = ["Datetime", "Adj Close", "Volume"]
num_features = len(cols) - 1


dfs = []
for ticker in tickers:
    df = download_data(ticker, cols, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [3]:
df.head()

Unnamed: 0_level_0,Adj Close BTC,Volume BTC,Adj Close ETH,Volume ETH,Adj Close XRP,Volume XRP,Adj Close BNB,Volume BNB,Adj Close SOL,Volume SOL,Adj Close TRX,Volume TRX,Adj Close DOG,Volume DOG,Adj Close BCH,Volume BCH,Adj Close ADA,Volume ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2025-02-19 00:00:00+00:00,95269.75,0,2668.153564,0,2.538271,0,646.44043,0,168.760681,0,0.241726,0,0.250638,0,315.277252,0,0.751133,0
2025-02-19 01:00:00+00:00,95403.398438,380502016,2667.289551,168294400,2.531942,50336768,639.447266,37487872,168.093979,22723072,0.241651,891520,0.250279,7794816,313.945831,0,0.741853,11671360
2025-02-19 02:00:00+00:00,95735.453125,1045356544,2690.184082,754515968,2.56774,95553536,647.165771,29963904,169.312378,7425024,0.243044,7484864,0.252883,6817024,315.845337,1668976,0.755151,17090880
2025-02-19 03:00:00+00:00,95655.007812,0,2692.435791,0,2.554245,59825664,649.266052,13365632,167.428192,17790976,0.243393,0,0.252757,0,314.958649,394912,0.752751,0
2025-02-19 04:00:00+00:00,95249.117188,234881024,2667.696045,0,2.522813,9893888,648.201843,2076928,164.332123,4307968,0.242043,2760384,0.250211,0,312.219086,0,0.740304,0


In [4]:
# Save
np.save("stgan/STGAN/crypto_volume/data/data.npy", df)

In [5]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto_volume/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8593, 18])

In [6]:
data_tensor

tensor([[9.5270e+04, 0.0000e+00, 2.6682e+03,  ..., 0.0000e+00, 7.5113e-01,
         0.0000e+00],
        [9.5403e+04, 3.8050e+08, 2.6673e+03,  ..., 0.0000e+00, 7.4185e-01,
         1.1671e+07],
        [9.5735e+04, 1.0454e+09, 2.6902e+03,  ..., 1.6690e+06, 7.5515e-01,
         1.7091e+07],
        ...,
        [6.8149e+04, 0.0000e+00, 2.0159e+03,  ..., 6.6458e+05, 2.8497e-01,
         0.0000e+00],
        [6.7750e+04, 3.5860e+07, 1.9952e+03,  ..., 2.5738e+06, 2.8306e-01,
         2.4896e+05],
        [6.7680e+04, 2.2038e+08, 1.9878e+03,  ..., 2.0224e+04, 2.8341e-01,
         5.6064e+04]])

In [7]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1]//num_features, num_features, 1])
np.save("stgan/STGAN/crypto_volume/data/data.npy", data_tensor.numpy())

In [8]:
data_tensor.shape

torch.Size([8593, 9, 2, 1])

## Distances: Use Correlations

In [9]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.74407,0.844396,0.45603,0.882506,0.702831,0.793956,0.360843,0.683087
Adj Close ETH,0.74407,1.0,0.701173,0.805253,0.800555,0.924147,0.5618,0.73647,0.382871
Adj Close XRP,0.844396,0.701173,1.0,0.314243,0.853641,0.610707,0.891633,0.219497,0.845261
Adj Close BNB,0.45603,0.805253,0.314243,1.0,0.553236,0.699598,0.211905,0.699578,-0.000535
Adj Close SOL,0.882506,0.800555,0.853641,0.553236,1.0,0.685945,0.891786,0.312377,0.770447
Adj Close TRX,0.702831,0.924147,0.610707,0.699598,0.685945,1.0,0.437572,0.807519,0.276544
Adj Close DOG,0.793956,0.5618,0.891633,0.211905,0.891786,0.437572,1.0,-0.009662,0.924476
Adj Close BCH,0.360843,0.73647,0.219497,0.699578,0.312377,0.807519,-0.009662,1.0,-0.213624
Adj Close ADA,0.683087,0.382871,0.845261,-0.000535,0.770447,0.276544,0.924476,-0.213624,1.0


In [10]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [11]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.25593,0.155604,0.54397,0.117494,0.297169,0.206044,0.639157,0.316913
Adj Close ETH,0.25593,0.0,0.298827,0.194747,0.199445,0.075853,0.4382,0.26353,0.617129
Adj Close XRP,0.155604,0.298827,0.0,0.685757,0.146359,0.389293,0.108367,0.780503,0.154739
Adj Close BNB,0.54397,0.194747,0.685757,0.0,0.446764,0.300402,0.788095,0.300422,1.000535
Adj Close SOL,0.117494,0.199445,0.146359,0.446764,0.0,0.314055,0.108214,0.687623,0.229553
Adj Close TRX,0.297169,0.075853,0.389293,0.300402,0.314055,0.0,0.562428,0.192481,0.723456
Adj Close DOG,0.206044,0.4382,0.108367,0.788095,0.108214,0.562428,0.0,1.009662,0.075524
Adj Close BCH,0.639157,0.26353,0.780503,0.300422,0.687623,0.192481,1.009662,0.0,1.213624
Adj Close ADA,0.316913,0.617129,0.154739,1.000535,0.229553,0.723456,0.075524,1.213624,0.0


In [12]:
dists.to_csv("stgan/STGAN/crypto_volume/data/node_dist.txt", sep=" ", header=False, index=False)

In [13]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.25593,0.155604,0.54397,0.117494,0.297169,0.206044,0.639157,0.316913
1,0.25593,0.0,0.298827,0.194747,0.199445,0.075853,0.4382,0.26353,0.617129
2,0.155604,0.298827,0.0,0.685757,0.146359,0.389293,0.108367,0.780503,0.154739
3,0.54397,0.194747,0.685757,0.0,0.446764,0.300402,0.788095,0.300422,1.000535
4,0.117494,0.199445,0.146359,0.446764,0.0,0.314055,0.108214,0.687623,0.229553
5,0.297169,0.075853,0.389293,0.300402,0.314055,0.0,0.562428,0.192481,0.723456
6,0.206044,0.4382,0.108367,0.788095,0.108214,0.562428,0.0,1.009662,0.075524
7,0.639157,0.26353,0.780503,0.300422,0.687623,0.192481,1.009662,0.0,1.213624
8,0.316913,0.617129,0.154739,1.000535,0.229553,0.723456,0.075524,1.213624,0.0


## Adjacency Matrix

In [14]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,5,7,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,4,2,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [15]:
adj.to_csv("stgan/STGAN/crypto_volume/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [16]:
stdev = np.array(dists).std()
stdev

0.3030338477193206

In [17]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.490035,0.768229,0.039863,0.860423,0.382254,0.629824,0.01169377,0.3349761
1,0.490035,0.0,0.378164,0.661658,0.648447,0.939267,0.123558,0.4694142,0.0158063
2,0.768229,0.378164,0.0,0.00597,0.79194,0.191987,0.879956,0.001315058,0.7704774
3,0.039863,0.661658,0.00597,0.0,0.11377,0.374296,0.001155,0.3742472,1.843248e-05
4,0.860423,0.648447,0.79194,0.11377,0.0,0.341617,0.880273,0.005805548,0.5633643
5,0.382254,0.939267,0.191987,0.374296,0.341617,0.0,0.031914,0.6680104,0.0033474
6,0.629824,0.123558,0.879956,0.001155,0.880273,0.031914,0.0,1.509443e-05,0.9397753
7,0.011694,0.469414,0.001315,0.374247,0.005806,0.66801,1.5e-05,0.0,1.08195e-07
8,0.334976,0.015806,0.770477,1.8e-05,0.563364,0.003347,0.939775,1.08195e-07,0.0


In [18]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [19]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 9, 9])

In [20]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.6042e-01, 7.6823e-01, 6.2982e-01, 4.9003e-01, 3.8225e-01,
         3.3498e-01, 3.9863e-02, 1.1694e-02],
        [8.6042e-01, 0.0000e+00, 7.9194e-01, 8.8027e-01, 6.4845e-01, 3.4162e-01,
         5.6336e-01, 1.1377e-01, 5.8055e-03],
        [7.6823e-01, 7.9194e-01, 0.0000e+00, 8.7996e-01, 3.7816e-01, 1.9199e-01,
         7.7048e-01, 5.9698e-03, 1.3151e-03],
        [6.2982e-01, 8.8027e-01, 8.7996e-01, 0.0000e+00, 1.2356e-01, 3.1914e-02,
         9.3978e-01, 1.1551e-03, 1.5094e-05],
        [4.9003e-01, 6.4845e-01, 3.7816e-01, 1.2356e-01, 0.0000e+00, 9.3927e-01,
         1.5806e-02, 6.6166e-01, 4.6941e-01],
        [3.8225e-01, 3.4162e-01, 1.9199e-01, 3.1914e-02, 9.3927e-01, 0.0000e+00,
         3.3474e-03, 3.7430e-01, 6.6801e-01],
        [3.3498e-01, 5.6336e-01, 7.7048e-01, 9.3978e-01, 1.5806e-02, 3.3474e-03,
         0.0000e+00, 1.8432e-05, 1.0819e-07],
        [3.9863e-02, 1.1377e-01, 5.9698e-03, 1.1551e-03, 6.6166e-01, 3.7430e-01,
         1.8432e-05, 0.0000e+0

In [21]:
np.save("stgan/STGAN/crypto_volume/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [22]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [23]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [24]:
# Day one-hot encoding
week_length = 7
starting_day = 2 # The starting day, Feb 19 2025, is Wednesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [25]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8589,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
8590,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
8591,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0


In [26]:
time_features.to_csv("stgan/STGAN/crypto_volume/data/time_features.txt", sep=" ", header=False, index=False)