# Crypto Data
## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [2]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

# cols = ["Datetime", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
cols = ["Datetime", "Adj Close"]
num_features = len(cols) - 1


dfs = []
for ticker in tickers:
    df = download_data(ticker, cols, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [3]:
df.head()

Unnamed: 0_level_0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-02-19 00:00:00+00:00,95269.75,2668.153564,2.538271,646.44043,168.760681,0.241726,0.250638,315.277252,0.751133
2025-02-19 01:00:00+00:00,95403.398438,2667.289551,2.531942,639.447266,168.093979,0.241651,0.250279,313.945831,0.741853
2025-02-19 02:00:00+00:00,95735.453125,2690.184082,2.56774,647.165771,169.312378,0.243044,0.252883,315.845337,0.755151
2025-02-19 03:00:00+00:00,95655.007812,2692.435791,2.554245,649.266052,167.428192,0.243393,0.252757,314.958649,0.752751
2025-02-19 04:00:00+00:00,95249.117188,2667.696045,2.522813,648.201843,164.332123,0.242043,0.250211,312.219086,0.740304


In [4]:
# Save
np.save("stgan/STGAN/crypto/data/data.npy", df)

In [5]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8583, 9])

In [6]:
data_tensor

tensor([[9.5270e+04, 2.6682e+03, 2.5383e+00,  ..., 2.5064e-01, 3.1528e+02,
         7.5113e-01],
        [9.5403e+04, 2.6673e+03, 2.5319e+00,  ..., 2.5028e-01, 3.1395e+02,
         7.4185e-01],
        [9.5735e+04, 2.6902e+03, 2.5677e+00,  ..., 2.5288e-01, 3.1585e+02,
         7.5515e-01],
        ...,
        [6.7507e+04, 1.9925e+03, 1.4744e+00,  ..., 1.0063e-01, 5.6292e+02,
         2.8097e-01],
        [6.7291e+04, 1.9813e+03, 1.4697e+00,  ..., 1.0049e-01, 5.6122e+02,
         2.8008e-01],
        [6.6884e+04, 1.9691e+03, 1.4679e+00,  ..., 1.0020e-01, 5.5991e+02,
         2.7951e-01]])

In [7]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1]//num_features, num_features, 1])
np.save("stgan/STGAN/crypto/data/data.npy", data_tensor.numpy())

In [8]:
data_tensor.shape

torch.Size([8583, 9, 1, 1])

## Distances: Use Correlations

In [9]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.743896,0.84361,0.455061,0.881875,0.704507,0.79297,0.364053,0.68148
Adj Close ETH,0.743896,1.0,0.700692,0.805012,0.800429,0.924598,0.560896,0.738212,0.381438
Adj Close XRP,0.84361,0.700692,1.0,0.312866,0.852936,0.611679,0.891156,0.221774,0.844563
Adj Close BNB,0.455061,0.805012,0.312866,1.0,0.552561,0.699782,0.21029,0.701026,-0.00276
Adj Close SOL,0.881875,0.800429,0.852936,0.552561,1.0,0.687213,0.891299,0.315037,0.769373
Adj Close TRX,0.704507,0.924598,0.611679,0.699782,0.687213,1.0,0.438075,0.807957,0.276714
Adj Close DOG,0.79297,0.560896,0.891156,0.21029,0.891299,0.438075,1.0,-0.008056,0.924149
Adj Close BCH,0.364053,0.738212,0.221774,0.701026,0.315037,0.807957,-0.008056,1.0,-0.212503
Adj Close ADA,0.68148,0.381438,0.844563,-0.00276,0.769373,0.276714,0.924149,-0.212503,1.0


In [10]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [11]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.256104,0.15639,0.544939,0.118125,0.295493,0.20703,0.635947,0.31852
Adj Close ETH,0.256104,0.0,0.299308,0.194988,0.199571,0.075402,0.439104,0.261788,0.618562
Adj Close XRP,0.15639,0.299308,0.0,0.687134,0.147064,0.388321,0.108844,0.778226,0.155437
Adj Close BNB,0.544939,0.194988,0.687134,0.0,0.447439,0.300218,0.78971,0.298974,1.00276
Adj Close SOL,0.118125,0.199571,0.147064,0.447439,0.0,0.312787,0.108701,0.684963,0.230627
Adj Close TRX,0.295493,0.075402,0.388321,0.300218,0.312787,0.0,0.561925,0.192043,0.723286
Adj Close DOG,0.20703,0.439104,0.108844,0.78971,0.108701,0.561925,0.0,1.008056,0.075851
Adj Close BCH,0.635947,0.261788,0.778226,0.298974,0.684963,0.192043,1.008056,0.0,1.212503
Adj Close ADA,0.31852,0.618562,0.155437,1.00276,0.230627,0.723286,0.075851,1.212503,0.0


In [12]:
dists.to_csv("stgan/STGAN/crypto/data/node_dist.txt", sep=" ", header=False, index=False)

In [13]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.256104,0.15639,0.544939,0.118125,0.295493,0.20703,0.635947,0.31852
1,0.256104,0.0,0.299308,0.194988,0.199571,0.075402,0.439104,0.261788,0.618562
2,0.15639,0.299308,0.0,0.687134,0.147064,0.388321,0.108844,0.778226,0.155437
3,0.544939,0.194988,0.687134,0.0,0.447439,0.300218,0.78971,0.298974,1.00276
4,0.118125,0.199571,0.147064,0.447439,0.0,0.312787,0.108701,0.684963,0.230627
5,0.295493,0.075402,0.388321,0.300218,0.312787,0.0,0.561925,0.192043,0.723286
6,0.20703,0.439104,0.108844,0.78971,0.108701,0.561925,0.0,1.008056,0.075851
7,0.635947,0.261788,0.778226,0.298974,0.684963,0.192043,1.008056,0.0,1.212503
8,0.31852,0.618562,0.155437,1.00276,0.230627,0.723286,0.075851,1.212503,0.0


## Adjacency Matrix

In [14]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,7,5,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,4,2,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [15]:
adj.to_csv("stgan/STGAN/crypto/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [16]:
stdev = np.array(dists).std()
stdev

0.3028367920624965

In [17]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.489103,0.765912,0.039242,0.85886,0.385934,0.626655,0.01215686,0.3307954
1,0.489103,0.0,0.376504,0.660624,0.647726,0.939888,0.122162,0.4736547,0.01542062
2,0.765912,0.376504,0.0,0.005809,0.789917,0.19316,0.878816,0.001355208,0.7684
3,0.039242,0.660624,0.005809,0.0,0.112705,0.374269,0.001114,0.3773229,1.731072e-05
4,0.85886,0.647726,0.789917,0.112705,0.0,0.344111,0.879116,0.006000946,0.5599184
5,0.385934,0.939888,0.19316,0.374269,0.344111,0.0,0.031968,0.6688853,0.003331607
6,0.626655,0.122162,0.878816,0.001114,0.879116,0.031968,0.0,1.541263e-05,0.9391935
7,0.012157,0.473655,0.001355,0.377323,0.006001,0.668885,1.5e-05,0.0,1.091489e-07
8,0.330795,0.015421,0.7684,1.7e-05,0.559918,0.003332,0.939194,1.091489e-07,0.0


In [18]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [19]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 9, 9])

In [20]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.5886e-01, 7.6591e-01, 6.2666e-01, 4.8910e-01, 3.8593e-01,
         3.3080e-01, 3.9242e-02, 1.2157e-02],
        [8.5886e-01, 0.0000e+00, 7.8992e-01, 8.7912e-01, 6.4773e-01, 3.4411e-01,
         5.5992e-01, 1.1271e-01, 6.0009e-03],
        [7.6591e-01, 7.8992e-01, 0.0000e+00, 8.7882e-01, 3.7650e-01, 1.9316e-01,
         7.6840e-01, 5.8092e-03, 1.3552e-03],
        [6.2666e-01, 8.7912e-01, 8.7882e-01, 0.0000e+00, 1.2216e-01, 3.1968e-02,
         9.3919e-01, 1.1136e-03, 1.5413e-05],
        [4.8910e-01, 6.4773e-01, 3.7650e-01, 1.2216e-01, 0.0000e+00, 9.3989e-01,
         1.5421e-02, 6.6062e-01, 4.7365e-01],
        [3.8593e-01, 3.4411e-01, 1.9316e-01, 3.1968e-02, 9.3989e-01, 0.0000e+00,
         3.3316e-03, 3.7427e-01, 6.6889e-01],
        [3.3080e-01, 5.5992e-01, 7.6840e-01, 9.3919e-01, 1.5421e-02, 3.3316e-03,
         0.0000e+00, 1.7311e-05, 1.0915e-07],
        [3.9242e-02, 1.1271e-01, 5.8092e-03, 1.1136e-03, 6.6062e-01, 3.7427e-01,
         1.7311e-05, 0.0000e+0

In [21]:
np.save("stgan/STGAN/crypto/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [22]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [23]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [24]:
# Day one-hot encoding
week_length = 7
starting_day = 2 # The starting day, Feb 19 2025, is Wednesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [25]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8578,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8579,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8580,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8581,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [26]:
time_features.to_csv("stgan/STGAN/crypto/data/time_features.txt", sep=" ", header=False, index=False)