# Crypto Data
## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [2]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

# cols = ["Datetime", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
cols = ["Datetime", "Adj Close"]
num_features = len(cols) - 1


dfs = []
for ticker in tickers:
    df = download_data(ticker, cols, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [3]:
df.head()

Unnamed: 0_level_0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-02-19 00:00:00+00:00,95269.75,2668.153564,2.538271,646.44043,168.760681,0.241726,0.250638,315.277252,0.751133
2025-02-19 01:00:00+00:00,95403.398438,2667.289551,2.531942,639.447266,168.093979,0.241651,0.250279,313.945831,0.741853
2025-02-19 02:00:00+00:00,95735.453125,2690.184082,2.56774,647.165771,169.312378,0.243044,0.252883,315.845337,0.755151
2025-02-19 03:00:00+00:00,95655.007812,2692.435791,2.554245,649.266052,167.428192,0.243393,0.252757,314.958649,0.752751
2025-02-19 04:00:00+00:00,95249.117188,2667.696045,2.522813,648.201843,164.332123,0.242043,0.250211,312.219086,0.740304


In [4]:
# Save
np.save("stgan/STGAN/crypto/data/data.npy", df)

In [5]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8582, 9])

In [6]:
data_tensor

tensor([[9.5270e+04, 2.6682e+03, 2.5383e+00,  ..., 2.5064e-01, 3.1528e+02,
         7.5113e-01],
        [9.5403e+04, 2.6673e+03, 2.5319e+00,  ..., 2.5028e-01, 3.1395e+02,
         7.4185e-01],
        [9.5735e+04, 2.6902e+03, 2.5677e+00,  ..., 2.5288e-01, 3.1585e+02,
         7.5515e-01],
        ...,
        [6.7516e+04, 1.9936e+03, 1.4734e+00,  ..., 1.0052e-01, 5.6578e+02,
         2.8108e-01],
        [6.7507e+04, 1.9925e+03, 1.4744e+00,  ..., 1.0063e-01, 5.6292e+02,
         2.8097e-01],
        [6.7363e+04, 1.9857e+03, 1.4728e+00,  ..., 1.0075e-01, 5.6161e+02,
         2.8056e-01]])

In [7]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1]//num_features, num_features, 1])
np.save("stgan/STGAN/crypto/data/data.npy", data_tensor.numpy())

In [8]:
data_tensor.shape

torch.Size([8582, 9, 1, 1])

## Distances: Use Correlations

In [9]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.743876,0.843527,0.454956,0.881809,0.704688,0.792868,0.364377,0.681313
Adj Close ETH,0.743876,1.0,0.700639,0.804986,0.800413,0.924647,0.560799,0.738387,0.381285
Adj Close XRP,0.843527,0.700639,1.0,0.312719,0.852863,0.611781,0.891106,0.221998,0.844491
Adj Close BNB,0.454956,0.804986,0.312719,1.0,0.552488,0.699802,0.210118,0.70117,-0.002994
Adj Close SOL,0.881809,0.800413,0.852863,0.552488,1.0,0.687346,0.891249,0.315299,0.769262
Adj Close TRX,0.704688,0.924647,0.611781,0.699802,0.687346,1.0,0.438127,0.807997,0.276732
Adj Close DOG,0.792868,0.560799,0.891106,0.210118,0.891249,0.438127,1.0,-0.007902,0.924116
Adj Close BCH,0.364377,0.738387,0.221998,0.70117,0.315299,0.807997,-0.007902,1.0,-0.212398
Adj Close ADA,0.681313,0.381285,0.844491,-0.002994,0.769262,0.276732,0.924116,-0.212398,1.0


In [10]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [11]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.256124,0.156473,0.545044,0.118191,0.295312,0.207132,0.635623,0.318687
Adj Close ETH,0.256124,0.0,0.299361,0.195014,0.199587,0.075353,0.439201,0.261613,0.618715
Adj Close XRP,0.156473,0.299361,0.0,0.687281,0.147137,0.388219,0.108894,0.778002,0.155509
Adj Close BNB,0.545044,0.195014,0.687281,0.0,0.447512,0.300198,0.789882,0.29883,1.002994
Adj Close SOL,0.118191,0.199587,0.147137,0.447512,0.0,0.312654,0.108751,0.684701,0.230738
Adj Close TRX,0.295312,0.075353,0.388219,0.300198,0.312654,0.0,0.561873,0.192003,0.723268
Adj Close DOG,0.207132,0.439201,0.108894,0.789882,0.108751,0.561873,0.0,1.007902,0.075884
Adj Close BCH,0.635623,0.261613,0.778002,0.29883,0.684701,0.192003,1.007902,0.0,1.212398
Adj Close ADA,0.318687,0.618715,0.155509,1.002994,0.230738,0.723268,0.075884,1.212398,0.0


In [12]:
dists.to_csv("stgan/STGAN/crypto/data/node_dist.txt", sep=" ", header=False, index=False)

In [13]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.256124,0.156473,0.545044,0.118191,0.295312,0.207132,0.635623,0.318687
1,0.256124,0.0,0.299361,0.195014,0.199587,0.075353,0.439201,0.261613,0.618715
2,0.156473,0.299361,0.0,0.687281,0.147137,0.388219,0.108894,0.778002,0.155509
3,0.545044,0.195014,0.687281,0.0,0.447512,0.300198,0.789882,0.29883,1.002994
4,0.118191,0.199587,0.147137,0.447512,0.0,0.312654,0.108751,0.684701,0.230738
5,0.295312,0.075353,0.388219,0.300198,0.312654,0.0,0.561873,0.192003,0.723268
6,0.207132,0.439201,0.108894,0.789882,0.108751,0.561873,0.0,1.007902,0.075884
7,0.635623,0.261613,0.778002,0.29883,0.684701,0.192003,1.007902,0.0,1.212398
8,0.318687,0.618715,0.155509,1.002994,0.230738,0.723268,0.075884,1.212398,0.0


## Adjacency Matrix

In [14]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,7,5,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,4,2,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [15]:
adj.to_csv("stgan/STGAN/crypto/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [16]:
stdev = np.array(dists).std()
stdev

0.3028194955068213

In [17]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.48901,0.765674,0.039179,0.8587,0.386342,0.626333,0.01220547,0.3303684
1,0.48901,0.0,0.376331,0.66052,0.647648,0.939958,0.122019,0.4740863,0.0153816
2,0.765674,0.376331,0.0,0.005793,0.78971,0.193291,0.8787,0.001359332,0.7681886
3,0.039179,0.66052,0.005793,0.0,0.112596,0.374277,0.001109,0.377636,1.720052e-05
4,0.8587,0.647648,0.78971,0.112596,0.0,0.34438,0.878998,0.006020936,0.5595685
5,0.386342,0.939958,0.193291,0.374277,0.34438,0.0,0.031976,0.6689674,0.003330388
6,0.626333,0.122019,0.8787,0.001109,0.878998,0.031976,0.0,1.544547e-05,0.9391345
7,0.012205,0.474086,0.001359,0.377636,0.006021,0.668967,1.5e-05,0.0,1.092524e-07
8,0.330368,0.015382,0.768189,1.7e-05,0.559569,0.00333,0.939134,1.092524e-07,0.0


In [18]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [19]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 9, 9])

In [20]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.5870e-01, 7.6567e-01, 6.2633e-01, 4.8901e-01, 3.8634e-01,
         3.3037e-01, 3.9179e-02, 1.2205e-02],
        [8.5870e-01, 0.0000e+00, 7.8971e-01, 8.7900e-01, 6.4765e-01, 3.4438e-01,
         5.5957e-01, 1.1260e-01, 6.0209e-03],
        [7.6567e-01, 7.8971e-01, 0.0000e+00, 8.7870e-01, 3.7633e-01, 1.9329e-01,
         7.6819e-01, 5.7930e-03, 1.3593e-03],
        [6.2633e-01, 8.7900e-01, 8.7870e-01, 0.0000e+00, 1.2202e-01, 3.1976e-02,
         9.3913e-01, 1.1095e-03, 1.5445e-05],
        [4.8901e-01, 6.4765e-01, 3.7633e-01, 1.2202e-01, 0.0000e+00, 9.3996e-01,
         1.5382e-02, 6.6052e-01, 4.7409e-01],
        [3.8634e-01, 3.4438e-01, 1.9329e-01, 3.1976e-02, 9.3996e-01, 0.0000e+00,
         3.3304e-03, 3.7428e-01, 6.6897e-01],
        [3.3037e-01, 5.5957e-01, 7.6819e-01, 9.3913e-01, 1.5382e-02, 3.3304e-03,
         0.0000e+00, 1.7201e-05, 1.0925e-07],
        [3.9179e-02, 1.1260e-01, 5.7930e-03, 1.1095e-03, 6.6052e-01, 3.7428e-01,
         1.7201e-05, 0.0000e+0

In [21]:
np.save("stgan/STGAN/crypto/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [22]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [23]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [24]:
# Day one-hot encoding
week_length = 7
starting_day = 1 # The starting day, april 1, 2020, is Tuesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [25]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8577,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
8578,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
8579,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
8580,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [26]:
time_features.to_csv("stgan/STGAN/crypto/data/time_features.txt", sep=" ", header=False, index=False)