# Crypto Data
## Setup

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [26]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

dfs = []
for ticker in tickers:
    df = download_data(ticker, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [27]:
df.head()

Unnamed: 0_level_0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-02-18 00:00:00+00:00,95595.15625,2720.188721,2.639237,663.075073,176.735168,0.23789,0.256625,322.756653,0.793655
2025-02-18 01:00:00+00:00,96265.679688,2737.535645,2.66556,665.820312,178.113724,0.238404,0.259204,324.24646,0.801217
2025-02-18 02:00:00+00:00,96054.390625,2712.977783,2.655882,665.452759,178.124161,0.238591,0.257527,323.67984,0.797382
2025-02-18 03:00:00+00:00,96192.21875,2711.736328,2.66052,665.182251,177.878494,0.240435,0.257556,323.861359,0.808396
2025-02-18 04:00:00+00:00,96056.078125,2702.031982,2.644653,661.123718,175.766342,0.240169,0.255951,321.980377,0.80544


In [28]:
# Save
np.save("stgan-algo/STGAN/crypto/data/data.npy", df)

In [29]:
# Load
data_tensor = torch.tensor(np.load("stgan-algo/STGAN/crypto/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8603, 9])

In [30]:
data_tensor

tensor([[9.5595e+04, 2.7202e+03, 2.6392e+00,  ..., 2.5662e-01, 3.2276e+02,
         7.9366e-01],
        [9.6266e+04, 2.7375e+03, 2.6656e+00,  ..., 2.5920e-01, 3.2425e+02,
         8.0122e-01],
        [9.6054e+04, 2.7130e+03, 2.6559e+00,  ..., 2.5753e-01, 3.2368e+02,
         7.9738e-01],
        ...,
        [6.7792e+04, 1.9988e+03, 1.4846e+00,  ..., 1.0113e-01, 5.6637e+02,
         2.8320e-01],
        [6.7736e+04, 1.9944e+03, 1.4823e+00,  ..., 1.0066e-01, 5.6825e+02,
         2.8258e-01],
        [6.7592e+04, 1.9952e+03, 1.4841e+00,  ..., 1.0076e-01, 5.6795e+02,
         2.8224e-01]])

In [31]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1], 1, 1])
np.save("stgan-algo/STGAN/crypto/data/data.npy", data_tensor.numpy())

## Distances: Use Correlations

In [32]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df.corr()

In [33]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.256094,0.157449,0.545096,0.118939,0.295574,0.211547,0.634647,0.320429
Adj Close ETH,0.256094,0.0,0.300299,0.195119,0.200266,0.076465,0.44308,0.262394,0.620279
Adj Close XRP,0.157449,0.300299,0.0,0.689019,0.147305,0.391472,0.110356,0.780306,0.155682
Adj Close BNB,0.545096,0.195119,0.689019,0.0,0.449013,0.299562,0.794668,0.298056,1.005269
Adj Close SOL,0.118939,0.200266,0.147305,0.449013,0.0,0.315551,0.110586,0.686745,0.231082
Adj Close TRX,0.295574,0.076465,0.391472,0.299562,0.315551,0.0,0.570419,0.190697,0.727016
Adj Close DOG,0.211547,0.44308,0.110356,0.794668,0.110586,0.570419,0.0,1.014621,0.076605
Adj Close BCH,0.634647,0.262394,0.780306,0.298056,0.686745,0.190697,1.014621,0.0,1.21447
Adj Close ADA,0.320429,0.620279,0.155682,1.005269,0.231082,0.727016,0.076605,1.21447,0.0


In [34]:
dists.to_csv("stgan-algo/STGAN/crypto/data/node_dist.txt", sep=" ", header=False, index=False)

In [35]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.256094,0.157449,0.545096,0.118939,0.295574,0.211547,0.634647,0.320429
1,0.256094,0.0,0.300299,0.195119,0.200266,0.076465,0.44308,0.262394,0.620279
2,0.157449,0.300299,0.0,0.689019,0.147305,0.391472,0.110356,0.780306,0.155682
3,0.545096,0.195119,0.689019,0.0,0.449013,0.299562,0.794668,0.298056,1.005269
4,0.118939,0.200266,0.147305,0.449013,0.0,0.315551,0.110586,0.686745,0.231082
5,0.295574,0.076465,0.391472,0.299562,0.315551,0.0,0.570419,0.190697,0.727016
6,0.211547,0.44308,0.110356,0.794668,0.110586,0.570419,0.0,1.014621,0.076605
7,0.634647,0.262394,0.780306,0.298056,0.686745,0.190697,1.014621,0.0,1.21447
8,0.320429,0.620279,0.155682,1.005269,0.231082,0.727016,0.076605,1.21447,0.0


## Adjacency Matrix

In [36]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,7,5,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,2,4,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [37]:
adj.to_csv("stgan-algo/STGAN/crypto/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [38]:
stdev = np.array(dists).std()
stdev

0.30384531393368724

In [39]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.491456,0.764511,0.040019,0.857932,0.388176,0.615857,0.01274334,0.3288554
1,0.491456,0.0,0.376517,0.662076,0.647641,0.938632,0.119257,0.4743697,0.01549183
2,0.764511,0.376517,0.0,0.005844,0.790546,0.190148,0.876417,0.001366987,0.7691071
3,0.040019,0.662076,0.005844,0.0,0.112613,0.378323,0.00107,0.382029,1.762675e-05
4,0.857932,0.647641,0.790546,0.112613,0.0,0.340094,0.875936,0.006045693,0.5607948
5,0.388176,0.938632,0.190148,0.378323,0.340094,0.0,0.02947,0.674423,0.003263041
6,0.615857,0.119257,0.876417,0.00107,0.875936,0.02947,0.0,1.436496e-05,0.9384147
7,0.012743,0.47437,0.001367,0.382029,0.006046,0.674423,1.4e-05,0.0,1.152654e-07
8,0.328855,0.015492,0.769107,1.8e-05,0.560795,0.003263,0.938415,1.152654e-07,0.0


In [40]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [41]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

torch.Size([9, 9, 9])

In [42]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.5793e-01, 7.6451e-01, 6.1586e-01, 4.9146e-01, 3.8818e-01,
         3.2886e-01, 4.0019e-02, 1.2743e-02],
        [8.5793e-01, 0.0000e+00, 7.9055e-01, 8.7594e-01, 6.4764e-01, 3.4009e-01,
         5.6079e-01, 1.1261e-01, 6.0457e-03],
        [7.6451e-01, 7.9055e-01, 0.0000e+00, 8.7642e-01, 3.7652e-01, 1.9015e-01,
         7.6911e-01, 5.8442e-03, 1.3670e-03],
        [6.1586e-01, 8.7594e-01, 8.7642e-01, 0.0000e+00, 1.1926e-01, 2.9470e-02,
         9.3841e-01, 1.0699e-03, 1.4365e-05],
        [4.9146e-01, 6.4764e-01, 3.7652e-01, 1.1926e-01, 0.0000e+00, 9.3863e-01,
         1.5492e-02, 6.6208e-01, 4.7437e-01],
        [3.8818e-01, 3.4009e-01, 1.9015e-01, 2.9470e-02, 9.3863e-01, 0.0000e+00,
         3.2630e-03, 3.7832e-01, 6.7442e-01],
        [3.2886e-01, 5.6079e-01, 7.6911e-01, 9.3841e-01, 1.5492e-02, 3.2630e-03,
         0.0000e+00, 1.7627e-05, 1.1527e-07],
        [4.0019e-02, 1.1261e-01, 5.8442e-03, 1.0699e-03, 6.6208e-01, 3.7832e-01,
         1.7627e-05, 0.0000e+0

In [43]:
np.save("stgan-algo/STGAN/crypto/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [44]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [45]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [46]:
# Day one-hot encoding
week_length = 7
starting_day = 1 # The starting day, april 1, 2020, is Tuesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [47]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8598,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8599,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
8600,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
8601,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0


In [48]:
time_features.to_csv("stgan-algo/STGAN/crypto/data/time_features.txt", sep=" ", header=False, index=False)