# Crypto Data
## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [2]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

dfs = []
for ticker in tickers:
    df = download_data(ticker, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)

df = pd.concat(dfs, axis=1)
df.sort_index(inplace=True)

In [3]:
df.head()

Unnamed: 0_level_0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-02-18 00:00:00+00:00,95595.15625,2720.188721,2.639237,663.075073,176.735168,0.23789,0.256625,322.756653,0.793655
2025-02-18 01:00:00+00:00,96265.679688,2737.535645,2.66556,665.820312,178.113724,0.238404,0.259204,324.24646,0.801217
2025-02-18 02:00:00+00:00,96054.390625,2712.977783,2.655882,665.452759,178.124161,0.238591,0.257527,323.67984,0.797382
2025-02-18 03:00:00+00:00,96192.21875,2711.736328,2.66052,665.182251,177.878494,0.240435,0.257556,323.861359,0.808396
2025-02-18 04:00:00+00:00,96056.078125,2702.031982,2.644653,661.123718,175.766342,0.240169,0.255951,321.980377,0.80544


In [4]:
# Save
np.save("stgan-algo/STGAN/crypto/data/data.npy", df)

In [5]:
# Load
data_tensor = torch.tensor(np.load("stgan-algo/STGAN/crypto/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8602, 9])

In [6]:
data_tensor

tensor([[9.5595e+04, 2.7202e+03, 2.6392e+00,  ..., 2.5662e-01, 3.2276e+02,
         7.9366e-01],
        [9.6266e+04, 2.7375e+03, 2.6656e+00,  ..., 2.5920e-01, 3.2425e+02,
         8.0122e-01],
        [9.6054e+04, 2.7130e+03, 2.6559e+00,  ..., 2.5753e-01, 3.2368e+02,
         7.9738e-01],
        ...,
        [6.8173e+04, 2.0036e+03, 1.4865e+00,  ..., 1.0205e-01, 5.6676e+02,
         2.8525e-01],
        [6.7792e+04, 1.9988e+03, 1.4846e+00,  ..., 1.0113e-01, 5.6637e+02,
         2.8320e-01],
        [6.7727e+04, 1.9958e+03, 1.4820e+00,  ..., 1.0080e-01, 5.6808e+02,
         2.8263e-01]])

In [7]:
data_tensor = data_tensor.reshape([data_tensor.shape[0], data_tensor.shape[1], 1, 1])
np.save("stgan-algo/STGAN/crypto/data/data.npy", data_tensor.numpy())

## Distances: Use Correlations

In [8]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df.corr()

In [9]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.256111,0.157529,0.545195,0.119003,0.295396,0.21165,0.634306,0.320594
Adj Close ETH,0.256111,0.0,0.300349,0.195143,0.20028,0.076417,0.443174,0.262209,0.620426
Adj Close XRP,0.157529,0.300349,0.0,0.689159,0.147375,0.39137,0.110405,0.780066,0.155752
Adj Close BNB,0.545195,0.195143,0.689159,0.0,0.449082,0.299541,0.794833,0.297902,1.005495
Adj Close SOL,0.119003,0.20028,0.147375,0.449082,0.0,0.315418,0.110636,0.686465,0.231191
Adj Close TRX,0.295396,0.076417,0.39137,0.299541,0.315418,0.0,0.570364,0.190651,0.726994
Adj Close DOG,0.21165,0.443174,0.110405,0.794833,0.110636,0.570364,0.0,1.01445,0.076638
Adj Close BCH,0.634306,0.262209,0.780066,0.297902,0.686465,0.190651,1.01445,0.0,1.214348
Adj Close ADA,0.320594,0.620426,0.155752,1.005495,0.231191,0.726994,0.076638,1.214348,0.0


In [10]:
dists.to_csv("stgan-algo/STGAN/crypto/data/node_dist.txt", sep=" ", header=False, index=False)

In [11]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.256111,0.157529,0.545195,0.119003,0.295396,0.21165,0.634306,0.320594
1,0.256111,0.0,0.300349,0.195143,0.20028,0.076417,0.443174,0.262209,0.620426
2,0.157529,0.300349,0.0,0.689159,0.147375,0.39137,0.110405,0.780066,0.155752
3,0.545195,0.195143,0.689159,0.0,0.449082,0.299541,0.794833,0.297902,1.005495
4,0.119003,0.20028,0.147375,0.449082,0.0,0.315418,0.110636,0.686465,0.231191
5,0.295396,0.076417,0.39137,0.299541,0.315418,0.0,0.570364,0.190651,0.726994
6,0.21165,0.443174,0.110405,0.794833,0.110636,0.570364,0.0,1.01445,0.076638
7,0.634306,0.262209,0.780066,0.297902,0.686465,0.190651,1.01445,0.0,1.214348
8,0.320594,0.620426,0.155752,1.005495,0.231191,0.726994,0.076638,1.214348,0.0


## Adjacency Matrix

In [12]:
k = 5
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4
0,0,4,2,6,1
1,1,5,3,4,0
2,2,6,4,8,0
3,3,1,7,5,4
4,4,6,0,2,1
5,5,1,7,0,3
6,6,8,2,4,0
7,7,5,1,3,0
8,8,6,2,4,0


In [13]:
adj.to_csv("stgan-algo/STGAN/crypto/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [14]:
stdev = np.array(dists).std()
stdev

0.30382393777287403

In [15]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.491359,0.764275,0.039954,0.857771,0.388567,0.615525,0.01279535,0.3284278
1,0.491359,0.0,0.376343,0.661969,0.647563,0.938699,0.119115,0.4748204,0.0154523
2,0.764275,0.376343,0.0,0.005828,0.790341,0.190268,0.876298,0.001371257,0.7688956
3,0.039954,0.661969,0.005828,0.0,0.112503,0.378323,0.001066,0.3823579,1.751308e-05
4,0.857771,0.647563,0.790341,0.112503,0.0,0.340351,0.875814,0.006066557,0.5604429
5,0.388567,0.938699,0.190268,0.378323,0.340351,0.0,0.029475,0.674513,0.003261542
6,0.615525,0.119115,0.876298,0.001066,0.875814,0.029475,0.0,1.439641e-05,0.9383542
7,0.012795,0.47482,0.001371,0.382358,0.006067,0.674513,1.4e-05,0.0,1.153772e-07
8,0.328428,0.015452,0.768896,1.8e-05,0.560443,0.003262,0.938354,1.153772e-07,0.0


In [16]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 5, 5])

In [17]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 5, 5])

In [18]:
node_subgraph[0,:,:]

tensor([[0.0000, 0.8578, 0.7643, 0.6155, 0.4914],
        [0.8578, 0.0000, 0.7903, 0.8758, 0.6476],
        [0.7643, 0.7903, 0.0000, 0.8763, 0.3763],
        [0.6155, 0.8758, 0.8763, 0.0000, 0.1191],
        [0.4914, 0.6476, 0.3763, 0.1191, 0.0000]])

In [19]:
np.save("stgan-algo/STGAN/crypto/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [20]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [21]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [22]:
# Day one-hot encoding
week_length = 7
starting_day = 1 # The starting day, april 1, 2020, is Tuesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [23]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8597,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8598,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8599,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
8600,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0


In [24]:
time_features.to_csv("stgan-algo/STGAN/crypto/data/time_features.txt", sep=" ", header=False, index=False)