# Crypto Data
## Setup

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from data_processing import download_data

## Price Data

In [41]:
tickers = [coin + "-USD" for coin in ["BTC", "ETH", "XRP", "BNB", "SOL", "TRX", "DOGE", "BCH", "ADA"]]

period = "365d"
interval = "1h"

cols = ["Datetime", "Adj Close", "Volume"]
num_features = len(cols) - 1


dfs = []
df_vals = []
for ticker in tickers:
    df = download_data(ticker, cols, period=period, interval=interval, save=True)
    df.rename({col: col + " " + ticker[:3] for col in df.columns if col != "Datetime"}, axis=1, inplace=True)
    df.set_index("Datetime", inplace=True)
    dfs.append(df)
    df_vals.append(df.values)

stack = np.stack(df_vals, axis=1)
df = pd.concat(dfs, axis=1)

In [42]:
stack.shape

(8593, 9, 2)

In [43]:
data_tensor = torch.tensor(stack).unsqueeze(-1)
data_tensor.shape

torch.Size([8593, 9, 2, 1])

In [44]:
# Save
np.save("stgan/STGAN/crypto_volume/data/data.npy", np.array(data_tensor))

In [45]:
# Load
data_tensor = torch.tensor(np.load("stgan/STGAN/crypto_volume/data/data.npy"), dtype=torch.float)
data_tensor.shape

torch.Size([8593, 9, 2, 1])

In [46]:
data_tensor.shape

torch.Size([8593, 9, 2, 1])

## Distances: Use Correlations

In [47]:
df[[col for col in df.columns if "Adj Close" in col]].corr()

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,1.0,0.74407,0.844398,0.456034,0.882507,0.70283,0.793956,0.360844,0.683089
Adj Close ETH,0.74407,1.0,0.701173,0.805254,0.800554,0.924148,0.5618,0.736471,0.382871
Adj Close XRP,0.844398,0.701173,1.0,0.314247,0.853642,0.610706,0.891633,0.219497,0.845262
Adj Close BNB,0.456034,0.805254,0.314247,1.0,0.553238,0.699597,0.211908,0.699576,-0.00053
Adj Close SOL,0.882507,0.800554,0.853642,0.553238,1.0,0.685942,0.891786,0.312376,0.770449
Adj Close TRX,0.70283,0.924148,0.610706,0.699597,0.685942,1.0,0.437573,0.80752,0.276544
Adj Close DOG,0.793956,0.5618,0.891633,0.211908,0.891786,0.437573,1.0,-0.00966,0.924476
Adj Close BCH,0.360844,0.736471,0.219497,0.699576,0.312376,0.80752,-0.00966,1.0,-0.213623
Adj Close ADA,0.683089,0.382871,0.845262,-0.00053,0.770449,0.276544,0.924476,-0.213623,1.0


In [48]:
# Use simple intuition: distance = 1 - correlation
dists =  1 - df[[col for col in df.columns if "Adj Close" in col]].corr()

In [49]:
dists

Unnamed: 0,Adj Close BTC,Adj Close ETH,Adj Close XRP,Adj Close BNB,Adj Close SOL,Adj Close TRX,Adj Close DOG,Adj Close BCH,Adj Close ADA
Adj Close BTC,0.0,0.25593,0.155602,0.543966,0.117493,0.29717,0.206044,0.639156,0.316911
Adj Close ETH,0.25593,0.0,0.298827,0.194746,0.199446,0.075852,0.4382,0.263529,0.617129
Adj Close XRP,0.155602,0.298827,0.0,0.685753,0.146358,0.389294,0.108367,0.780503,0.154738
Adj Close BNB,0.543966,0.194746,0.685753,0.0,0.446762,0.300403,0.788092,0.300424,1.00053
Adj Close SOL,0.117493,0.199446,0.146358,0.446762,0.0,0.314058,0.108214,0.687624,0.229551
Adj Close TRX,0.29717,0.075852,0.389294,0.300403,0.314058,0.0,0.562427,0.19248,0.723456
Adj Close DOG,0.206044,0.4382,0.108367,0.788092,0.108214,0.562427,0.0,1.00966,0.075524
Adj Close BCH,0.639156,0.263529,0.780503,0.300424,0.687624,0.19248,1.00966,0.0,1.213623
Adj Close ADA,0.316911,0.617129,0.154738,1.00053,0.229551,0.723456,0.075524,1.213623,0.0


In [50]:
dists.to_csv("stgan/STGAN/crypto_volume/data/node_dist.txt", sep=" ", header=False, index=False)

In [51]:
dists.reset_index(drop=True, inplace=True)
dists.columns = range(dists.shape[1])
dists

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.25593,0.155602,0.543966,0.117493,0.29717,0.206044,0.639156,0.316911
1,0.25593,0.0,0.298827,0.194746,0.199446,0.075852,0.4382,0.263529,0.617129
2,0.155602,0.298827,0.0,0.685753,0.146358,0.389294,0.108367,0.780503,0.154738
3,0.543966,0.194746,0.685753,0.0,0.446762,0.300403,0.788092,0.300424,1.00053
4,0.117493,0.199446,0.146358,0.446762,0.0,0.314058,0.108214,0.687624,0.229551
5,0.29717,0.075852,0.389294,0.300403,0.314058,0.0,0.562427,0.19248,0.723456
6,0.206044,0.4382,0.108367,0.788092,0.108214,0.562427,0.0,1.00966,0.075524
7,0.639156,0.263529,0.780503,0.300424,0.687624,0.19248,1.00966,0.0,1.213623
8,0.316911,0.617129,0.154738,1.00053,0.229551,0.723456,0.075524,1.213623,0.0


## Adjacency Matrix

In [52]:
k = 9
adj = pd.DataFrame([], columns=range(k))
for i in range(dists.shape[1]):
    adj.loc[i] = list(dists.loc[i].sort_values()[:k].index)
adj

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,4,2,6,1,5,8,3,7
1,1,5,3,4,0,7,2,6,8
2,2,6,4,8,0,1,5,3,7
3,3,1,5,7,4,0,2,6,8
4,4,6,0,2,1,8,5,3,7
5,5,1,7,0,3,4,2,6,8
6,6,8,4,2,0,1,5,3,7
7,7,5,1,3,0,4,2,6,8
8,8,6,2,4,0,1,5,3,7


In [53]:
adj.to_csv("stgan/STGAN/crypto_volume/data/node_adjacent.txt", sep=" ", header=False, index=False)

## Node Subgraph

In [54]:
stdev = np.array(dists).std()
stdev

0.30303320950585794

In [55]:
W = np.exp(-dists**2/stdev**2)
for i in range(len(W)):
    W.loc[i,i] = 0   
W

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.490033,0.768231,0.039864,0.860426,0.382251,0.629823,0.0116937,0.3349777
1,0.490033,0.0,0.378162,0.661659,0.648443,0.939267,0.123558,0.4694156,0.01580622
2,0.768231,0.378162,0.0,0.00597,0.791942,0.191983,0.879956,0.001315021,0.7704787
3,0.039864,0.661659,0.00597,0.0,0.113772,0.374293,0.001155,0.3742424,1.843357e-05
4,0.860426,0.648443,0.791942,0.113772,0.0,0.341609,0.880273,0.005805262,0.5633674
5,0.382251,0.939267,0.191983,0.374293,0.341609,0.0,0.031914,0.6680105,0.003347337
6,0.629823,0.123558,0.879956,0.001155,0.880273,0.031914,0.0,1.509432e-05,0.9397752
7,0.011694,0.469416,0.001315,0.374242,0.005805,0.668011,1.5e-05,0.0,1.081917e-07
8,0.334978,0.015806,0.770479,1.8e-05,0.563367,0.003347,0.939775,1.081917e-07,0.0


In [56]:
node_subgraph = torch.empty((len(tickers), k, k))
node_subgraph.shape

torch.Size([9, 9, 9])

In [57]:
for i in range(len(tickers)):
    node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())
node_subgraph.shape

  node_subgraph[i,:,:] = torch.from_numpy(W.loc[adj.loc[i], adj.loc[i]].to_numpy())


torch.Size([9, 9, 9])

In [58]:
node_subgraph[0,:,:]

tensor([[0.0000e+00, 8.6043e-01, 7.6823e-01, 6.2982e-01, 4.9003e-01, 3.8225e-01,
         3.3498e-01, 3.9864e-02, 1.1694e-02],
        [8.6043e-01, 0.0000e+00, 7.9194e-01, 8.8027e-01, 6.4844e-01, 3.4161e-01,
         5.6337e-01, 1.1377e-01, 5.8053e-03],
        [7.6823e-01, 7.9194e-01, 0.0000e+00, 8.7996e-01, 3.7816e-01, 1.9198e-01,
         7.7048e-01, 5.9700e-03, 1.3150e-03],
        [6.2982e-01, 8.8027e-01, 8.7996e-01, 0.0000e+00, 1.2356e-01, 3.1914e-02,
         9.3978e-01, 1.1551e-03, 1.5094e-05],
        [4.9003e-01, 6.4844e-01, 3.7816e-01, 1.2356e-01, 0.0000e+00, 9.3927e-01,
         1.5806e-02, 6.6166e-01, 4.6942e-01],
        [3.8225e-01, 3.4161e-01, 1.9198e-01, 3.1914e-02, 9.3927e-01, 0.0000e+00,
         3.3473e-03, 3.7429e-01, 6.6801e-01],
        [3.3498e-01, 5.6337e-01, 7.7048e-01, 9.3978e-01, 1.5806e-02, 3.3473e-03,
         0.0000e+00, 1.8434e-05, 1.0819e-07],
        [3.9864e-02, 1.1377e-01, 5.9700e-03, 1.1551e-03, 6.6166e-01, 3.7429e-01,
         1.8434e-05, 0.0000e+0

In [59]:
np.save("stgan/STGAN/crypto_volume/data/node_subgraph.npy", node_subgraph.numpy())

## Extra Features

In [60]:
time_features = np.zeros([data_tensor.shape[0], 24 + 7])
time_features = pd.DataFrame(time_features, dtype=int)

In [61]:
# Hour one-hot encoding
timestamp = int(interval[:1])
day_length = 24 * timestamp
for i in range(len(time_features)):
    hour = divmod(divmod(i, day_length)[1], timestamp)[0]
    time_features.loc[i, hour] = 1

In [62]:
# Day one-hot encoding
week_length = 7
starting_day = 2 # The starting day, Feb 19 2025, is Wednesday
for i in range(len(time_features)):
    weekday = divmod(divmod(i, day_length)[0], week_length)[1]
    time_features.loc[i, 24 + divmod(starting_day + weekday, week_length)[1]] = 1

In [63]:
time_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
8589,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
8590,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
8591,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0


In [64]:
time_features.to_csv("stgan/STGAN/crypto_volume/data/time_features.txt", sep=" ", header=False, index=False)