In [1]:
import pandas as pd
import numpy as np
from torch_geometric.data import Data

  from .autonotebook import tqdm as notebook_tqdm


Create one graph for each timestep, using the same adjacency matrix since the structure is based on fundamentals and will not change.

In [2]:
# Load the per-symbol daily features and index the rows by (Symbol, Date).
values = (
	pd.read_csv('../data/raw/values.csv')
	.set_index(['Symbol', 'Date'])
)
values.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,NormClose,DailyLogReturn,ALR1W,ALR2W,ALR1M,ALR2M,RSI,MACD
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAPL,2016-01-04 00:00:00-05:00,23.776182,-1.21359,0.046584,-0.643187,-0.618213,-1.220123,-1.183828,0.325944,-0.650458
AAPL,2016-01-05 00:00:00-05:00,23.180359,-1.221945,-1.382949,-1.004596,-0.580555,-1.418477,-1.504688,0.281394,-0.699483
AAPL,2016-01-06 00:00:00-05:00,22.726723,-1.228307,-1.07697,-1.966747,-1.163614,-2.067022,-1.636428,0.253036,-0.76611
AAPL,2016-01-07 00:00:00-05:00,21.767557,-1.241758,-2.349732,-2.734253,-1.933525,-2.521879,-1.972253,0.20581,-0.886094
AAPL,2016-01-08 00:00:00-05:00,21.88266,-1.240144,0.287384,-2.102979,-2.070639,-2.451471,-1.936344,0.224515,-0.960819


In [3]:
# Weighted adjacency matrix; the same matrix is reused for every timestep's graph.
adj = np.load('../data/raw/adj.npy')
# Preview only the top-left 10x10 corner instead of dumping the whole matrix.
adj[0:10, 0:10]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.72591804, 0.        , 0.70888771],
       [0.        , 0.        , 0.       

In [4]:
nodes_nb = len(adj)
features_nb = values.shape[1] - 1  # every column except the raw "Close" price
# NOTE(review): the reshape assumes rows are grouped by Symbol, ordered by Date,
# and that every symbol has the same number of dates - confirm against values.csv.
x = (
	values.drop(columns=["Close"])
	.to_numpy()
	.reshape((nodes_nb, -1, features_nb))
)  # shape (nodes_nb, timestamps_nb, features_nb)
x = np.swapaxes(x, 1, 2)  # shape (nodes_nb, features_nb, timestamps_nb)

# Every non-zero adjacency entry is an edge. np.nonzero scans in row-major order,
# so edges come out in the same (i, j) order as a nested loop over rows then columns.
sources, targets = np.nonzero(adj)
edge_nb = len(sources)
# Node indices must be integers (PyG expects an int64 edge_index); the previous
# np.zeros-based buffer silently stored them as float64.
edge_index = np.stack((sources, targets)).astype(np.int64)  # shape (2, edge_nb)
edge_weight = adj[sources, targets].astype(np.float64)  # shape (edge_nb,)
x.shape, edge_index.shape, edge_weight.shape

((98, 8, 2473), (2, 638), (638,))

In [5]:
past_window, future_window = 25, 1
# Slide the window along the time axis, which is the LAST axis of x
# (nodes_nb, features_nb, timestamps_nb). Bounding the loop with x.shape[0]
# would use the node count and build far fewer graphs than there are windows.
timestamps_nb = x.shape[2]
# NOTE(review): x / edge_index / edge_weight are NumPy arrays here; they probably
# need converting to torch tensors before these graphs are fed to a model - confirm.
timestamps = [
	Data(
		x=x[:, :, idx:idx + past_window],  # all features over the past_window steps
		edge_index=edge_index,
		edge_weight=edge_weight,
		# target: feature 0 over the future_window steps right after the input window
		y=x[:, 0, idx + past_window:idx + past_window + future_window],
	)
	for idx in range(timestamps_nb - past_window - future_window)
]
timestamps[:5]

[Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638]),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638]),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638]),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638]),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638])]

Use the same method to build a standard PyG `Dataset` class. Each graph it contains holds 3D node features (nodes × features × timestamps) covering the window $[t, t+T)$, together with a target variable: the variation at time $t+T$.

In [1]:
from dataset.stock import StocksDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the dataset with its default arguments, then show its repr next to the first graph.
dataset = StocksDataset()
(dataset, dataset[0])

(StocksDataset(2447),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638], close_price=[98, 25], close_price_y=[98, 1]))