In [1]:
import numpy as np
import pandas as pd
from torch_geometric.data import Data

In [3]:
# Read the values table from the SP100 data directory, then index its rows
# by the (Symbol, Date) pair so each row is one stock on one day.
values = pd.read_csv('../data/SP100/values.csv')
values = values.set_index(['Symbol', 'Date'])
values.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,NormClose,DailyLogReturn,ALR1W,ALR2W,ALR1M,ALR2M,RSI,MACD
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAPL,2020-02-14 00:00:00-05:00,78.945244,-1.717131,0.012339,0.375774,0.891135,0.537198,1.441355,0.616398,1.705852
AAPL,2020-02-18 00:00:00-05:00,77.49971,-1.751778,-0.927029,-0.196117,0.616552,0.169253,1.138491,0.54969,1.539877
AAPL,2020-02-19 00:00:00-05:00,78.622124,-1.724876,0.721292,0.307105,0.300578,0.209332,1.244552,0.587059,1.481828
AAPL,2020-02-20 00:00:00-05:00,77.815559,-1.744207,-0.517268,-0.524958,-0.021182,0.167552,1.176899,0.551633,1.355121
AAPL,2020-02-21 00:00:00-05:00,76.054199,-1.786423,-1.148492,-0.912862,-0.623824,-0.147209,0.97277,0.483076,1.099898


In [4]:
# Load the adjacency matrix saved as a NumPy array and preview its
# top-left 10x10 corner.
adj = np.load('../data/SP100/adj.npy')
adj[0:10, 0:10]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.85829308, 0.72785829, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.85829308, 0.       

### Creating the feature matrix and edge indices
The feature matrix is a 3D array of shape (stocks_nb, features_nb, timestamps_nb). The adjacency matrix is used to create the edge_index matrix (and the matching edge weights) in the PyTorch Geometric format.

In [5]:
nodes_nb = len(adj)

# Every column except the raw "Close" price is used as a node feature.
features = values.drop(columns=["Close"])
# NOTE(review): the reshape assumes rows are grouped by Symbol (in the same
# order as the rows of `adj`) and that every symbol has the same number of
# dates — confirm against how values.csv is generated.
x = features.to_numpy().reshape(
	(nodes_nb, -1, features.shape[1])
)  # shape (nodes_nb, timestamps_nb, features_nb)
x = np.swapaxes(x, 1, 2)  # shape (nodes_nb, features_nb, timestamps_nb)

# Each non-zero adjacency entry (i, j) becomes one directed edge i -> j.
# np.nonzero returns the indices in row-major order, i.e. the same order a
# nested `for i ... for j ...` scan would produce.
sources, targets = np.nonzero(adj)
# Node indices must be integers: PyTorch Geometric expects a long edge_index,
# and a float array (np.zeros default) cannot be used for indexing.
edge_index = np.stack([sources, targets]).astype(np.int64)  # shape (2, edge_nb)
edge_weight = adj[sources, targets].astype(np.float64)  # shape (edge_nb,)
edge_nb = edge_weight.shape[0]
x.shape, edge_index.shape, edge_weight.shape

((100, 8, 1216), (2, 570), (570,))

In [6]:
# Build one graph sample per sliding window over the time axis:
#   x -> all features over `past_window` consecutive timestamps
#   y -> feature 0 over the following `future_window` timestamps
past_window, future_window = 25, 1
# Time is the LAST axis of x (nodes_nb, features_nb, timestamps_nb);
# x.shape[0] is the number of stocks and must not be used to count windows.
timestamps_nb = x.shape[-1]
timestamps = [
	Data(
		x=x[:, :, idx:idx + past_window],
		edge_index=edge_index,
		edge_weight=edge_weight,
		y=x[:, 0, idx + past_window:idx + past_window + future_window],
	)
	# +1 so the last window, whose target ends exactly at the final timestamp,
	# is included.
	for idx in range(timestamps_nb - past_window - future_window + 1)
]
timestamps[:5]

[Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570])]