In [1]:
import numpy as np
import pandas as pd
from torch_geometric.data import Data

In [3]:
# Per-stock daily feature table, indexed by the (Symbol, Date) pair.
values = (
	pd.read_csv('../data/SP100/values.csv')
	.set_index(['Symbol', 'Date'])
)
values.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,NormClose,DailyLogReturn,ALR1W,ALR2W,ALR1M,ALR2M,RSI,MACD,Momentum,Volatility
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AAPL,2020-04-28 00:00:00-04:00,67.679855,-1.991534,-0.819888,0.919618,-0.52289,1.396465,0.156702,0.536292,0.983231,-0.283287,67.672881
AAPL,2020-04-29 00:00:00-04:00,69.902809,-1.938323,1.621364,1.016196,0.20139,1.446206,0.437971,0.579621,1.133669,2.102058,69.746466
AAPL,2020-04-30 00:00:00-04:00,71.37748,-1.903023,1.047382,1.625991,0.427684,1.719003,-0.144546,0.605928,1.356252,3.508858,71.993274
AAPL,2020-05-01 00:00:00-04:00,70.228363,-1.93053,-0.81427,0.525298,0.382843,2.169176,-0.007386,0.575696,1.423517,2.500305,72.594939
AAPL,2020-05-04 00:00:00-04:00,71.222,-1.906745,0.704866,0.853921,0.994312,2.139425,-0.274888,0.594534,1.539259,2.388748,70.131336


In [4]:
# Weighted adjacency matrix between stocks; a zero entry means "no edge".
adj = np.load('../data/SP100/adj.npy')
# Preview the top-left 15x15 corner only.
adj[0:15, 0:15]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.85829308, 0.72785829, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.85829308, 0.       

### Creating the feature matrix and edge indices
The feature matrix is a 3D array of shape (nodes_nb, features_nb, timestamps_nb), with one node per stock. The adjacency matrix is used to create the edge_index and edge_weight arrays in the PyTorch Geometric format.

In [6]:
# Build the node feature tensor `x` and the sparse edge description
# (`edge_index`, `edge_weight`) from the dense adjacency matrix `adj`.
nodes_nb = len(adj)
features_nb = values.shape[1] - 1  # every column except the raw "Close" price

# NOTE(review): the reshape assumes the rows of `values` are grouped by symbol, that every
# symbol has the same number of dates, and that symbols appear in the same order as the
# rows/columns of `adj` -- TODO confirm against the data preparation step.
x = (
	values.drop(columns=["Close"])
	.to_numpy()
	.reshape((nodes_nb, -1, features_nb))  # shape (nodes_nb, timestamps_nb, features_nb)
)
x = np.swapaxes(x, 1, 2)  # shape (nodes_nb, features_nb, timestamps_nb)

# np.nonzero returns indices in row-major order, i.e. the same (i, j) order a nested
# `for i ... for j ...` loop over the matrix would produce.
sources, targets = np.nonzero(adj)
# Node ids must be integers: an array created with np.zeros would be float64, which is not a
# valid index dtype for PyTorch Geometric's edge_index.
edge_index = np.stack([sources, targets]).astype(np.int64)  # shape (2, edge_nb)
edge_weight = adj[sources, targets].astype(np.float64)  # shape (edge_nb,)
edge_nb = edge_index.shape[1]
x.shape, edge_index.shape, edge_weight.shape

((100, 10, 1168), (2, 570), (570,))

In [6]:
# Slice the full series into sliding-window samples: each sample holds `past_window`
# consecutive time steps of every feature as input and the following `future_window`
# steps of feature 0 as target.
past_window, future_window = 25, 1
# Time is the LAST axis of x (nodes_nb, features_nb, timestamps_nb); axis 0 indexes stocks,
# so the number of windows must be derived from x.shape[2], not x.shape[0].
timestamps_nb = x.shape[2]
# "+ 1" keeps the last start index for which the input and target windows both still fit.
samples_nb = timestamps_nb - past_window - future_window + 1
# NOTE(review): x, edge_index and edge_weight are passed as NumPy arrays, exactly as before;
# confirm whether downstream code converts them to torch tensors.
timestamps = [
	Data(
		x=x[:, :, idx:idx + past_window],
		edge_index=edge_index,
		edge_weight=edge_weight,
		# Feature 0 is the first column left after dropping "Close"
		# (NormClose, per the column order displayed by values.head()).
		y=x[:, 0, idx + past_window:idx + past_window + future_window],
	)
	for idx in range(samples_nb)
]
timestamps[:5]

[Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570]),
 Data(x=[100, 8, 25], edge_index=[2, 570], y=[100, 1], edge_weight=[570])]