In [170]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import AGNNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import GraphSAINTRandomWalkSampler, GraphSAINTNodeSampler
from matplotlib import pyplot as plt
import numpy as np
from train_utils import *
from product_graph import *
from torch_geometric.data import HeteroData
from tqdm import tqdm

#### Load the dataset

In [171]:
# static_data = np.load("data/preprocessed/static_data.npy", allow_pickle = True)
dynamic_data = torch.tensor(np.load("data/preprocessed/dynamic_data.npy", allow_pickle=True))
S = torch.tensor(np.load("data/adjacency/coords_features.npy", allow_pickle=False))
S.shape

torch.Size([2628, 2628])

#### Process the data

In [172]:
data = Data(x = dynamic_data)
edge_index = torch.nonzero(torch.tensor(S), as_tuple=False).t().contiguous()
edge_weight = S[edge_index[0], edge_index[1]]
data.edge_index = edge_index
data.edge_weight = edge_weight

  edge_index = torch.nonzero(torch.tensor(S), as_tuple=False).t().contiguous()


In [173]:
data

Data(x=[2628, 279], edge_index=[2, 1671876], edge_weight=[1671876])

#### Sampler method 1: Call create_forecasting_dataset for each subgraphs. 

In [175]:
# Define GraphSAINTRandomWalkSampler
loader = GraphSAINTNodeSampler (
    data,
    batch_size=200,
    num_steps=6,
)

# Example: Iterate over the DataLoader
for i, batch in enumerate(loader):
    dataset = create_forecasting_dataset(batch.x.T,
                                      splits = None,
                                      pred_horizen= 1,
                                      obs_window= 4,
                                      verbose = 0)
    batch_sample = Data(x = torch.tensor(dataset['trn']['data']), y = torch.tensor(dataset['trn']['labels']), 
                        edge_index= batch.edge_index, edge_weight = batch.edge_weight)
    print(f"Batch {i+1}: {batch_sample}")
    
    

Batch 1: Data(x=[275, 189, 4], edge_index=[2, 13246], y=[275, 189, 1], edge_weight=[13246])
Batch 2: Data(x=[275, 192, 4], edge_index=[2, 13166], y=[275, 192, 1], edge_weight=[13166])
Batch 3: Data(x=[275, 192, 4], edge_index=[2, 14350], y=[275, 192, 1], edge_weight=[14350])
Batch 4: Data(x=[275, 186, 4], edge_index=[2, 14248], y=[275, 186, 1], edge_weight=[14248])
Batch 5: Data(x=[275, 193, 4], edge_index=[2, 14626], y=[275, 193, 1], edge_weight=[14626])
Batch 6: Data(x=[275, 191, 4], edge_index=[2, 14054], y=[275, 191, 1], edge_weight=[14054])


#### Sampler method 2: Call try nested loader method.

1. First loop through the samples (the time samples)
2. For each samplee graph, build subgraph and train with them
3. With this method, we can have train test split along the sample dimension

In [167]:
data = create_forecasting_dataset(dynamic_data.T,
                                      splits = [0.8, 0.1, 0.1],
                                      pred_horizen= 1,
                                      obs_window= 4,
                                      verbose = 0)
edge_index = torch.nonzero(torch.tensor(S), as_tuple=False).t().contiguous()
edge_weight = S[edge_index[0], edge_index[1]]

  edge_index = torch.nonzero(torch.tensor(S), as_tuple=False).t().contiguous()


#### For training set

In [168]:
# loop over the number of samples
num_samples = data['trn']['data'].shape[0]
for i in range(num_samples):
    print(f"outer batch {i}")
    # Create a torch geometric data over each graph 
    outer_batch = Data(x = torch.tensor(data['trn']['data'][i]), y = torch.tensor(data['trn']['labels'][i].squeeze()),
                       edge_index=edge_index, edge_weight = edge_weight) 
    
    loader = GraphSAINTNodeSampler(outer_batch, batch_size=100, num_steps=6)
    for inner_batch in loader:
        print(inner_batch)

outer batch 0
Data(num_nodes=98, edge_index=[2, 3992], x=[98, 4], y=[98], edge_weight=[3992])
Data(num_nodes=97, edge_index=[2, 3782], x=[97, 4], y=[97], edge_weight=[3782])
Data(num_nodes=98, edge_index=[2, 3686], x=[98, 4], y=[98], edge_weight=[3686])
Data(num_nodes=97, edge_index=[2, 3564], x=[97, 4], y=[97], edge_weight=[3564])
Data(num_nodes=99, edge_index=[2, 3538], x=[99, 4], y=[99], edge_weight=[3538])
Data(num_nodes=99, edge_index=[2, 3640], x=[99, 4], y=[99], edge_weight=[3640])
outer batch 1
Data(num_nodes=98, edge_index=[2, 3520], x=[98, 4], y=[98], edge_weight=[3520])
Data(num_nodes=96, edge_index=[2, 3816], x=[96, 4], y=[96], edge_weight=[3816])
Data(num_nodes=99, edge_index=[2, 3420], x=[99, 4], y=[99], edge_weight=[3420])
Data(num_nodes=98, edge_index=[2, 3800], x=[98, 4], y=[98], edge_weight=[3800])
Data(num_nodes=98, edge_index=[2, 3508], x=[98, 4], y=[98], edge_weight=[3508])
Data(num_nodes=96, edge_index=[2, 3594], x=[96, 4], y=[96], edge_weight=[3594])
outer batch 

#### For validation set

In [160]:
# loop over the number of samples
num_samples = data['val']['data'].shape[0]
for i in range(num_samples):
    print(f"outer batch {i}")
    # Create a torch geometric data over each graph 
    outer_batch = Data(x = torch.tensor(data['val']['data'][i]), y = torch.tensor(data['val']['labels'][i].squeeze()),
                       edge_index=edge_index, edge_weight = edge_weight) 
    
    loader = GraphSAINTNodeSampler(outer_batch, batch_size=200, num_steps=6)
    for inner_batch in loader:
        print(inner_batch)

outer batch 0
Data(num_nodes=192, edge_index=[2, 14460], x=[192, 4], y=[192], edge_weight=[14460])
Data(num_nodes=188, edge_index=[2, 13672], x=[188, 4], y=[188], edge_weight=[13672])
Data(num_nodes=191, edge_index=[2, 14078], x=[191, 4], y=[191], edge_weight=[14078])
Data(num_nodes=186, edge_index=[2, 13702], x=[186, 4], y=[186], edge_weight=[13702])
Data(num_nodes=186, edge_index=[2, 13888], x=[186, 4], y=[186], edge_weight=[13888])
Data(num_nodes=189, edge_index=[2, 13502], x=[189, 4], y=[189], edge_weight=[13502])
outer batch 1
Data(num_nodes=191, edge_index=[2, 15550], x=[191, 4], y=[191], edge_weight=[15550])
Data(num_nodes=188, edge_index=[2, 12806], x=[188, 4], y=[188], edge_weight=[12806])
Data(num_nodes=190, edge_index=[2, 14140], x=[190, 4], y=[190], edge_weight=[14140])
Data(num_nodes=189, edge_index=[2, 12802], x=[189, 4], y=[189], edge_weight=[12802])
Data(num_nodes=190, edge_index=[2, 14058], x=[190, 4], y=[190], edge_weight=[14058])
Data(num_nodes=191, edge_index=[2, 12

#### For testing set

In [161]:
# loop over the number of samples
num_samples = data['tst']['data'].shape[0]
for i in range(num_samples):
    print(f"outer batch {i}")
    # Create a torch geometric data over each graph 
    outer_batch = Data(x = torch.tensor(data['tst']['data'][i]), y = torch.tensor(data['tst']['labels'][i].squeeze()),
                       edge_index=edge_index, edge_weight = edge_weight) 
    
    loader = GraphSAINTNodeSampler(outer_batch, batch_size=200, num_steps=6)
    for inner_batch in loader:
        print(inner_batch)

outer batch 0
Data(num_nodes=190, edge_index=[2, 13594], x=[190, 4], y=[190], edge_weight=[13594])
Data(num_nodes=188, edge_index=[2, 13100], x=[188, 4], y=[188], edge_weight=[13100])
Data(num_nodes=189, edge_index=[2, 13744], x=[189, 4], y=[189], edge_weight=[13744])
Data(num_nodes=190, edge_index=[2, 15088], x=[190, 4], y=[190], edge_weight=[15088])
Data(num_nodes=191, edge_index=[2, 15136], x=[191, 4], y=[191], edge_weight=[15136])
Data(num_nodes=190, edge_index=[2, 13934], x=[190, 4], y=[190], edge_weight=[13934])
outer batch 1
Data(num_nodes=189, edge_index=[2, 14842], x=[189, 4], y=[189], edge_weight=[14842])
Data(num_nodes=194, edge_index=[2, 15216], x=[194, 4], y=[194], edge_weight=[15216])
Data(num_nodes=194, edge_index=[2, 14150], x=[194, 4], y=[194], edge_weight=[14150])
Data(num_nodes=185, edge_index=[2, 14472], x=[185, 4], y=[185], edge_weight=[14472])
Data(num_nodes=190, edge_index=[2, 15480], x=[190, 4], y=[190], edge_weight=[15480])
Data(num_nodes=189, edge_index=[2, 13