In [1]:
import numpy as np
import networkx as nx

from torch_geometric_temporal.signal import temporal_signal_split
from torch_geometric_temporal.signal import StaticGraphTemporalSignal, DynamicGraphTemporalSignal
from torch_geometric_temporal.dataset import PedalMeDatasetLoader, ChickenpoxDatasetLoader

import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN, A3TGCN, MPNNLSTM
from torch_geometric_temporal.nn.convolutional import ASTGCN

from tqdm import tqdm
import os

### Sources:
- https://pytorch-geometric-temporal.readthedocs.io/en/latest/notes/introduction.html#epidemiological-forecasting
- https://pytorch-geometric-temporal.readthedocs.io/en/latest/modules/signal.html#module-torch_geometric_temporal.signal.static_graph_temporal_signal

### Data loader

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
device

device(type='cuda')

In [3]:
source = '/media/storage_3/abiricz/Mobilcell/TimeIntervalGraphs/'
files = np.array( sorted([ i for i in os.listdir(source) ]) )
files[:10]

array(['graph_20181201_0_minutes-0-30.npz',
       'graph_20181201_10_minutes-300-330.npz',
       'graph_20181201_11_minutes-330-360.npz',
       'graph_20181201_12_minutes-360-390.npz',
       'graph_20181201_13_minutes-390-420.npz',
       'graph_20181201_14_minutes-420-450.npz',
       'graph_20181201_15_minutes-450-480.npz',
       'graph_20181201_16_minutes-480-510.npz',
       'graph_20181201_17_minutes-510-540.npz',
       'graph_20181201_18_minutes-540-570.npz'], dtype='<U39')

In [4]:
dates = np.unique( [ j.split('_')[1] for j in files ] )

### Ideas to implement
- get 12 consecutive time steps into x
- put 13. time step's value into y

In [5]:
loaded = np.load(source+files[5])
print( list( loaded.keys() ) )
loaded['data_edge_index'].shape, loaded['data_edge_attr'].shape, loaded['data_x'].shape

['data_edge_index', 'data_edge_attr', 'data_x']


((2, 96170), (96170, 1), (14585, 3))

In [6]:
loaded['data_x'].astype(int)[:,-1]

array([8362, 8809, 2571, ...,   77,   40,   33])

In [7]:
loaded['data_edge_attr'].astype(int)

array([[ 60],
       [ 85],
       [202],
       ...,
       [  1],
       [  1],
       [  1]])

In [8]:
indexer = np.arange(12)[None, :] + np.arange(48)[:, None]
indexer_x = indexer[:-12]
indexer_y = 1+indexer[:-12][:,-1]
indexer_x, indexer_y

(array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
        [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13],
        [ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
        [ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
        [ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16],
        [ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17],
        [ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
        [ 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
        [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
        [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
        [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
        [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
        [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
        [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27],
        [17, 1

In [9]:
def data_loader( source, filename ):
    loaded = np.load( source+filename )
    return loaded['data_edge_index'], loaded['data_edge_attr'], loaded['data_x']

In [10]:
def load_a_day( day_num ):
    daily_files = [ 'graph_'+dates[0]+'_'+str(l)+'_minutes-'+\
               str(l*30)+'-'+str((1+l)*30)+'.npz' for l in np.arange(48) ]
    
    # lists to hold loaded data
    edge_indices, edge_weights, features = ( [] for i in range(3) )
    
    for s in range( len(daily_files) ):
        edge_idx, edge_weight, feature = data_loader( source, daily_files[s] )
        edge_indices.append( edge_idx )
        edge_weights.append( edge_weight )
        features.append( feature )
    
    return edge_indices, edge_weights, features

In [11]:
def process_a_day( day_num, snap_counts=12, max_n=48 ):
    # rolling indices
    indexer = np.arange(snap_counts)[None, :] + np.arange(max_n)[:, None]
    indexer_x = indexer[:-snap_counts]
    indexer_y = 1+indexer[:-snap_counts][:,-1]
    
    # loaded data for this day
    edge_indices_all, edge_weights_all, features_all = load_a_day(day_num)
    
    # lists to hold loaded data structured for learning
    edge_indices_final = np.array( [ [ torch.LongTensor(edge_indices_all[l]).to(device) for l 
                                        in m ] for m in indexer_x ], 
                                   dtype=object )
    edge_weights_final = np.array( [ [ torch.LongTensor(edge_weights_all[l]).to(device) for l 
                                        in m ] for m in indexer_x ], 
                                   dtype=object )
    features_final = [ [ torch.LongTensor( features_all[l][:,-1] ).to(device) for l in m ] for m in indexer_x ]
    targets_final = [ torch.LongTensor( features_all[l][:,-1]).to(device) for l in indexer_y ]
    
    return edge_indices_final, edge_weights_final, features_final, targets_final

##### an array of a list of Tensor arrays, depending on whether edges change over time.

In [12]:
snap_counts=12
max_n=48
indexer = np.arange(snap_counts)[None, :] + np.arange(max_n)[:, None]
indexer_x = indexer[:-snap_counts]
indexer_y = 1+indexer[:-snap_counts][:,-1]

In [13]:
# loaded data for this day
edge_indices_all, edge_weights_all, features_all = load_a_day(0)

In [14]:
edge_indices, edge_weights, features, targets = process_a_day(0)
print( len(edge_indices), len(edge_weights), len(features), len(targets) )
print( len(edge_indices[0]), len(edge_weights[0]), len(features[0]), len(targets[0]) )

36 36 36 36
12 12 12 14585


In [15]:
edge_indices[0][0].dtype

torch.int64

In [16]:
node_count = 14585

num_for_predict = 1
len_input = 12
T = len_input
nb_time_strides = 1

node_features = 1
nb_block = 2
K = 3
nb_chev_filter = 64
nb_time_filter = 64
batch_size = 1
normalization = None
bias = True

In [17]:
x_seq = torch.zeros([batch_size, node_count, node_features, T]).to(device)
target_seq = torch.zeros([batch_size, node_count, num_for_predict]).to(device)
edge_index_seq = []
edge_weight_seq = []
x_seq.shape, target_seq.shape

(torch.Size([1, 14585, 1, 12]), torch.Size([1, 14585, 1]))

In [18]:
edge_indices.shape, edge_weights.shape, len(features), len(features[0]), len(targets)

((36, 12), (36, 12), 36, 12, 36)

In [19]:
for b in range(batch_size):
    edge_index_temp = []
    edge_weight_temp = []
    for t in range(T):
        # load
        current_x = torch.cuda.LongTensor( features[b][t].reshape(-1,1) )
        current_edge_index = edge_indices[b][t]
        current_edge_weight = edge_weights[b][t]
        
        # save
        x_seq[b,:,:,t] = current_x.to(device)
        edge_index_temp.append( current_edge_index.to(device) )
        edge_weight_temp.append( current_edge_weight.to(device) )
        
    edge_index_seq.append( edge_index_temp )#current_edge_index.to(device) )
    edge_weight_seq.append( edge_weight_temp ) #current_edge_weight.to(device) )
    
    target = targets[b]
# convert to numpy array (-> an array of a list of Tensor arrays)
#edge_index_seq = np.array( edge_index_seq, dtype=object ).reshape(batch_size, -1)
#edge_weight_seq = np.array( edge_weight_seq, dtype=object ).reshape(batch_size, -1)

In [20]:
len(edge_index_seq), len(edge_weight_seq), x_seq.shape, target_seq.shape

(1, 1, torch.Size([1, 14585, 1, 12]), torch.Size([1, 14585, 1]))

In [21]:
edge_index_seq[0][0].shape, edge_index_seq[0][1].shape

(torch.Size([2, 28863]), torch.Size([2, 23873]))

In [22]:
features[10]

[tensor([5706, 4212, 1288,  ...,   38,   33,   14], device='cuda:0'),
 tensor([5629, 4487, 1364,  ...,   44,   35,   16], device='cuda:0'),
 tensor([6071, 4929, 1437,  ...,   55,   41,   23], device='cuda:0'),
 tensor([6434, 5595, 1557,  ...,   60,   43,   23], device='cuda:0'),
 tensor([8362, 8809, 2571,  ...,   77,   40,   33], device='cuda:0'),
 tensor([9498, 9205, 2907,  ...,   99,   54,   27], device='cuda:0'),
 tensor([11816, 11041,  3733,  ...,   126,    62,    50], device='cuda:0'),
 tensor([13901, 12666,  4208,  ...,   150,    62,    41], device='cuda:0'),
 tensor([15605, 14262,  4712,  ...,   150,    66,    60], device='cuda:0'),
 tensor([17789, 15257,  5190,  ...,   203,    70,    66], device='cuda:0'),
 tensor([18818, 15845,  5537,  ...,   161,    71,    44], device='cuda:0'),
 tensor([19582, 15811,  5556,  ...,   173,    62,    60], device='cuda:0')]

In [23]:
train_dataset = torch.utils.data.TensorDataset(x_seq, target_seq)

In [24]:
dataset = DynamicGraphTemporalSignal( 
            edge_index_seq, 
            edge_weight_seq, 
            x_seq, 
            target_seq )

In [25]:
dataset.edge_indices[0][0].shape, dataset.edge_indices[0][1].shape, dataset.edge_indices[0][2].shape

(torch.Size([2, 28863]), torch.Size([2, 23873]), torch.Size([2, 21832]))

Args:

-    edge_indices (List of Numpy arrays): List of edge index tensors.
-    edge_weights (List of Numpy arrays): List of edge weight tensors.
-    features (List of Numpy arrays): List of node feature tensors.
-    targets (List of Numpy arrays): List of node label (target) tensors.

In [26]:
dataset.snapshot_count

1

In [27]:
dataset.edge_indices[0]

[tensor([[    0,     0,     0,  ..., 14559, 14566, 14571],
         [    1,     2,     3,  ..., 14574, 14568, 14572]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14567, 14569, 14583],
         [    1,     2,     3,  ..., 14568, 14575, 14584]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14547, 14558, 14571],
         [    1,     2,     3,  ..., 14550, 14559, 14572]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14559, 14566, 14582],
         [    1,     2,     3,  ..., 14567, 14568, 14583]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14559, 14566, 14571],
         [    1,     2,     3,  ..., 14567, 14568, 14572]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14549, 14559, 14566],
         [    1,     2,     3,  ..., 14550, 14567, 14568]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 14303, 14549, 14566],
         [    1,     2,     3,  ..., 14306, 14550, 14568]], device='cuda:0'),
 tensor([[    0,     0,     0,  ..., 1454

In [28]:
len( dataset.edge_weights[0][4] )

19011

In [29]:
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

#### ASTGCN:
https://pytorch-geometric-temporal.readthedocs.io/en/latest/modules/root.html#torch_geometric_temporal.nn.convolutional.astgcn.ASTGCN


nb_block (int) – Number of ASTGCN blocks in the model.

in_channels (int) – Number of input features.

K (int) – Order of Chebyshev polynomials. Degree is K-1.

nb_chev_filters (int) – Number of Chebyshev filters.

nb_time_filters (int) – Number of time filters.

time_strides (int) – Time strides during temporal convolution.

edge_index (array) – edge indices.

num_for_predict (int) – Number of predictions to make in the future.

len_input (int) – Length of the input sequence.

num_of_vertices (int) – Number of vertices in the graph.

#### MPNNLSTM:

in_channels (int) – Number of input features.

out_channels (int) – Number of output features.

hidden_size (int) – Dimension of hidden representations.

num_nodes (int) – Number of nodes in the network.

window (int) – Number of past samples included in the input.

dropout (float) – Dropout rate.

In [30]:
node_count = 14585
#num_classes = 10

num_for_predict = 1
len_input = 12
nb_time_strides = 1

node_features = 1
nb_block = 2
K = 3
nb_chev_filter = 64
nb_time_filter = 64
batch_size = 1
normalization = None
bias = True

model = ASTGCN(nb_block, node_features, K, nb_chev_filter, nb_time_filter, nb_time_strides, num_for_predict, 
        len_input, node_count, normalization, bias).to(device)

In [31]:
len(features)

36

In [34]:
F = torch.zeros( len_input, node_count, node_features ).to(device)
for f in range( len( features[0] ) ):
    F[f] = features[0][f][0].reshape(-1,1)

In [33]:
features[0][0]#.type(torch.cuda.LongTensor)

tensor([10287,  4792,  1880,  ...,    38,    40,    21], device='cuda:0')

In [39]:
features[0]

[tensor([10287,  4792,  1880,  ...,    38,    40,    21], device='cuda:0'),
 tensor([9289, 4293, 1666,  ...,   32,   31,   20], device='cuda:0'),
 tensor([8798, 4133, 1571,  ...,   37,   37,   20], device='cuda:0'),
 tensor([8440, 4147, 1594,  ...,   37,   33,   15], device='cuda:0'),
 tensor([8326, 4157, 1585,  ...,   32,   37,   22], device='cuda:0'),
 tensor([7701, 3996, 1499,  ...,   37,   31,   21], device='cuda:0'),
 tensor([7270, 4092, 1473,  ...,   37,   31,   22], device='cuda:0'),
 tensor([6900, 4112, 1433,  ...,   33,   47,   14], device='cuda:0'),
 tensor([6372, 4040, 1406,  ...,   34,   30,   15], device='cuda:0'),
 tensor([5957, 4098, 1328,  ...,   27,   38,   16], device='cuda:0'),
 tensor([5706, 4212, 1288,  ...,   38,   33,   14], device='cuda:0'),
 tensor([5629, 4487, 1364,  ...,   44,   35,   16], device='cuda:0')]

In [35]:
F.expand(1, 12, 14585, 1).permute(0, 2, 3, 1).shape

torch.Size([1, 14585, 1, 12])

In [37]:
model( F.expand(1, 12, 14585, 1).permute(0, 2, 3, 1), edge_indices[0] )

TypeError: Cannot interpret 'torch.int64' as a data type

In [None]:
type(torch.DoubleTensor)

In [52]:
edge_indices[0][0].torch.long

AttributeError: 'Tensor' object has no attribute 'torch'

In [39]:
import torch
import numpy as np
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import barabasi_albert_graph
from torch_geometric.transforms import LaplacianLambdaMax
from torch_geometric_temporal.nn.convolutional import TemporalConv, STConv, ASTGCN, MSTGCN, MTGNN, ChebConvAttention
from torch_geometric_temporal.nn.convolutional import GMAN, SpatioTemporalAttention, SpatioTemporalEmbedding

In [40]:
"""
Testing MSTGCN block with changing edge index over time.
"""
node_count = 307
num_classes = 10
edge_per_node = 15

num_for_predict = 12
len_input = 12
nb_time_strides = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
node_features = 2
nb_block = 2
K = 3
nb_chev_filter = 64
nb_time_filter = 64
batch_size = 1

model = MSTGCN( nb_block, node_features, K, nb_chev_filter,
                nb_time_filter, nb_time_strides, 
                num_for_predict, len_input ).to(device)

In [41]:
model( F.expand(1, 12, 14585, 1).permute(0, 2, 3, 1), edge_indices[0] )

TypeError: Cannot interpret 'torch.int64' as a data type

In [31]:
features[0][0].shape

(14585,)

In [32]:
class recurrent_ASTGCN(torch.nn.Module):
    def __init__(self):
        super(recurrent_ASTGCN, self).__init__()
        self.recurrent = ASTGCN(3, 1, 2, 3, 3, 1, 1, 12, 14585 )
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h

In [33]:
model = recurrent_ASTGCN()

optimizer = torch.optim.Adam( model.parameters(), lr=1e-3 )

model.train()

for epoch in tqdm(range(1)):
    cost = 0
    for time, snapshot in enumerate(train_dataset):
        y_hat = model( features, edge_index_seq, edge_weight_seq )
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

  0%|          | 0/1 [00:00<?, ?it/s]


ValueError: only one element tensors can be converted to Python scalars

In [67]:
model = recurrent_ASTGCN()

optimizer = torch.optim.Adam( model.parameters(), lr=1e-3 )

model.train()

for epoch in tqdm(range(1)):
    cost = 0
    for time, snapshot in enumerate(train_dataset):
        y_hat = model( snapshot.x, snapshot.edge_index, snapshot.edge_attr )
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

  0%|          | 0/1 [00:00<?, ?it/s]


ValueError: only one element tensors can be converted to Python scalars

In [31]:
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.recurrent = DCRNN(node_features, 32, 1)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h

In [33]:
model = RecurrentGCN(node_features = 4)

optimizer = torch.optim.Adam( model.parameters(), lr=1e-2 )

model.train()

for epoch in tqdm(range(200)):
    cost = 0
    for time, snapshot in enumerate(train_dataset):
        y_hat = model( snapshot.x, snapshot.edge_index, snapshot.edge_attr )
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

100%|██████████| 200/200 [00:38<00:00,  5.18it/s]


In [34]:
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

MSE: 1.0405
