To use pytorch geometric temporal, make sure you have torch 1.9.0 installed (uninstall 1.10.0 before).

In [1]:
import torch
print(torch.__version__)

1.9.0+cpu


In [2]:
import torch
import numpy as np

In [None]:
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.9.0+cpu.html
!pip install torch-geometric
!pip install torch-geometric-temporal

In [3]:
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler

def transform_and_split(data):
    # Normalize node features and transform data type
    data.x = normalize(data.x, axis=1, norm='max')
    data.x = torch.from_numpy(data.x).to(torch.float64)
    data.y = data.y.apply_(lambda x:  1 if (x > 0) else 0) # Change y into {0, 1} for binary classification
    data.y = data.y.to(torch.float64)    
    data.edge_attr = data.edge_attr.to(torch.double)


    # Split into train/test set
#    split = nodeSplit(split="train_rest", num_splits = 1, num_val = 0.0, num_test= 0.2)
#    masked_data = split(data)

#    print("Training samples:", torch.sum(masked_data.train_mask).item())
#    print("Validation samples:", torch.sum(masked_data.val_mask ).item())
#    print("Test samples:", torch.sum(masked_data.test_mask ).item())
    print_basic_info(data)
    return data

In [4]:
def print_basic_info(data):
    print()
    print(data)
    print('===========================================================================================================')

    print(f'Number of nodes: {data.num_nodes}')
    print(f'Number of edges: {data.num_edges}')
    print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
    print(f'Has isolated nodes: {data.has_isolated_nodes()}')
    print(f'Has self-loops: {data.has_self_loops()}')
    print(f'Is undirected: {data.is_undirected()}')

### Get and split data

In [5]:
path = "../data/processed/twitter/2018_q1.pt" # Customize...
dataset = torch.load(path)
data = dataset[0]
transformed_data = transform_and_split(data)


Data(x=[29, 61], edge_index=[2, 400], edge_attr=[400], y=[29])
Number of nodes: 29
Number of edges: 400
Average node degree: 13.79
Has isolated nodes: False
Has self-loops: True
Is undirected: True


In [6]:
from torch_geometric_temporal.dataset import ChickenpoxDatasetLoader

loader = ChickenpoxDatasetLoader()

dataset = loader.get_dataset()

In [7]:
dataset.edge_weight

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [8]:
path = "../data/processed/twitter/"

In [9]:
quarter = ['2016_q4']
for i in range(2017, 2022):
    for j in range(1, 5):
        if i == 2021 and j == 4: break
        quarter.append(str(i)+'_q'+str(j))

In [10]:
quarter

['2016_q4',
 '2017_q1',
 '2017_q2',
 '2017_q3',
 '2017_q4',
 '2018_q1',
 '2018_q2',
 '2018_q3',
 '2018_q4',
 '2019_q1',
 '2019_q2',
 '2019_q3',
 '2019_q4',
 '2020_q1',
 '2020_q2',
 '2020_q3',
 '2020_q4',
 '2021_q1',
 '2021_q2',
 '2021_q3']

In [11]:
paths = []
for i in quarter:
    paths.append(path+i+'.pt')

In [12]:
paths

['../data/processed/twitter/2016_q4.pt',
 '../data/processed/twitter/2017_q1.pt',
 '../data/processed/twitter/2017_q2.pt',
 '../data/processed/twitter/2017_q3.pt',
 '../data/processed/twitter/2017_q4.pt',
 '../data/processed/twitter/2018_q1.pt',
 '../data/processed/twitter/2018_q2.pt',
 '../data/processed/twitter/2018_q3.pt',
 '../data/processed/twitter/2018_q4.pt',
 '../data/processed/twitter/2019_q1.pt',
 '../data/processed/twitter/2019_q2.pt',
 '../data/processed/twitter/2019_q3.pt',
 '../data/processed/twitter/2019_q4.pt',
 '../data/processed/twitter/2020_q1.pt',
 '../data/processed/twitter/2020_q2.pt',
 '../data/processed/twitter/2020_q3.pt',
 '../data/processed/twitter/2020_q4.pt',
 '../data/processed/twitter/2021_q1.pt',
 '../data/processed/twitter/2021_q2.pt',
 '../data/processed/twitter/2021_q3.pt']

In [13]:
data_list = []

In [14]:
for path in paths:
    dataset = torch.load(path)
    data = dataset[0]
    data_list.append(transform_and_split(data))


Data(x=[29, 63], edge_index=[2, 760], edge_attr=[760], y=[29])
Number of nodes: 29
Number of edges: 760
Average node degree: 26.21
Has isolated nodes: False
Has self-loops: True
Is undirected: True

Data(x=[29, 62], edge_index=[2, 312], edge_attr=[312], y=[29])
Number of nodes: 29
Number of edges: 312
Average node degree: 10.76
Has isolated nodes: False
Has self-loops: True
Is undirected: True

Data(x=[29, 63], edge_index=[2, 400], edge_attr=[400], y=[29])
Number of nodes: 29
Number of edges: 400
Average node degree: 13.79
Has isolated nodes: False
Has self-loops: True
Is undirected: True

Data(x=[29, 63], edge_index=[2, 552], edge_attr=[552], y=[29])
Number of nodes: 29
Number of edges: 552
Average node degree: 19.03
Has isolated nodes: False
Has self-loops: True
Is undirected: True

Data(x=[29, 63], edge_index=[2, 805], edge_attr=[805], y=[29])
Number of nodes: 29
Number of edges: 805
Average node degree: 27.76
Has isolated nodes: False
Has self-loops: True
Is undirected: True

Data

In [15]:
len(data_list)

20

In [16]:
data_list[1].x.shape

torch.Size([29, 62])

In [17]:
"""
edge_indices = [i.edge_index.double() for i in data_list]
edge_weights = [i.edge_attr.double() for i in data_list]
features = [i.x.double() for i in data_list]
targets = [i.y.double() for i in data_list]
"""
"""
edge_indices = [i.edge_index.cpu().detach().numpy() for i in data_list]
edge_weights = [i.edge_attr.cpu().detach().numpy() for i in data_list]
features = [i.x.cpu().detach().numpy() for i in data_list]
targets = [i.y.cpu().detach().numpy() for i in data_list]
"""
edge_indices = [i.edge_index.numpy() for i in data_list]
edge_weights = [i.edge_attr.numpy() for i in data_list]
features = [i.x.numpy() for i in data_list]
targets = [i.y.numpy() for i in data_list]

In [18]:
features[1].shape

(29, 62)

In [19]:
padded_features = []
for i in features:
    padded_features.append(np.pad(i, [(0, 0), (0, 64-i.shape[1])], 'mean'))

In [20]:
from torch_geometric_temporal.signal import DynamicGraphTemporalSignal

In [21]:
temporal_signal = DynamicGraphTemporalSignal(edge_indices = edge_indices , edge_weights = edge_weights, features = padded_features, targets = targets)

In [22]:
temporal_signal

<torch_geometric_temporal.signal.dynamic_graph_temporal_signal.DynamicGraphTemporalSignal at 0x26f36af77f0>

In [23]:
from torch_geometric_temporal.signal import temporal_signal_split

train_dataset, test_dataset = temporal_signal_split(temporal_signal, train_ratio=0.8)

In [24]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN
from torch_geometric_temporal import TemporalConv
from torch_geometric_temporal import EvolveGCNO
class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.evol = EvolveGCNO(node_features)
        self.recurrent = DCRNN(node_features, 32, 1)
        self.linear = torch.nn.Linear(64, 1)
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x, edge_index, edge_weight):
        h = self.evol(x, edge_index, edge_weight)
#        h = self.dropout(h)
#        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h

In [25]:
for time, snapshot in enumerate(temporal_signal):
    print(time)
    print(snapshot)

0
Data(x=[29, 64], edge_index=[2, 760], edge_attr=[760], y=[29])
1
Data(x=[29, 64], edge_index=[2, 312], edge_attr=[312], y=[29])
2
Data(x=[29, 64], edge_index=[2, 400], edge_attr=[400], y=[29])
3
Data(x=[29, 64], edge_index=[2, 552], edge_attr=[552], y=[29])
4
Data(x=[29, 64], edge_index=[2, 805], edge_attr=[805], y=[29])
5
Data(x=[29, 64], edge_index=[2, 400], edge_attr=[400], y=[29])
6
Data(x=[29, 64], edge_index=[2, 616], edge_attr=[616], y=[29])
7
Data(x=[29, 64], edge_index=[2, 585], edge_attr=[585], y=[29])
8
Data(x=[29, 64], edge_index=[2, 805], edge_attr=[805], y=[29])
9
Data(x=[29, 64], edge_index=[2, 552], edge_attr=[552], y=[29])
10
Data(x=[29, 64], edge_index=[2, 552], edge_attr=[552], y=[29])
11
Data(x=[29, 64], edge_index=[2, 672], edge_attr=[672], y=[29])
12
Data(x=[29, 64], edge_index=[2, 777], edge_attr=[777], y=[29])
13
Data(x=[29, 64], edge_index=[2, 480], edge_attr=[480], y=[29])
14
Data(x=[29, 64], edge_index=[2, 585], edge_attr=[585], y=[29])
15
Data(x=[29, 64], 

In [26]:
from tqdm import tqdm

model = RecurrentGCN(node_features = 64)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in tqdm(range(200)):
    cost = 0
    for time, snapshot in enumerate(train_dataset):
        y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
        cost = cost + torch.mean((y_hat-snapshot.y)**2)
    cost = cost / (time+1)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

100%|██████████| 200/200 [00:09<00:00, 21.95it/s]


In [27]:
y_hat_l = []
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
    y_hat_l.append(y_hat)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))


MSE: 0.2267


In [28]:
y_hat_l

[tensor([[0.6851],
         [0.6821],
         [0.6825],
         [0.7087],
         [0.6917],
         [0.6841],
         [0.6822],
         [0.6838],
         [0.6896],
         [0.6963],
         [0.6868],
         [0.6874],
         [0.6823],
         [0.6826],
         [0.6821],
         [0.6823],
         [0.6973],
         [0.6842],
         [0.7016],
         [0.6821],
         [0.6858],
         [0.6837],
         [0.6856],
         [0.6821],
         [0.6826],
         [0.7724],
         [0.6830],
         [0.6821],
         [0.6830]], grad_fn=<AddmmBackward>),
 tensor([[0.6738],
         [0.6740],
         [0.6742],
         [0.6851],
         [0.6843],
         [0.6739],
         [0.6741],
         [0.6747],
         [0.6807],
         [0.6856],
         [0.6758],
         [0.6756],
         [0.6741],
         [0.6741],
         [0.6740],
         [0.6742],
         [0.6781],
         [0.6741],
         [0.6749],
         [0.6740],
         [0.6740],
         [0.6809],
    

In [29]:
y_hat_l = [list(np.squeeze(i.detach().numpy())) for i in y_hat_l]
y_hat_l = [z for y in y_hat_l for z in y]

In [42]:
y_hat_list = [1 if x > 0.66 else 0 for x in y_hat_l]

In [43]:
true_label = []
for time, snapshot in enumerate(test_dataset):
    true_label.append(list(snapshot.y.detach().numpy()))

In [44]:
true_label = [int(z) for y in true_label for z in y]

In [45]:
from sklearn.metrics import classification_report
y_true = true_label
target_names = ['class 0', 'class 1']
print(classification_report(y_true, y_hat_list, target_names=target_names))

              precision    recall  f1-score   support

     class 0       0.67      0.52      0.59        42
     class 1       0.76      0.85      0.80        74

    accuracy                           0.73       116
   macro avg       0.71      0.69      0.69       116
weighted avg       0.73      0.73      0.72       116

