In [1]:
%load_ext autoreload
import os
import sys

import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import EvolveGCNH
from torch_geometric_temporal.signal import temporal_signal_split

sys.path.append("..")
from pyg_temporal.temporal_graph import DatasetLoader

In [2]:
%%time
%autoreload
data_dir = os.path.join("..", "data", "mooc")
loader = DatasetLoader()
dataset = loader.get_dataset(data_dir, 30)

train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

Loading edges: ../data/mooc/edge_0.csv
Loading edges: ../data/mooc/edge_1.csv
Loading edges: ../data/mooc/edge_2.csv
Loading edges: ../data/mooc/edge_3.csv
Loading edges: ../data/mooc/edge_4.csv
Loading edges: ../data/mooc/edge_5.csv
Loading edges: ../data/mooc/edge_6.csv
Loading edges: ../data/mooc/edge_7.csv
Loading edges: ../data/mooc/edge_8.csv
Loading edges: ../data/mooc/edge_9.csv
Loading edges: ../data/mooc/edge_10.csv
Loading edges: ../data/mooc/edge_11.csv
Loading edges: ../data/mooc/edge_12.csv
Loading edges: ../data/mooc/edge_13.csv
Loading edges: ../data/mooc/edge_14.csv
Loading edges: ../data/mooc/edge_15.csv
Loading edges: ../data/mooc/edge_16.csv
Loading edges: ../data/mooc/edge_17.csv
Loading edges: ../data/mooc/edge_18.csv
Loading edges: ../data/mooc/edge_19.csv
Loading edges: ../data/mooc/edge_20.csv
Loading edges: ../data/mooc/edge_21.csv
Loading edges: ../data/mooc/edge_22.csv
Loading edges: ../data/mooc/edge_23.csv
Loading edges: ../data/mooc/edge_24.csv
Loading ed

In [3]:
print("Num timestamps:", train_dataset.snapshot_count)
feats = train_dataset.features[0]
print("feats:", np.min(feats), np.max(feats), feats.shape)
print("edge_index:", train_dataset.edge_indices[0])
# print("edge_weight:", train_dataset.edge_weights[0])
targets = train_dataset.targets[0]
print("targets:", np.min(targets), np.max(targets), targets.shape)

Num timestamps: 24
feats: 0.0 1.0 (683559, 4)
edge_index: [[    0     0     0 ... 50928 51023 51120]
 [    1     2     3 ... 51122 51120 51122]]
targets: 0 1 (683559,)


In [4]:
# EvolveGCN-H
class EGCN_H(torch.nn.Module):
    def __init__(self, num_nodes, num_features, hidden_size, num_classes=2):
        super().__init__()
        self.recurrent = EvolveGCNH(num_nodes, num_features)
        self.mlp = torch.nn.Sequential(torch.nn.Linear(num_features, hidden_size),
                                       torch.nn.ReLU(),
                                       torch.nn.Linear(hidden_size, num_classes))

    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.mlp(h)
        h = F.log_softmax(h, dim=1)
        return h

In [5]:
num_nodes, num_feats = dataset.features[0].shape
print(num_nodes, num_feats)
model = EGCN_H(num_nodes, num_feats, 128)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()

683559 4


EGCN_H(
  (recurrent): EvolveGCNH(
    (pooling_layer): TopKPooling(4, ratio=5.851726039741997e-06, multiplier=1.0)
    (recurrent_layer): GRU(4, 4)
    (conv_layer): GCNConv_Fixed_W(4, 4)
  )
  (mlp): Sequential(
    (0): Linear(in_features=4, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=2, bias=True)
  )
)

In [6]:
%%time
# Training
for step, snapshot in enumerate(train_dataset):
    print("step:", step)
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_weight)
    loss = F.nll_loss(y_hat, snapshot.y)
    loss.backward(retain_graph=True)

step: 0
step: 1
step: 2
step: 3
step: 4
step: 5
step: 6
step: 7
step: 8
step: 9
step: 10
step: 11
step: 12
step: 13
step: 14
step: 15
step: 16
step: 17
step: 18
step: 19
step: 20
step: 21
step: 22
step: 23
CPU times: user 37.5 s, sys: 5.15 s, total: 42.6 s
Wall time: 19.5 s


In [7]:
%%time
# Testing
model.eval()
loss = 0
for step, snapshot in enumerate(test_dataset):
    if step == 0:
        model.recurrent.weight = None
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_weight)
    loss = loss + F.nll_loss(y_hat, snapshot.y)
loss = loss / (step+1)
loss = loss.item()
print("Loss: {:.4f}".format(loss))

predictions = list(y_hat.detach().cpu())
actual = snapshot.y

Loss: 0.6206
CPU times: user 5.16 s, sys: 1.57 s, total: 6.73 s
Wall time: 3.34 s


In [8]:
tp, fp, tn, fn = 0, 0, 0, 0
for pred, act in zip(predictions, actual):
    is_exist = pred[0] <= pred[1]
    if is_exist:
        if act:
            tp += 1
        else:
            fp += 1
    else:
        if act:
            fn += 1
        else:
            tn += 1

In [9]:
precision = 0.0 if tp == 0 else tp / (tp+fp)
recall = 0.0 if tp == 0 else tp / (tp+fn)
f1 = 0.0 if tp == 0 else 2 * precision * recall / (precision + recall)
print("TP: {}, FP: {}, TN: {}, FN: {}".format(tp, fp, tn, fn))
print("Precision: {:.4f}, Recall: {:.4f}, F1: {:.4f}".format(precision, recall, f1))

TP: 0, FP: 0, TN: 674477, FN: 9082
Precision: 0.0000, Recall: 0.0000, F1: 0.0000
