# IMPORT

In [1]:
import torch
import torch.nn.functional as F
from torch.nn import BCEWithLogitsLoss, GRUCell
from torch_geometric.data import Data
from sklearn.metrics import roc_auc_score,average_precision_score

import random

import bisect 

import gc
import copy

from itertools import permutations

import pandas as pd

from torch_geometric.utils import negative_sampling
import torch_geometric.transforms as T
from torch_geometric.transforms import SVDFeatureReduction
from torch_geometric.utils import train_test_split_edges
from torch_geometric.transforms import RandomLinkSplit,NormalizeFeatures,Constant,OneHotDegree
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GCNConv,SAGEConv,GATv2Conv, GINConv, Linear
from scipy.stats import entropy

import torch
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

import copy
import itertools
import json

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# LOAD DATASET

In [2]:
from steemitdata import get_steemit_dataset

In [3]:
#Snapshots with constant encoder as node features
#Snapshots with textual features as node features

snapshots_c = get_steemit_dataset(preprocess='constant')
snapshots_t = get_steemit_dataset(preprocess='text')

In [4]:
#Snapshots with random features as node features
snapshots_ts = get_steemit_dataset(preprocess='constant')
for snap in snapshots_ts:
    snap.x = torch.randn(snap.num_nodes, 384)

# LOAD MODEL

In [5]:
from t3gnn import T3GNN

In [6]:
def roland_test(model, test_data, data, isnap, device='cpu'):
    model.eval()

    test_data = test_data.to(device)

    h, _ = model(test_data.x, test_data.edge_index, edge_label_index = test_data.edge_label_index, isnap=isnap)
    
    pred_cont_link = torch.sigmoid(h).cpu().detach().numpy()
    
    label_link = test_data.edge_label.cpu().detach().numpy()
      
    avgpr_score_link = average_precision_score(label_link, pred_cont_link)
    
    return avgpr_score_link

In [7]:
from sklearn.metrics import *

def roland_train_single_snapshot(model, data, train_data, val_data, test_data, isnap,\
                          last_embeddings, optimizer, device='cpu', num_epochs=50, verbose=False):
    
    avgpr_val_max = 0
    best_model = model
    train_data = train_data.to(device)
    best_epoch = -1
    best_current_embeddings = []
    
    avgpr_trains = []
    #avgpr_vals = []
    avgpr_tests = []
    
    tol = 1
    
    for epoch in range(num_epochs):
        model.train()
        ## Note
        ## 1. Zero grad the optimizer
        ## 2. Compute loss and backpropagate
        ## 3. Update the model parameters
        optimizer.zero_grad()

        pred,\
        current_embeddings =\
            model(train_data.x, train_data.edge_index, edge_label_index = train_data.edge_label_index,\
                  isnap=isnap, previous_embeddings=last_embeddings)
        
        loss = model.loss(pred, train_data.edge_label.type_as(pred)) #loss to fine tune on current snapshot

        loss.backward(retain_graph=True)  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

        ##########################################

        log = 'Epoch: {:03d}\n AVGPR Train: {:.4f}, Val: {:.4f}, Test: {:.4f}\n MRR Train: {:.4f}, Val: {:.4f}, Test: {:.4f}\n F1-Score Train: {:.4f}, Val: {:.4f}, Test: {:.4f}\n Loss: {}'
        avgpr_score_val  = roland_test(model, val_data, data, isnap, device)
        
        if avgpr_val_max-tol <= avgpr_score_val:
            avgpr_val_max = avgpr_score_val
            best_epoch = epoch
            best_current_embeddings = current_embeddings
            best_model = model
        else:
            break
        
    avgpr_score_test = roland_test(model, test_data, data, isnap, device)
            
    return best_model, optimizer, avgpr_score_test, best_current_embeddings

In [8]:
def train_roland(snapshots, hidden_dimension, update='gru', device='cpu'):
    """
        Train and evaluate T3GNN with historical negative edges in the live update setting
    """
    num_snap = len(snapshots)
    input_channels = snapshots[0].x.size(1)
    num_nodes = snapshots[0].x.size(0)
    last_embeddings = [torch.Tensor([[0 for i in range(hidden_dimension)] for j in range(num_nodes)])]
 
    avgpr_test_singles = []
    
    roland = T3GNN(input_channels, 2, hidden_dimension, dropout=0.3, update=update)
    rolopt = torch.optim.Adam(params=roland.parameters(), lr=0.01, weight_decay = 5e-3)
    roland.reset_parameters()
    
    for i in range(num_snap-1):
        #CREATE TRAIN + VAL + TEST SET FOR THE CURRENT SNAP
        snapshot = copy.deepcopy(snapshots[i])
        num_current_edges = len(snapshot.edge_index[0])
        transform = RandomLinkSplit(num_val=0.0,num_test=0.25)
        train_data, _, val_data = transform(snapshot)
        test_data = copy.deepcopy(snapshots[i+1])
        
        #NEGATIVE SET: EDGES CLOSED IN THE PAST BUT NON IN THE CURRENT TEST SET
        past_edges = set(zip([int(e) for e in snapshot.edge_index[0]],\
                             [int(e) for e in snapshot.edge_index[1]]))
        current_edges = set(zip([int(e) for e in test_data.edge_index[0]],\
                             [int(e) for e in test_data.edge_index[1]]))
        
        negative_edges = list(past_edges.difference(current_edges))[:test_data.edge_index.size(1)]
        future_neg_edge_index = torch.Tensor([[a[0] for a in negative_edges],\
                                                 [a[1] for a in negative_edges]]).long()
        
        num_pos_edge = test_data.edge_index.size(1)
        num_neg_edge = future_neg_edge_index.size(1)
        test_data.edge_label = torch.Tensor(np.array([1 for i in range(num_pos_edge)] + [0 for i in range(num_neg_edge)]))
        test_data.edge_label_index = torch.cat([test_data.edge_index, future_neg_edge_index], dim=-1)
        

        print(train_data)
        print(val_data)
        print(test_data)
        print(last_embeddings[0].shape)
        print(last_embeddings[1].shape)
        #TRAIN AND TEST THE MODEL FOR THE CURRENT SNAP
        roland, rolopt, avgpr_test, last_embeddings =\
            roland_train_single_snapshot(roland, snapshot, train_data, val_data, test_data, i,\
                                  last_embeddings, rolopt)
        
        
        #SAVE AND DISPLAY EVALUATION
        print(f'Snapshot: {i}\n\tT3GNN AVGPR Test: {avgpr_test}')
        avgpr_test_singles.append(avgpr_test)
        
    avgpr_test_all = sum(avgpr_test_singles)/len(avgpr_test_singles)
    
    print(f'T3GNN AVGPR over time Test: {avgpr_test_all}')
    
    return avgpr_test_singles

In [9]:
hidden_conv1 = 64
hidden_conv2 = 32

ro_constant_avgpr = train_roland(snapshots_c, hidden_conv1, hidden_conv2, update='mlp') #no-features

Data(num_nodes=14814, edge_index=[2, 29953], x=[14814, 1], edge_label=[59906], edge_label_index=[2, 59906])
Data(num_nodes=14814, edge_index=[2, 29953], x=[14814, 1], edge_label=[19968], edge_label_index=[2, 19968])
Data(num_nodes=14814, edge_index=[2, 3144], x=[14814, 1], edge_label=[6288], edge_label_index=[2, 6288])
torch.Size([14814, 64])
torch.Size([14814, 32])
Snapshot: 0
	T3GNN AVGPR Test: 0.7351054408954614
Data(num_nodes=14814, edge_index=[2, 2358], x=[14814, 1], edge_label=[4716], edge_label_index=[2, 4716])
Data(num_nodes=14814, edge_index=[2, 2358], x=[14814, 1], edge_label=[1572], edge_label_index=[2, 1572])
Data(num_nodes=14814, edge_index=[2, 2751], x=[14814, 1], edge_label=[5502], edge_label_index=[2, 5502])
torch.Size([14814, 64])
torch.Size([14814, 64])
Snapshot: 1
	T3GNN AVGPR Test: 0.58226996790924
Data(num_nodes=14814, edge_index=[2, 2064], x=[14814, 1], edge_label=[4128], edge_label_index=[2, 4128])
Data(num_nodes=14814, edge_index=[2, 2064], x=[14814, 1], edge_la