In [1]:
#imports 
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import random
from spektral.utils import tic, toc
from models import *
from utils import *
#check if the gpu is available
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
#set seed so results are reproducible
random.seed(1)



[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9654898608392036199
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4160159744
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11372315678574588484
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5"
]


In [2]:
#read the dataset
def read_data(id: str): #DBLP3, DBLP5, Brain, Reddit, DBLPE
    #pick which dataset to load
    dataset_dict=dict()
    dataset_dict["DBLP3"]="Datasets/DBLP3.npz"
    dataset_dict["DBLP5"]="Datasets/DBLP5.npz"
    dataset_dict["Brain"]="Datasets/Brain.npz"
    dataset_dict["Reddit"]="Datasets/reddit.npz"
    dataset_dict["DBLPE"]="Datasets/DBLPE.npz"

    dataset = np.load(dataset_dict[id])

    #get the adjacency matrix
    adjs = dataset["adjs"] #(time, node, node)

    #Remove nodes with no connections at any timestep
    #this shrinks the data considerably
    temporal_sum = tf.math.reduce_sum(adjs, axis=0, keepdims=False, name=None)
    row_sum = tf.math.reduce_sum(temporal_sum, axis=0, keepdims=False, name=None)
    non_zero_indices = np.flatnonzero(row_sum)
    adjs = adjs[:,non_zero_indices,:]
    adjs = adjs[:,:,non_zero_indices]

    #DBLPE is a dynamic featureless graph
    if id=="DBLPE":
        labels = dataset["labels"] #(nodes, time, class)

        # labels = np.argmax(labels,axis=2)
        labels=labels[non_zero_indices]
        feats=np.zeros([adjs.shape[1], adjs.shape[0], adjs.shape[2]])

        #since there are no features just fill in the identity matrix
        for i in range(feats.shape[1]):
            feats[:,i,:]=np.eye(feats.shape[0])
      
    #All others are static feature-full graphs
    else:
        labels = dataset["labels"] #(nodes, class)
        feats = dataset["attmats"] #(node, time, feat)

        # labels = np.argmax(labels, axis=1)
        labels = labels[non_zero_indices]
        feats = feats[non_zero_indices]

    #Other important variables
    n_nodes = adjs.shape[1]
    n_timesteps = adjs.shape[0]
    n_class = int(labels.shape[1])
    n_feat = feats.shape[2]

    #Train Val Test split
    nodes_id = list(range(n_nodes))
    random.shuffle(nodes_id)
    idx_train = nodes_id[:(7*n_nodes)//10]
    idx_train = [True if i in idx_train else False for i in list(range(n_nodes))]
    idx_val = nodes_id[(7*n_nodes)//10: (9*n_nodes)//10]
    idx_val = [True if i in idx_val else False for i in list(range(n_nodes))]
    idx_test = nodes_id[(9*n_nodes)//10: n_nodes]
    idx_test = [True if i in idx_test else False for i in list(range(n_nodes))]

    #custom data type that holds everything i might need
    return STG_Dataset(tf.convert_to_tensor(adjs,dtype=tf.float32),
                        tf.convert_to_tensor(adjs,dtype=tf.float32),
                        tf.convert_to_tensor(feats,dtype=tf.float32), 
                        tf.convert_to_tensor(feats,dtype=tf.float32), 
                        tf.convert_to_tensor(labels,dtype=tf.float32), 
                        tf.convert_to_tensor(labels,dtype=tf.float32), 
                        n_nodes, n_timesteps, n_class, n_feat, 
                        np.array(idx_train),
                        np.array(idx_val),
                        np.array(idx_test))
    

In [3]:
# Training step
@tf.function
def train(feats, adjs, labels, idx_train, idx_val, model, loss_fn, optimizer, acc):
    #training
    with tf.GradientTape() as tape:
        predictions = model([feats, adjs], training=True)
        loss_train = loss_fn(labels[idx_train], predictions[idx_train])
    gradients = tape.gradient(loss_train, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    #evaluating
    predictions = model([feats, adjs], training=False)
    loss_val = loss_fn(labels[idx_val], predictions[idx_val])
    acc.update_state(labels[idx_val], predictions[idx_val])
    return loss_train


@tf.function
#testing step
def test(feats, adjs, labels, idx_test, model, loss_fn, optimizer, acc, auc, f1):
    predictions = model([feats, adjs], training=False)
    loss_test = loss_fn(labels[idx_test], predictions[idx_test])

    #updating metrics state
    acc.update_state(labels[idx_test], predictions[idx_test])
    auc.update_state(labels[idx_test], predictions[idx_test])
    f1.update_state(labels[idx_test], predictions[idx_test])
    return loss_test

In [4]:
#a single epoch of a train and test loop
def timestep_train_test(epochs, model, data, loss_fn, optimizer, val_acc, acc, auc, f1):
    best_val=0
    tic()
    #for each epoch
    for epoch in range(1, epochs + 1):
        #calculate the loss
        loss_train = train(data.feats_timestep, data.adjs_timestep, data.labels_timestep, data.idx_train, data.idx_val, model, loss_fn, optimizer, val_acc)
        #keep track of best val acc
        if val_acc.result() > best_val:
            best_val = val_acc.result()
        val_acc.reset_state()
    print(f"Best Training Loss {loss_train}")
    print(f"Best Val Acc: {best_val}")

    #after training test the data
    loss_test = test(data.feats_timestep, data.adjs_timestep, data.labels_timestep, data.idx_test, model, loss_fn, optimizer, acc, auc, f1)
    print(f"Test Loss: {loss_test}, Test Acc: {acc.result()}, Test F1 score: {f1.result()}, Auc Test: {auc.result()}")
    # print(f"lambda: {model.trainable_weights[0]}")
    toc(f"{model.name} ({epochs} epochs)")

In [5]:
#wrapper over train and test loop. Takes in data name and model name and then trains and evaluates
def overall_train_test(data_id, model_id):
    #Constant parameters
    epochs = 500
    dropout_rate = 0.5
    lr = 25e-4
    weight_decay = 5e-4
    ignores_temporal_data = ["GAT", "GCN", "GraphSage"]

    #read the data
    data = read_data(data_id)
    #Each model takes in different parameters so this is where i decide which model to build
    if model_id.__name__ not in ["TRNNGCN", "EGCN"]:
        model = model_id(data.n_class, data.n_class, dropout_rate)
    elif model_id.__name__ == "EGCN":
        model = model_id(data.n_feat, data.n_class, data.n_class)
    else:
        model = model_id(data.n_nodes, data.n_class, data.n_class, dropout_rate)

    #If the model ignores temporal data it only takes in 2 dimensions
    if (model_id.__name__ in ignores_temporal_data):
        model.build([(data.n_nodes, data.n_feat), (data.n_nodes, data.n_nodes)])
    #else it needs to take in time
    else:
        model.build([(data.n_nodes, data.n_timestamps, data.n_feat), (data.n_timestamps, data.n_nodes, data.n_nodes)])
    model.summary()
    optimizer = tfa.optimizers.AdamW(learning_rate=lr, weight_decay=weight_decay)
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    #Metrics can only be created once
    val_acc = tf.keras.metrics.CategoricalAccuracy()
    acc = tf.keras.metrics.CategoricalAccuracy()
    auc = tf.keras.metrics.AUC(num_thresholds=data.adjs.shape[0], multi_label=False)
    f1 = tfa.metrics.F1Score(data.labels.shape[1], average="weighted")

    #preprocessing
    #for each timestep
    for timestep in range(1, data.n_timestamps+1):
        #keep track of the timesteps current position
        data.adjs_timestep = tf.identity(data.adjs[:timestep,:,:])
        data.feats_timestep = tf.identity(data.feats)
        data.labels_timestep = tf.identity(data.labels)
        if (data_id == "DBLPE"):
            data.labels_timestep = data.labels_timestep[:,timestep-1]
        
        #If the model ignores temporal data, accumulate adj matrices
        if (model_id.__name__ in ignores_temporal_data):
            data.adjs_timestep = tf.math.reduce_sum(data.adjs_timestep, axis=0, keepdims=False, name=None)
            data.feats_timestep = data.feats_timestep[:, -1, :]

            #normalize the adj matrix
            data.adjs_timestep += tf.eye(data.adjs_timestep.shape[0])
            d = tf.reduce_sum(data.adjs_timestep, axis=1)
            normalizing_matrix = np.zeros((data.adjs_timestep.shape[0], data.adjs_timestep.shape[0]))
            normalizing_matrix[range(len(normalizing_matrix)), range(len(normalizing_matrix))] = d**(-0.5)
            normalizing_matrix = tf.convert_to_tensor(normalizing_matrix, dtype=tf.float32)
            data.adjs_timestep = tf.matmul(normalizing_matrix,data.adjs_timestep)
            data.adjs_timestep=tf.matmul(tf.matmul(normalizing_matrix,data.adjs_timestep), normalizing_matrix)

        timestep_train_test(epochs, model, data, loss_fn, optimizer, val_acc, acc, auc, f1)
            


In [6]:
# tf.config.run_functions_eagerly(True)

#input model and dataset and everything will run itself
overall_train_test("DBLP3", GAT)

Model: "gat"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            multiple                  0         
_________________________________________________________________
gat_conv (GATConv)           multiple                  309       
_________________________________________________________________
gat_conv_1 (GATConv)         multiple                  18        
Total params: 327
Trainable params: 327
Non-trainable params: 0
_________________________________________________________________
Best Training Loss 0.6628769040107727
Best Val Acc: 0.7402135133743286
Test Loss: 0.7425931692123413, Test Acc: 0.7234042286872864, Test F1 score: 0.6073023676872253, Auc Test: 0.8087747097015381
gat (500 epochs)
Elapsed: 8.82s
Best Training Loss 0.6197769045829773
Best Val Acc: 0.7402135133743286
Test Loss: 0.7074084281921387, Test Acc: 0.7234042286872864, Test F1 score: 0.6073023676872253, 