In [1]:
import dgl
import copy
import glob
import math
import torch
import torch.nn as nn
import numpy as np
from os import path
from pathlib import Path
from dgl.data import DGLDataset
from TrainResults import TrainResults
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset import ToyDGLDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList

Using backend: pytorch


In [2]:
# The evaluation function
def eval(model, device, dataloader):
    """Score `model` on every batch of `dataloader` and summarise the predictions.

    The model is moved to `device` and put into evaluation mode; it is left in
    that mode when the function returns.

    Args:
        model: network called as ``model(batched_graph, node_features)``.
        device: device the model and every batch are moved to.
        dataloader: iterable yielding ``(batched_graph, labels)`` pairs.

    Returns:
        dict with the keys ``"y_true"`` (labels), ``"y_logits"`` (raw model
        outputs), ``"y_pred"`` (argmax of the logits along axis 1), all as
        plain Python lists, and ``"acc"`` (fraction of predictions equal to
        the labels).
    """
    model = model.to(device)
    model.eval()

    collected_labels = []
    collected_logits = []
    for graph_batch, label_batch in dataloader:
        graph_batch = graph_batch.to(device)
        label_batch = label_batch.to(device)
        features = GetNodeFeatureVectors(graph_batch)

        # Forward pass only: no autograd graph is needed for scoring.
        with torch.no_grad():
            logits = model(graph_batch, features)

        # Keep the per-batch results on the CPU so they can be joined below.
        collected_labels.append(label_batch.detach().cpu())
        collected_logits.append(logits.detach().cpu())

    y_true = torch.cat(collected_labels, dim=0).numpy()
    y_logits = torch.cat(collected_logits, dim=0).numpy()
    y_pred = y_logits.argmax(1)

    acc = (y_pred == y_true).sum().item() / len(y_true)

    return {
        "y_true": y_true.tolist(),
        "y_logits": y_logits.tolist(),
        "y_pred": y_pred.tolist(),
        "acc": acc,
    }

In [3]:
def train(model, device, dataloader, optimizer, loss_fn, epochs, val_loader=None, test_loader=None):
    """Train `model` for `epochs` passes over `dataloader` and track the best epoch.

    After every epoch the model is scored with this notebook's `eval` function
    on the training, validation and test loaders, and the scores are appended
    to a `TrainResults` object.

    Args:
        model: network called as ``model(batched_graph, node_features)``.
        device: device the model and every batch are moved to.
        dataloader: iterable yielding ``(batched_graph, labels)`` training
            batches; also used for the per-epoch training score.
        optimizer: object whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        loss_fn: callable ``loss_fn(pred, labels)`` returning a tensor that
            supports ``backward()``.
        epochs: number of passes over `dataloader`.
        val_loader: optional validation loader. When omitted, the
            notebook-level global ``val_dataloader`` is used.
        test_loader: optional test loader. When omitted, the notebook-level
            global ``test_dataloader`` is used.

    Returns:
        ``(results, bestModel)``: the populated `TrainResults` object and a
        deep copy of the model taken after the epoch at which
        ``results.best_val_acc`` last increased (``None`` only if no epoch ran).
    """
    print(f'Device: {device}')
    model = model.to(device)

    # Backward-compatible fallback: callers that do not pass the loaders
    # explicitly keep getting the notebook-level globals.
    if val_loader is None:
        val_loader = val_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    results = TrainResults()
    bestModel = None
    # Start below any possible accuracy so the first epoch always yields a
    # snapshot, even when validation accuracy never rises above 0.
    bestValAcc = float('-inf')

    for epoch in range(epochs):
        # eval() below switches the model to evaluation mode, so training mode
        # has to be re-entered at the start of every epoch (this matters for
        # mode-dependent layers such as Dropout/BatchNorm, if the model has any).
        model.train()

        for batched_graph, labels in dataloader:
            batched_graph = batched_graph.to(device)
            labels = labels.to(device)
            nodeFeatVec = GetNodeFeatureVectors(batched_graph)

            # forward
            pred = model(batched_graph, nodeFeatVec)

            # compute loss
            loss = loss_fn(pred, labels)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Score on the loader that was actually trained on (the parameter).
        train_result = eval(model, device, dataloader)
        val_result = eval(model, device, val_loader)
        test_result = eval(model, device, test_loader)

        # The recorded loss is that of the last batch of the epoch.
        results.addResult(loss.tolist(), train_result, val_result, test_result)
        if epoch % 5 == 0:
            results.printLastResult()

        if results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [4]:
def getAllDatasetNames(datasetRootDir):
    """Find every dataset directory below `datasetRootDir`.

    A directory is reported when it contains at least one ``*.json`` file; the
    search is recursive and includes `datasetRootDir` itself.

    Args:
        datasetRootDir: path (str) of the directory to search.

    Returns:
        ``(datasetDirectories, datasetnames)``: two parallel lists. The first
        holds each matching directory exactly once, in sorted file-path
        order; the second holds the last path component of each directory.
        Both are empty when nothing matches.
    """
    # glob.escape keeps glob metacharacters in the root path from being
    # interpreted as wildcards.
    pattern = path.join(glob.escape(datasetRootDir), '**', '*.json')
    files = sorted(glob.glob(pattern, recursive=True))

    # dict.fromkeys drops repeats while keeping first-seen order, so a
    # directory that holds several JSON files is listed only once.
    datasetDirectories = list(dict.fromkeys(path.dirname(file) for file in files))
    datasetnames = [path.basename(path.normpath(directory)) for directory in datasetDirectories]
    return datasetDirectories, datasetnames

In [5]:
# Run on the GPU whenever torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
datasetRootDir = '/home/andrew/GNN_Sandbox/GraphToyDatasets'

# Parallel lists: one directory and one name per discovered dataset.
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

In [6]:
# Train, evaluate and save one GCN_OnlyNodeFeatures model per discovered dataset.
# The same batch size is used for the train, validation and test loaders.
batchSize = 32

for i, (datasetName, datasetDir) in enumerate(zip(datasetNames, datasetDirs)):
    dataset = ToyDGLDataset(datasetName, datasetDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # Keep these three variable names: train() looks up `val_dataloader` and
    # `test_dataloader` as notebook-level globals when they are not passed in.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=batchSize, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=batchSize, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=batchSize, drop_last=False)

    # Create the model with given dimensions
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, 16, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()
    epochs = 50

    # train
    print(f'Beginning training on dataset {datasetName}')
    results, bestmodel = train(model, device, train_dataloader, optimizer, loss_fn, epochs)
    results.printBestResult()

    # save results next to the dataset they were computed from
    outputFolder = path.join(datasetDir, 'GCN_OnlyNodeFeatures')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)

    # train() may hand back None as the best model when it never recorded an
    # improving epoch; save the final model then instead of failing on
    # `None.state_dict()`.
    modelToSave = bestmodel if bestmodel is not None else model

    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(modelToSave.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')

Done loading data from cached files.
Beginning training on dataset Toy0_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 7.2474, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.500
Epoch: 5, Loss: 0.7818, Train: 0.483, Valid: 0.475, Test: 0.500, AUC: 0.500
Epoch: 10, Loss: 0.8008, Train: 0.467, Valid: 0.475, Test: 0.500, AUC: 0.500
Epoch: 15, Loss: 0.7095, Train: 0.500, Valid: 0.450, Test: 0.525, AUC: 0.500
Epoch: 20, Loss: 0.7065, Train: 0.500, Valid: 0.400, Test: 0.475, AUC: 0.500
Epoch: 25, Loss: 0.6515, Train: 0.475, Valid: 0.400, Test: 0.450, AUC: 0.500
Epoch: 30, Loss: 0.6755, Train: 0.600, Valid: 0.525, Test: 0.400, AUC: 0.500
Epoch: 35, Loss: 0.8109, Train: 0.550, Valid: 0.525, Test: 0.425, AUC: 0.500
Epoch: 40, Loss: 0.7394, Train: 0.592, Valid: 0.525, Test: 0.350, AUC: 0.500
Epoch: 45, Loss: 0.6435, Train: 0.592, Valid: 0.525, Test: 0.375, AUC: 0.500
Best epoch: 
Epoch: 0, Loss: 7.2474, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.500
-------

Best epoch: 
Epoch: 1, Loss: 0.6709, Train: 0.499, Valid: 0.522, Test: 0.486, AUC: 0.502
------------------(8/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 28.3806, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.500
Epoch: 5, Loss: 1.0120, Train: 0.600, Valid: 0.725, Test: 0.600, AUC: 0.636
Epoch: 10, Loss: 1.0730, Train: 0.533, Valid: 0.575, Test: 0.475, AUC: 0.636
Epoch: 15, Loss: 0.5747, Train: 0.567, Valid: 0.550, Test: 0.650, AUC: 0.785
Epoch: 20, Loss: 0.5864, Train: 0.783, Valid: 0.775, Test: 0.800, AUC: 0.785
Epoch: 25, Loss: 0.6306, Train: 0.775, Valid: 0.800, Test: 0.825, AUC: 0.785
Epoch: 30, Loss: 0.5475, Train: 0.817, Valid: 0.775, Test: 0.825, AUC: 0.831
Epoch: 35, Loss: 0.5289, Train: 0.792, Valid: 0.875, Test: 0.825, AUC: 0.826
Epoch: 40, Loss: 0.5903, Train: 0.717, Valid: 0.850, Test: 0.725, AUC: 0.826
Epoch: 45, Loss: 0.5951,

Epoch: 40, Loss: 0.0688, Train: 0.993, Valid: 0.995, Test: 0.990, AUC: 0.990
Epoch: 45, Loss: 0.0230, Train: 0.985, Valid: 0.976, Test: 0.986, AUC: 0.990
Best epoch: 
Epoch: 28, Loss: 0.0320, Train: 0.990, Valid: 0.995, Test: 0.990, AUC: 0.990
------------------(16/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 2.5074, Train: 0.558, Valid: 0.500, Test: 0.425, AUC: 0.457
Epoch: 5, Loss: 0.6558, Train: 0.558, Valid: 0.475, Test: 0.550, AUC: 0.500
Epoch: 10, Loss: 0.6171, Train: 0.667, Valid: 0.650, Test: 0.600, AUC: 0.765
Epoch: 15, Loss: 0.5800, Train: 0.725, Valid: 0.700, Test: 0.725, AUC: 0.826
Epoch: 20, Loss: 0.5548, Train: 0.750, Valid: 0.775, Test: 0.725, AUC: 0.750
Epoch: 25, Loss: 0.8049, Train: 0.758, Valid: 0.750, Test: 0.850, AUC: 0.750
Epoch: 30, Loss: 0.6933, Train: 0.733, Valid: 0.700, Test: 0.775, AUC: 0.750
Epoch: 35, Loss: 0.5077

Epoch: 30, Loss: 0.0222, Train: 0.986, Valid: 0.986, Test: 0.983, AUC: 0.983
Epoch: 35, Loss: 0.0537, Train: 0.992, Valid: 0.993, Test: 0.989, AUC: 0.989
Epoch: 40, Loss: 0.0185, Train: 0.993, Valid: 0.993, Test: 0.991, AUC: 0.989
Epoch: 45, Loss: 0.0205, Train: 0.986, Valid: 0.983, Test: 0.983, AUC: 0.989
Best epoch: 
Epoch: 49, Loss: 0.0152, Train: 0.995, Valid: 0.998, Test: 0.991, AUC: 0.992
------------------(24/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy3_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 1.5269, Train: 0.842, Valid: 0.825, Test: 0.650, AUC: 0.636
Epoch: 5, Loss: 0.6135, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.636
Epoch: 10, Loss: 0.5578, Train: 0.542, Valid: 0.450, Test: 0.625, AUC: 0.889
Epoch: 15, Loss: 0.4328, Train: 0.958, Valid: 1.000, Test: 0.950, AUC: 0.927
Epoch: 20, Loss: 0.4429, Train: 0.908, Valid: 0.925, Test: 0.900, AUC: 0.927
Epoch: 25, Loss: 0.3502

Epoch: 20, Loss: 0.0021, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Epoch: 30, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Epoch: 35, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Epoch: 40, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Epoch: 45, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 0.997
Best epoch: 
Epoch: 0, Loss: 0.2850, Train: 1.000, Valid: 1.000, Test: 0.998, AUC: 0.997
------------------(32/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy4_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 9.7302, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.500
Epoch: 5, Loss: 0.4394, Train: 0.583, Valid: 0.475, Test: 0.650, AUC: 1.000
Epoch: 10, Loss: 0.0206, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0007,

Epoch: 10, Loss: 0.0005, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 30, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 35, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 40, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 45, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.0245, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(40/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy5_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 0.8842, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.500
Epoch: 5, Loss: 0.7110,

Epoch: 0, Loss: 0.2886, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 5, Loss: 0.0148, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Epoch: 10, Loss: 0.0036, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Epoch: 15, Loss: 0.0023, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0061, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Epoch: 25, Loss: 0.0011, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Epoch: 30, Loss: 0.0032, Train: 1.000, Valid: 0.998, Test: 0.998, AUC: 1.000
Epoch: 35, Loss: 0.0007, Train: 0.998, Valid: 0.993, Test: 0.995, AUC: 1.000
Epoch: 40, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Epoch: 45, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 0.999, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.2886, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(48/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy6_fewGraphs_f

------------------(55/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy6_manyGraphs_manyNodes_SameNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 0.0019, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 5, Loss: 0.0004, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 30, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 35, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 40, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 45, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.0019, Tr