In [1]:
import dgl
import copy
import glob
import math
import torch
import torch.nn as nn
import numpy as np
from os import path
from pathlib import Path
from dgl.data import DGLDataset
from TrainResults import TrainResults
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset import ToyDGLDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList

Using backend: pytorch


In [2]:
# The evaluation function
def eval(model, device, dataloader):
    """Run `model` over every batch of `dataloader` and summarise its predictions.

    Note: this function shadows Python's builtin `eval` within the notebook.

    The model is moved to `device` and switched to evaluation mode; it is left
    in evaluation mode when the function returns.

    Args:
        model: callable module invoked as `model(batched_graph, node_features)`.
        device: device the model, graphs and labels are moved to.
        dataloader: iterable yielding `(batched_graph, labels)` pairs.

    Returns:
        dict with the keys
            "y_true":        ground-truth labels as a list,
            "y_logits":      raw model outputs as nested lists,
            "y_scoreClass1": softmax score of column 1 for every sample
                             (so the model must emit at least two columns),
            "y_pred":        argmax over dim 1 of the logits,
            "acc":           fraction of samples with y_pred == y_true.
    """
    model = model.to(device)
    model.eval()

    label_chunks, logit_chunks = [], []
    for graphs, targets in dataloader:
        graphs, targets = graphs.to(device), targets.to(device)
        node_feats = GetNodeFeatureVectors(graphs)

        # Inference only: no autograd bookkeeping is needed for the forward pass.
        with torch.no_grad():
            batch_logits = model(graphs, node_feats)

        # Collect everything on the CPU so the batches can be concatenated below.
        label_chunks.append(targets.detach().cpu())
        logit_chunks.append(batch_logits.detach().cpu())

    all_labels = torch.cat(label_chunks, dim=0).numpy()
    all_logits = torch.cat(logit_chunks, dim=0)
    class1_scores = nn.functional.softmax(all_logits, dim=1)[:, 1]
    predicted = all_logits.numpy().argmax(1)

    hits = (predicted == all_labels).sum().item()

    return {
        "y_true": all_labels.tolist(),
        "y_logits": all_logits.tolist(),
        "y_scoreClass1": class1_scores.tolist(),
        "y_pred": predicted.tolist(),
        "acc": hits / len(all_labels),
    }

In [3]:
def train(model, device, dataloader, optimizer, loss_fn, epochs,
          val_loader=None, test_loader=None):
    """Train `model` for `epochs` epochs and track the best validation snapshot.

    After every epoch the model is evaluated on the training, validation and
    test loaders and the metrics are appended to a `TrainResults` object.

    Args:
        model: module invoked as `model(batched_graph, node_features)`.
        device: device the model and every batch are moved to.
        dataloader: iterable of `(batched_graph, labels)` training batches; it
            is also the loader used for the per-epoch training-set metrics.
        optimizer: optimizer already bound to `model`'s parameters.
        loss_fn: callable `loss_fn(pred, labels)` returning a scalar tensor.
        epochs: number of passes over `dataloader`.
        val_loader: validation batches. When omitted, the notebook-level
            global `val_dataloader` is used.
        test_loader: test batches. When omitted, the notebook-level global
            `test_dataloader` is used.

    Returns:
        `(results, bestModel)`: the `TrainResults` object and a deep copy of
        the model taken at the last epoch that raised `results.best_val_acc`.
        `bestModel` is `None` only when `epochs` is 0.
    """
    print(f'Device: {device}')
    model = model.to(device)

    results = TrainResults()
    bestModel = None
    bestValAcc = 0

    for epoch in range(epochs):
        # eval() leaves the model in evaluation mode, so training mode has to
        # be re-enabled at the start of every epoch, not just once up front.
        model.train()

        for batched_graph, labels in dataloader:
            batched_graph = batched_graph.to(device)
            labels = labels.to(device)
            nodeFeatVec = GetNodeFeatureVectors(batched_graph)

            # forward
            pred = model(batched_graph, nodeFeatVec)

            # compute loss
            loss = loss_fn(pred, labels)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Training metrics come from the loader that was actually trained on.
        train_result = eval(model, device, dataloader)
        # The global fallbacks are resolved lazily so that callers which do not
        # pass the loaders keep working exactly as before.
        val_result = eval(model, device,
                          val_dataloader if val_loader is None else val_loader)
        test_result = eval(model, device,
                           test_dataloader if test_loader is None else test_loader)

        # `loss` is the loss of the last training batch of this epoch.
        results.addResult(loss.tolist(), train_result, val_result, test_result)
        if epoch % 5 == 0:
            results.printLastResult()

        # Always keep a snapshot from the first epoch so that a run whose
        # validation accuracy never rises above 0 still returns a usable model.
        if bestModel is None or results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [4]:
def getAllDatasetNames(datasetRootDir):
    """Find every dataset directory below `datasetRootDir`.

    A dataset directory is any directory exactly two levels below the root
    that contains at least one `.json` file
    (`<root>/<group>/<dataset>/<file>.json`).

    Args:
        datasetRootDir: root directory to search.

    Returns:
        `(datasetDirectories, datasetnames)`: two parallel lists. The first
        holds each matching directory path once, ordered by the sorted paths of
        the `.json` files found; the second holds the last path component of
        each of those directories. Both are empty when nothing matches.
    """
    files = sorted(glob.glob(datasetRootDir + '/*/*/*.json'))
    # A directory holding several .json files must be reported only once;
    # dict.fromkeys removes the repeats while keeping first-seen order.
    datasetDirectories = list(dict.fromkeys(path.dirname(file) for file in files))
    datasetnames = [path.basename(path.normpath(directory)) for directory in datasetDirectories]
    return datasetDirectories, datasetnames

In [5]:
# Everything in this notebook runs on the CPU. The commented-out line below is
# the alternative that selects CUDA automatically when it is available.
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datasetRootDir = '/home/andrew/GNN_Sandbox/GraphToyDatasets'
# Parallel lists: datasetDirs[i] is the directory of the dataset named datasetNames[i].
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

In [6]:
# Train one GCN_OnlyNodeFeatures model per discovered dataset and store the
# results and the best model next to the dataset's own files.
import time
# Start timestamp; used at the bottom of the cell to report the total run time.
now = time.time()

for i in range(len(datasetDirs)):
    dataset = ToyDGLDataset(datasetNames[i], datasetDirs[i])
    # Mapping with the keys 'train', 'valid' and 'test'.
    # presumably each value is a collection of sample indices — TODO confirm in ToyDGLDataset
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # NOTE: train() reads `val_dataloader` and `test_dataloader` from the
    # notebook's global scope when they are not passed to it explicitly, so
    # these variable names must not be changed.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=32, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=32, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=32, drop_last=False)
    
    # Create the model with given dimensions
    # (the middle argument, 16, is presumably the hidden layer size — TODO confirm in GCN_OnlyNodeFeatures)
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, 16, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    loss_fn = nn.CrossEntropyLoss()
    epochs = 30
    
    # train
    print(f'Beginning training on dataset {datasetNames[i]}')
    results, bestmodel = train(model, device, train_dataloader, optimizer, loss_fn, epochs)
    results.printBestResult()
    
    # save results into <dataset directory>/GCN_OnlyNodeFeatures (created if missing)
    outputFolder = path.join(datasetDirs[i], 'GCN_OnlyNodeFeatures')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)
    
    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)
    
    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # Only the state dict is stored, not the model object itself.
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(bestmodel.state_dict(), path.join(outputFolder, 'model.pt'))
    
    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')

# Total wall-clock time for all datasets, in seconds.
end = time.time()
elapsed = end - now
print(elapsed)

Done loading data from cached files.
Beginning training on dataset Toy0_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 13.1278, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.472
Epoch: 5, Loss: 1.6584, Train: 0.500, Valid: 0.450, Test: 0.550, AUC: 0.472
Epoch: 10, Loss: 0.8241, Train: 0.517, Valid: 0.575, Test: 0.425, AUC: 0.462
Epoch: 15, Loss: 0.7210, Train: 0.542, Valid: 0.500, Test: 0.475, AUC: 0.462
Epoch: 20, Loss: 0.7045, Train: 0.617, Valid: 0.450, Test: 0.450, AUC: 0.462
Epoch: 25, Loss: 0.6312, Train: 0.633, Valid: 0.450, Test: 0.475, AUC: 0.462
Best epoch: 
Epoch: 9, Loss: 0.7555, Train: 0.500, Valid: 0.575, Test: 0.450, AUC: 0.462
------------------(1/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy0_fewGraphs_fewNodes_SameNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 12.0663, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.437
Epoch: 5, Loss: 1.8416, Train: 0.500, Valid: 0.45

Epoch: 5, Loss: 0.5707, Train: 0.608, Valid: 0.550, Test: 0.600, AUC: 0.891
Epoch: 10, Loss: 0.6855, Train: 0.658, Valid: 0.625, Test: 0.675, AUC: 0.737
Epoch: 15, Loss: 0.6459, Train: 0.717, Valid: 0.700, Test: 0.700, AUC: 0.780
Epoch: 20, Loss: 0.6017, Train: 0.692, Valid: 0.675, Test: 0.675, AUC: 0.780
Epoch: 25, Loss: 0.5787, Train: 0.692, Valid: 0.675, Test: 0.675, AUC: 0.937
Best epoch: 
Epoch: 29, Loss: 0.5387, Train: 0.908, Valid: 0.900, Test: 0.875, AUC: 0.992
------------------(12/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_manyGraphs_fewNodes_DifferentNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 0.7168, Train: 0.546, Valid: 0.542, Test: 0.536, AUC: 0.488
Epoch: 5, Loss: 0.5328, Train: 0.786, Valid: 0.799, Test: 0.787, AUC: 0.850
Epoch: 10, Loss: 0.4605, Train: 0.828, Valid: 0.848, Test: 0.839, AUC: 0.859
Epoch: 15, Loss: 0.2656, Train: 0.824, Valid: 0.853, Test: 0.830, AUC: 0.874
Epoch: 20, Loss: 0.4136,

Epoch: 10, Loss: 0.2013, Train: 0.920, Valid: 0.916, Test: 0.915, AUC: 0.973
Epoch: 15, Loss: 0.2000, Train: 0.950, Valid: 0.955, Test: 0.948, AUC: 0.988
Epoch: 20, Loss: 0.1631, Train: 0.956, Valid: 0.965, Test: 0.961, AUC: 0.993
Epoch: 25, Loss: 0.1028, Train: 0.965, Valid: 0.968, Test: 0.946, AUC: 0.995
Best epoch: 
Epoch: 29, Loss: 0.1223, Train: 0.973, Valid: 0.978, Test: 0.966, AUC: 0.998
------------------(23/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_manyGraphs_manyNodes_SameNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 0.5794, Train: 0.579, Valid: 0.544, Test: 0.585, AUC: 0.903
Epoch: 5, Loss: 0.3956, Train: 0.957, Valid: 0.953, Test: 0.969, AUC: 0.995
Epoch: 10, Loss: 0.2211, Train: 0.912, Valid: 0.902, Test: 0.925, AUC: 0.996
Epoch: 15, Loss: 0.1725, Train: 0.972, Valid: 0.968, Test: 0.975, AUC: 0.997
Epoch: 20, Loss: 0.1251, Train: 0.971, Valid: 0.971, Test: 0.971, AUC: 0.998
Epoch: 25, Loss: 0.0781, Tr

Epoch: 15, Loss: 0.0381, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0264, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0253, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 6, Loss: 0.1513, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(34/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy4_fewGraphs_manyNodes_DifferentNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 3.0433, Train: 0.500, Valid: 0.450, Test: 0.550, AUC: 0.000
Epoch: 5, Loss: 0.4206, Train: 0.950, Valid: 0.975, Test: 0.925, AUC: 1.000
Epoch: 10, Loss: 0.1328, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0696, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0437, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0286, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 7, L

Epoch: 20, Loss: 0.1232, Train: 0.921, Valid: 0.931, Test: 0.921, AUC: 0.972
Epoch: 25, Loss: 0.2178, Train: 0.923, Valid: 0.924, Test: 0.907, AUC: 0.972
Best epoch: 
Epoch: 14, Loss: 0.1390, Train: 0.919, Valid: 0.934, Test: 0.924, AUC: 0.972
------------------(45/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy5_manyGraphs_fewNodes_SameNodeCountDistributions
Device: cpu
Epoch: 0, Loss: 0.4997, Train: 0.905, Valid: 0.885, Test: 0.920, AUC: 0.972
Epoch: 5, Loss: 0.1073, Train: 0.931, Valid: 0.924, Test: 0.939, AUC: 0.987
Epoch: 10, Loss: 0.0477, Train: 0.932, Valid: 0.925, Test: 0.940, AUC: 0.988
Epoch: 15, Loss: 0.1858, Train: 0.936, Valid: 0.926, Test: 0.938, AUC: 0.988
Epoch: 20, Loss: 0.2000, Train: 0.931, Valid: 0.915, Test: 0.944, AUC: 0.988
Epoch: 25, Loss: 0.2038, Train: 0.937, Valid: 0.930, Test: 0.939, AUC: 0.988
Best epoch: 
Epoch: 21, Loss: 0.3047, Train: 0.937, Valid: 0.931, Test: 0.939, AUC: 0.988
---------------

Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.0019, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(56/56) models trained------------------

624.8990068435669
