In [1]:
import dgl
import copy
import glob
import math
import torch
import torch.nn as nn
import numpy as np
from os import path
from pathlib import Path
from dgl.data import DGLDataset
from TrainResults import TrainResults
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset import ToyDGLDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList

Using backend: pytorch


In [2]:
# The evaluation function
def eval(model, device, dataloader):
    """Run ``model`` over every batch of ``dataloader`` and collect predictions.

    Note: this function shadows Python's builtin ``eval`` inside the notebook.
    The name is kept because other cells call it.

    Args:
        model: ``torch.nn.Module`` invoked as ``model(batched_graph, node_features)``.
            Its output is treated as per-sample class logits with at least two
            columns (column 1 is read as the class-1 score).
        device: Device the model and every batch are moved to.
        dataloader: Iterable yielding ``(batched_graph, labels)`` pairs.

    Returns:
        dict of plain Python values:
            "y_true":        ground-truth labels,
            "y_logits":      raw model outputs,
            "y_scoreClass1": softmax probability of class 1,
            "y_pred":        argmax class index per sample,
            "acc":           fraction of samples where ``y_pred == y_true``.

    Side effects:
        The model is moved to ``device``. It is put into eval mode only for the
        duration of this call; afterwards its previous train/eval mode is
        restored, so calling this in the middle of a training loop does not
        silently disable dropout / batch-norm updates for later epochs.
    """
    model = model.to(device)
    was_training = model.training
    model.eval()
    y_true = []
    y_logits = []

    try:
        # No gradients are needed anywhere in evaluation, so guard the whole loop.
        with torch.no_grad():
            for batched_graph, labels in dataloader:
                batched_graph = batched_graph.to(device)
                labels = labels.to(device)
                nodeFeatVec = GetNodeFeatureVectors(batched_graph)

                pred = model(batched_graph, nodeFeatVec)

                y_true.append(labels.detach().cpu())
                y_logits.append(pred.detach().cpu())
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    y_true = torch.cat(y_true, dim=0).numpy()
    y_logits = torch.cat(y_logits, dim=0)
    y_softmax = nn.functional.softmax(y_logits, dim=1)
    y_scoreClass1 = y_softmax[:, 1]
    y_pred = y_logits.numpy().argmax(1)

    num_correct_pred = (y_pred == y_true).sum().item()
    num_total_pred = len(y_true)
    acc = num_correct_pred / num_total_pred

    evalDict = {
        "y_true": y_true.tolist(),
        "y_logits": y_logits.tolist(),
        "y_scoreClass1": y_scoreClass1.tolist(),
        "y_pred": y_pred.tolist(),
        "acc": acc
    }

    return evalDict

In [3]:
def train(model, device, dataloader, optimizer, loss_fn, epochs,
          val_loader=None, test_loader=None):
    """Train ``model`` for ``epochs`` epochs and track the best validation epoch.

    Args:
        model: ``torch.nn.Module`` invoked as ``model(batched_graph, node_features)``.
        device: Device the model and every batch are moved to.
        dataloader: Training loader yielding ``(batched_graph, labels)`` pairs.
            It is used both for optimisation and for the per-epoch train-set
            evaluation.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_fn: Callable ``loss_fn(pred, labels)`` returning a scalar tensor.
        epochs: Number of passes over ``dataloader``.
        val_loader: Optional validation loader. When omitted, the notebook-level
            global ``val_dataloader`` is used (the original behaviour).
        test_loader: Optional test loader. When omitted, the notebook-level
            global ``test_dataloader`` is used (the original behaviour).

    Returns:
        ``(results, bestModel)``: the ``TrainResults`` instance holding one entry
        per epoch, and a deep copy of the model taken whenever
        ``results.best_val_acc`` improved. ``bestModel`` is ``None`` only when
        ``epochs`` is 0.

    Notes:
        The loss stored for an epoch is the loss of that epoch's last batch.
    """
    print(f'Device: {device}')
    model = model.to(device)

    # Keep the original call signature working: fall back to the loaders the
    # notebook defines at module level when none are passed explicitly.
    if val_loader is None:
        val_loader = val_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    results = TrainResults()
    bestModel = None
    # Start below any reachable accuracy so the first epoch always yields a
    # snapshot, even if validation accuracy is 0.
    bestValAcc = float('-inf')

    for epoch in range(epochs):
        # Evaluation may leave the model in eval mode, so re-enter train mode
        # at the start of every epoch rather than once before the loop.
        model.train()

        for batched_graph, labels in dataloader:
            batched_graph = batched_graph.to(device)
            labels = labels.to(device)
            nodeFeatVec = GetNodeFeatureVectors(batched_graph)

            # forward
            pred = model(batched_graph, nodeFeatVec)

            # compute loss
            loss = loss_fn(pred, labels)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate on the loader that was actually passed in, not a global.
        train_result = eval(model, device, dataloader)
        val_result = eval(model, device, val_loader)
        test_result = eval(model, device, test_loader)

        results.addResult(loss.tolist(), train_result, val_result, test_result)
        if epoch % 5 == 0:
            results.printLastResult()

        if results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [4]:
def getAllDatasetNames(datasetRootDir):
    """Find dataset directories located two levels below ``datasetRootDir``.

    A directory matching ``<datasetRootDir>/*/*`` counts as a dataset when it
    directly contains at least one ``.json`` file.

    Args:
        datasetRootDir: Root folder to search (string or path-like).

    Returns:
        ``(datasetDirectories, datasetnames)``: two parallel lists. The first
        holds each dataset directory exactly once, ordered by the sorted paths
        of the JSON files found; the second holds the last path component of
        each of those directories. Both are empty when nothing matches.
    """
    # Exactly two wildcard directory levels; no '**' is involved, so glob's
    # recursive flag is not needed.
    pattern = path.join(datasetRootDir, '*', '*', '*.json')
    jsonFiles = sorted(glob.glob(pattern))

    # A directory holding several JSON files must be reported only once,
    # otherwise the same dataset would be processed repeatedly.
    # dict.fromkeys drops repeats while keeping first-seen order.
    datasetDirectories = list(dict.fromkeys(path.dirname(f) for f in jsonFiles))
    datasetnames = [path.basename(path.normpath(d)) for d in datasetDirectories]
    return datasetDirectories, datasetnames

In [9]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
# (To force CPU execution, assign device = 'cpu' after this block.)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Root folder that is scanned for toy graph datasets.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
datasetRootDir = '/home/andrew/GNN_Sandbox/GraphToyDatasets'

# Parallel lists: one directory and one name per discovered dataset.
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

In [None]:
import time

# Hyperparameters shared by every dataset in this sweep.
batchSize = 32
hiddenDim = 16
learningRate = 0.005
epochs = 30

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the long-running sweep.
now = time.perf_counter()

# Train one GCN per discovered dataset and store its results next to the data.
for i, (datasetName, datasetDir) in enumerate(zip(datasetNames, datasetDirs)):
    dataset = ToyDGLDataset(datasetName, datasetDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # NOTE: train() reads val_dataloader / test_dataloader as notebook-level
    # globals, so these exact names must stay defined here.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=batchSize, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=batchSize, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=batchSize, drop_last=False)

    # Create the model with given dimensions
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, hiddenDim, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
    loss_fn = nn.CrossEntropyLoss()

    # train
    print(f'Beginning training on dataset {datasetName}')
    results, bestmodel = train(model, device, train_dataloader, optimizer, loss_fn, epochs)
    results.printBestResult()

    # save results
    outputFolder = path.join(datasetDir, 'GCN_OnlyNodeFeatures')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)

    # train() can return None as the best model when no epoch ever improved on
    # its initial validation accuracy; fall back to the final model instead of
    # crashing on None.state_dict().
    if bestmodel is None:
        print('No best-model snapshot was recorded; saving the final model instead.')
        modelToSave = model
    else:
        modelToSave = bestmodel

    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(modelToSave.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')

end = time.perf_counter()
elapsed = end - now
print(elapsed)

Done loading data from cached files.
Beginning training on dataset Toy0_fewGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 7.7188, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.424
Epoch: 5, Loss: 0.9187, Train: 0.542, Valid: 0.550, Test: 0.450, AUC: 0.424
Epoch: 10, Loss: 0.8095, Train: 0.617, Valid: 0.500, Test: 0.425, AUC: 0.424
Epoch: 15, Loss: 0.7706, Train: 0.642, Valid: 0.425, Test: 0.475, AUC: 0.424
Epoch: 20, Loss: 0.7373, Train: 0.575, Valid: 0.275, Test: 0.425, AUC: 0.424
Epoch: 25, Loss: 0.6952, Train: 0.617, Valid: 0.250, Test: 0.400, AUC: 0.424
Best epoch: 
Epoch: 0, Loss: 7.7188, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.424
------------------(1/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy0_fewGraphs_fewNodes_SameNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 1.6407, Train: 0.475, Valid: 0.550, Test: 0.475, AUC: 0.500
Epoch: 5, Loss: 0.7419, Train: 0.433, Valid: 0.57

Epoch: 5, Loss: 2.1012, Train: 0.500, Valid: 0.450, Test: 0.550, AUC: 0.500
Epoch: 10, Loss: 0.6375, Train: 0.808, Valid: 0.775, Test: 0.825, AUC: 0.919
Epoch: 15, Loss: 0.6098, Train: 0.808, Valid: 0.725, Test: 0.775, AUC: 0.919
Epoch: 20, Loss: 0.6181, Train: 0.783, Valid: 0.725, Test: 0.725, AUC: 0.924
Epoch: 25, Loss: 0.5536, Train: 0.758, Valid: 0.675, Test: 0.700, AUC: 0.924
Best epoch: 
Epoch: 17, Loss: 0.5766, Train: 0.833, Valid: 0.825, Test: 0.900, AUC: 0.924
------------------(12/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_manyGraphs_fewNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 0.7914, Train: 0.501, Valid: 0.522, Test: 0.485, AUC: 0.288
Epoch: 5, Loss: 0.4592, Train: 0.672, Valid: 0.679, Test: 0.659, AUC: 0.853
Epoch: 10, Loss: 0.4986, Train: 0.703, Valid: 0.708, Test: 0.710, AUC: 0.860
Epoch: 15, Loss: 0.4169, Train: 0.778, Valid: 0.792, Test: 0.789, AUC: 0.871
Epoch: 20, Loss: 0.6332

Epoch: 5, Loss: 0.5102, Train: 0.943, Valid: 0.949, Test: 0.934, AUC: 0.989
Epoch: 10, Loss: 0.3599, Train: 0.948, Valid: 0.951, Test: 0.934, AUC: 0.987
Epoch: 15, Loss: 0.2176, Train: 0.920, Valid: 0.919, Test: 0.916, AUC: 0.987
Epoch: 20, Loss: 0.2244, Train: 0.950, Valid: 0.956, Test: 0.934, AUC: 0.992
Epoch: 25, Loss: 0.1255, Train: 0.963, Valid: 0.969, Test: 0.965, AUC: 0.995
Best epoch: 
Epoch: 29, Loss: 0.1248, Train: 0.968, Valid: 0.971, Test: 0.974, AUC: 0.997
------------------(23/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_manyGraphs_manyNodes_SameNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 0.8099, Train: 0.500, Valid: 0.492, Test: 0.516, AUC: 0.521
Epoch: 5, Loss: 0.4601, Train: 0.926, Valid: 0.922, Test: 0.950, AUC: 0.994
Epoch: 10, Loss: 0.2687, Train: 0.965, Valid: 0.958, Test: 0.969, AUC: 0.996
Epoch: 15, Loss: 0.2257, Train: 0.959, Valid: 0.959, Test: 0.966, AUC: 0.997
Epoch: 20, Loss: 0.1157, Tr

Epoch: 15, Loss: 0.0256, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0189, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0134, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 7, Loss: 0.1836, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(34/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy4_fewGraphs_manyNodes_DifferentNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 3.9703, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.000
Epoch: 5, Loss: 0.8015, Train: 0.500, Valid: 0.550, Test: 0.450, AUC: 0.000
Epoch: 10, Loss: 0.3024, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.1532, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0984, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0638, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 9, 

Epoch: 20, Loss: 0.1693, Train: 0.896, Valid: 0.900, Test: 0.875, AUC: 0.969
Epoch: 25, Loss: 0.2407, Train: 0.904, Valid: 0.902, Test: 0.899, AUC: 0.969
Best epoch: 
Epoch: 26, Loss: 0.2247, Train: 0.918, Valid: 0.926, Test: 0.909, AUC: 0.970
------------------(45/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy5_manyGraphs_fewNodes_SameNodeCountDistributions
Device: cuda
Epoch: 0, Loss: 0.6665, Train: 0.499, Valid: 0.519, Test: 0.484, AUC: 0.960
Epoch: 5, Loss: 0.2260, Train: 0.937, Valid: 0.922, Test: 0.946, AUC: 0.987
Epoch: 10, Loss: 0.1390, Train: 0.936, Valid: 0.919, Test: 0.945, AUC: 0.987
Epoch: 15, Loss: 0.1687, Train: 0.932, Valid: 0.916, Test: 0.940, AUC: 0.987
Epoch: 20, Loss: 0.2367, Train: 0.938, Valid: 0.922, Test: 0.948, AUC: 0.988
Epoch: 25, Loss: 0.0802, Train: 0.937, Valid: 0.926, Test: 0.941, AUC: 0.988
Best epoch: 
Epoch: 19, Loss: 0.1884, Train: 0.936, Valid: 0.931, Test: 0.941, AUC: 0.988
--------------

Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
