In [1]:
import dgl
import copy
import glob
import math
import torch
import torch.nn as nn
import numpy as np
from os import path
from pathlib import Path
from dgl.data import DGLDataset
from TrainResults import TrainResults
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset import ToyDGLDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList

Using backend: pytorch


In [2]:
# Evaluation helper: score a model on every batch of a dataloader
# NOTE: the name shadows Python's builtin eval() for the rest of the notebook.
@torch.no_grad()
def eval(model, device, dataloader):
    """Run ``model`` over ``dataloader`` and summarise its predictions.

    Gradient tracking is switched off for the whole call by the decorator.

    Args:
        model: network called as ``model(batched_graph, node_features)``;
            it is moved to ``device`` and put into eval mode.
        device: device that model, graphs and labels are moved to.
        dataloader: iterable yielding ``(batched_graph, labels)`` pairs.

    Returns:
        dict with
            "y_true":        labels of all batches, concatenated (list)
            "y_logits":      raw model outputs, concatenated (nested list)
            "y_scoreClass1": softmax (over dim 1) value at class index 1 (list)
            "y_pred":        argmax of the logits along axis 1 (list)
            "acc":           fraction of entries where y_pred equals y_true
    """
    model = model.to(device)
    model.eval()

    label_chunks, logit_chunks = [], []
    for graphs, targets in dataloader:
        graphs = graphs.to(device)
        targets = targets.to(device)
        node_feats = GetNodeFeatureVectors(graphs)

        logits = model(graphs, node_feats)

        # collect everything on the CPU so it can be turned into numpy / lists
        label_chunks.append(targets.detach().cpu())
        logit_chunks.append(logits.detach().cpu())

    y_true = torch.cat(label_chunks, dim=0).numpy()
    y_logits = torch.cat(logit_chunks, dim=0)
    y_pred = y_logits.numpy().argmax(1)
    y_scoreClass1 = nn.functional.softmax(y_logits, dim=1)[:, 1]

    n_hits = (y_pred == y_true).sum().item()
    acc = n_hits / len(y_true)

    return {
        "y_true": y_true.tolist(),
        "y_logits": y_logits.tolist(),
        "y_scoreClass1": y_scoreClass1.tolist(),
        "y_pred": y_pred.tolist(),
        "acc": acc,
    }

In [3]:
def train(model, device, dataloader, optimizer, loss_fn, batchsize, results):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: network called as ``model(batched_graph, node_features)``;
            it is moved to ``device`` and put into training mode.
        device: device that model, graphs and labels are moved to.
        dataloader: iterable yielding ``(batched_graph, labels)`` pairs.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once per batch.
        loss_fn: callable ``loss_fn(pred, labels)`` returning a scalar tensor.
        batchsize: currently unused; kept so existing callers keep working.
        results: object whose ``addRunningLoss(value)`` is called once per batch.

    Returns:
        The mean of the per-batch loss values, or ``nan`` if the dataloader
        yielded no batches (instead of raising ``ZeroDivisionError``).
    """
    model = model.to(device)
    model.train()

    epochLoss = 0.0
    batchIter = 0

    for batched_graph, labels in dataloader:
        batched_graph = batched_graph.to(device)
        labels = labels.to(device)
        nodeFeatVec = GetNodeFeatureVectors(batched_graph)

        # forward
        pred = model(batched_graph, nodeFeatVec)

        # compute loss
        loss = loss_fn(pred, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The scalar returned by loss_fn is recorded as-is, one entry per
        # optimizer step; it is NOT rescaled by the number of graphs in the batch.
        runningTotalLoss = loss.item()
        results.addRunningLoss(runningTotalLoss)
        epochLoss += runningTotalLoss
        batchIter += 1

    if batchIter == 0:
        # nothing was trained on: there is no meaningful average to report
        return float('nan')

    return epochLoss / batchIter
    

In [4]:
# training
def trainEpochs(model, device, dataloader, optimizer, loss_fn, batchsize, nEpochs):
    """Train for ``nEpochs`` epochs, evaluating after each one.

    NOTE: validation and test accuracy are computed on the notebook-level
    globals ``val_dataloader`` and ``test_dataloader``; both must be defined
    before this function is called.

    Args:
        model: network to train (forwarded to ``train`` and ``eval``).
        device: device forwarded to ``train`` and ``eval``.
        dataloader: training dataloader; also used to measure training accuracy.
        optimizer: optimizer forwarded to ``train``.
        loss_fn: loss function forwarded to ``train``.
        batchsize: forwarded to ``train``.
        nEpochs: number of epochs to run.

    Returns:
        ``(results, bestModel)``: the ``TrainResults`` object holding all
        recorded results, and a deep copy of the model taken whenever
        ``results.best_val_acc`` exceeded the best value seen so far.
        ``bestModel`` is ``None`` if no epoch improved on the initial value
        (e.g. ``nEpochs == 0``).
    """
    results = TrainResults()
    bestModel = None
    # Start below every reachable accuracy so that the first epoch always
    # produces a snapshot, even if validation accuracy stays at 0.0.
    bestValAcc = float('-inf')

    for epoch in range(nEpochs):
        # train
        epochLoss = train(model, device, dataloader, optimizer, loss_fn, batchsize, results)

        # evaluate (training accuracy on the loader that was actually trained on)
        train_result = eval(model, device, dataloader)
        val_result = eval(model, device, val_dataloader)
        test_result = eval(model, device, test_dataloader)

        results.addEpochResult(epochLoss, train_result, val_result, test_result)

        # print progress only every fifth epoch
        if epoch % 5 == 0:
            results.printLastResult()

        # best_val_acc is presumably the best validation accuracy recorded so
        # far - confirm in TrainResults
        if results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [5]:
def getAllDatasetNames(datasetRootDir):
    """Find every dataset directory below ``datasetRootDir``.

    A dataset directory is any directory exactly two levels below the root
    that contains at least one ``*.json`` file.

    Args:
        datasetRootDir: root folder to search (string path).

    Returns:
        ``(datasetDirectories, datasetnames)``: two parallel lists ordered by
        the sorted paths of the matching ``.json`` files. Each directory
        appears once, even if it holds several ``.json`` files, and each name
        is the last path component of its directory.
    """
    # recursive=True only matters if datasetRootDir itself contains '**'
    jsonFiles = glob.glob(datasetRootDir + '/*/*/*.json', recursive=True)
    jsonFiles.sort()

    # dict.fromkeys drops repeated directories while keeping first-seen order,
    # so a folder with several json files is not reported (and trained) twice
    datasetDirectories = list(dict.fromkeys(path.dirname(jsonFile) for jsonFile in jsonFiles))
    datasetnames = [path.normpath(directory).split(path.sep)[-1] for directory in datasetDirectories]
    return datasetDirectories, datasetnames

In [6]:
# Folder that holds the toy graph datasets; discover all datasets below it
datasetRootDir = '/home/andrew/GNN_Sandbox/GraphToyDatasets'
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

# Everything runs on the CPU; use the commented line instead to pick a GPU when available
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

In [7]:
import time
now = time.time()

batchSize = 1024
print(f'Device: {device}')

# Train one model per discovered dataset and store its results next to the data
for i, (datasetName, datasetDir) in enumerate(zip(datasetNames, datasetDirs)):
    dataset = ToyDGLDataset(datasetName, datasetDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # NOTE: trainEpochs reads val_dataloader / test_dataloader (and possibly
    # train_dataloader) as notebook globals, so these names must not change.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=batchSize, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=batchSize, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=batchSize, drop_last=False)

    # Create the model with given dimensions (16 is the middle constructor argument)
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, 16, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    loss_fn = nn.CrossEntropyLoss()
    epochs = 30

    # train
    print(f'Beginning training on dataset {datasetName}')
    results, bestmodel = trainEpochs(model, device, train_dataloader, optimizer, loss_fn, batchSize, epochs)
    results.printBestResult()

    # save results
    outputFolder = path.join(datasetDir, 'GCN_OnlyNodeFeatures_lossPerTrainingstep__')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)

    # trainEpochs returns None as best model when no epoch improved on its
    # initial best accuracy; fall back to the final model instead of crashing
    # on None.state_dict() after all other results have been written.
    if bestmodel is None:
        print('No best model was selected; saving the final model instead')
        bestmodel = model

    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(bestmodel.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')

end = time.time()
elapsed = end - now
print(f'{elapsed} seconds elapsed')

Device: cpu
Done loading data from cached files.
Beginning training on dataset Toy0_000
Epoch: 0, Loss: 21.2024, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.520
Epoch: 5, Loss: 3.0572, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.520
Epoch: 10, Loss: 0.8999, Train: 0.543, Valid: 0.425, Test: 0.350, AUC: 0.340
Epoch: 15, Loss: 0.6989, Train: 0.500, Valid: 0.475, Test: 0.450, AUC: 0.340
Epoch: 20, Loss: 0.7130, Train: 0.521, Valid: 0.500, Test: 0.500, AUC: 0.420
Epoch: 25, Loss: 0.6861, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.580
Best epoch: 
Epoch: 24, Loss: 0.7106, Train: 0.521, Valid: 0.600, Test: 0.400, AUC: 0.580
------------------(1/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy0_001
Epoch: 0, Loss: 13.3087, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.670
Epoch: 5, Loss: 1.8476, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.670
Epoch: 10, Loss: 0.7439, Train: 0.514, Valid: 0.500, Test: 0.550, AUC: 0.

Done loading data from cached files.
Beginning training on dataset Toy1_100
Epoch: 0, Loss: 2.6814, Train: 0.564, Valid: 0.564, Test: 0.613, AUC: 0.662
Epoch: 5, Loss: 0.4873, Train: 0.551, Valid: 0.573, Test: 0.540, AUC: 0.858
Epoch: 10, Loss: 0.3918, Train: 0.843, Valid: 0.858, Test: 0.825, AUC: 0.900
Epoch: 15, Loss: 0.3542, Train: 0.834, Valid: 0.838, Test: 0.843, AUC: 0.911
Epoch: 20, Loss: 0.3398, Train: 0.862, Valid: 0.865, Test: 0.855, AUC: 0.934
Epoch: 25, Loss: 0.3151, Train: 0.892, Valid: 0.892, Test: 0.873, AUC: 0.943
Best epoch: 
Epoch: 26, Loss: 0.3115, Train: 0.901, Valid: 0.904, Test: 0.907, AUC: 0.943
------------------(13/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_101
Epoch: 0, Loss: 1.0364, Train: 0.591, Valid: 0.569, Test: 0.613, AUC: 0.770
Epoch: 5, Loss: 0.5434, Train: 0.847, Valid: 0.828, Test: 0.845, AUC: 0.889
Epoch: 10, Loss: 0.4532, Train: 0.814, Valid: 0.787, Test: 0.800, AUC: 0.889
Epoch: 15

Epoch: 5, Loss: 0.8900, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.010
Epoch: 10, Loss: 0.6851, Train: 0.514, Valid: 0.550, Test: 0.600, AUC: 0.770
Epoch: 15, Loss: 0.5933, Train: 0.707, Valid: 0.650, Test: 0.750, AUC: 0.860
Epoch: 20, Loss: 0.5359, Train: 0.757, Valid: 0.750, Test: 0.750, AUC: 0.940
Epoch: 25, Loss: 0.4963, Train: 0.779, Valid: 0.750, Test: 0.750, AUC: 0.940
Best epoch: 
Epoch: 26, Loss: 0.4751, Train: 0.864, Valid: 0.775, Test: 0.850, AUC: 0.980
------------------(25/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy3_001
Epoch: 0, Loss: 5.2429, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.130
Epoch: 5, Loss: 0.8360, Train: 0.479, Valid: 0.500, Test: 0.450, AUC: 0.680
Epoch: 10, Loss: 0.6199, Train: 0.707, Valid: 0.775, Test: 0.700, AUC: 0.700
Epoch: 15, Loss: 0.6260, Train: 0.636, Valid: 0.725, Test: 0.650, AUC: 0.730
Epoch: 20, Loss: 0.5537, Train: 0.786, Valid: 0.825, Test: 0.700, AUC: 0.810
Epoch: 

Epoch: 5, Loss: 0.0015, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0005, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.3388, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(37/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy4_101
Epoch: 0, Loss: 0.0815, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 5, Loss: 0.0005, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0002, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0000, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 2

Epoch: 5, Loss: 0.6368, Train: 0.936, Valid: 0.975, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0494, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0236, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0170, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0133, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 6, Loss: 0.2192, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(49/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy6_001
Epoch: 0, Loss: 2.7913, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 5, Loss: 0.0803, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0347, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0226, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0180, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 2