In [1]:
import copy
import glob
import math
import os
import time
from os import path
from pathlib import Path

import dgl
import numpy as np
import torch
import torch.nn as nn
from dgl.data import DGLDataset
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset_v2 import ToyDGLDataset_v2, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList
from trainAndEval import train, evaluate
from TrainResults import TrainResults

Using backend: pytorch


In [2]:
# training
def trainEpochs(model, device, dataloader, optimizer, loss_fn, batchsize, nEpochs,
                val_loader=None, test_loader=None):
    """Train ``model`` for ``nEpochs`` epochs and return the results plus the best snapshot.

    After every epoch the model is evaluated on the training, validation and
    test loaders and the metrics are appended to a ``TrainResults`` object.
    Every fifth epoch (0, 5, 10, ...) the latest result is printed.

    Args:
        model: The network to optimise (modified in place by ``train``).
        device: Device identifier forwarded to ``train`` and ``evaluate``.
        dataloader: Loader of the training split. It is used both for the
            optimisation step and for the per-epoch training metrics.
        optimizer: Optimizer forwarded to ``train``.
        loss_fn: Loss function forwarded to ``train`` and ``evaluate``.
        batchsize: Batch size forwarded to ``train``.
        nEpochs: Number of epochs to run.
        val_loader: Loader of the validation split. When ``None`` the
            notebook-level ``val_dataloader`` is used (legacy behaviour).
        test_loader: Loader of the test split. When ``None`` the
            notebook-level ``test_dataloader`` is used (legacy behaviour).

    Returns:
        tuple: ``(results, bestModel)`` where ``results`` is the populated
        ``TrainResults`` and ``bestModel`` is a deep copy of the model taken
        at the epoch where ``results.best_val_acc`` last improved. A snapshot
        is always returned: the first epoch is always captured, and with
        ``nEpochs == 0`` a copy of the untouched model is returned.

    Raises:
        NameError: If a loader is omitted and the corresponding notebook-level
            variable does not exist.
    """
    # Backward compatibility: older calls rely on the loaders being notebook globals.
    if val_loader is None:
        val_loader = val_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    results = TrainResults()
    results.startTrainingTimer()
    bestModel = None
    bestValAcc = 0.0

    for epoch in range(nEpochs):
        # train
        epochLoss = train(model, device, dataloader, optimizer, loss_fn, batchsize, results)

        # evaluate (training metrics use the loader that was passed in, not a global)
        train_result = evaluate(model, device, dataloader, loss_fn)
        val_result = evaluate(model, device, val_loader, loss_fn)
        test_result = evaluate(model, device, test_loader, loss_fn)

        results.addEpochResult(epochLoss, train_result, val_result, test_result)

        if epoch % 5 == 0:
            results.printLastResult()

        # Always snapshot the first epoch so a run whose validation accuracy
        # never exceeds 0.0 still yields a usable model.
        if bestModel is None or results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    results.endTrainingTimer()

    # No epoch ran at all: hand back a copy of the untrained model instead of None.
    if bestModel is None:
        bestModel = copy.deepcopy(model)

    return results, bestModel

In [3]:
def getAllDatasetNames(datasetRootDir):
    """Locate every dataset directory below ``datasetRootDir``.

    A dataset directory is any directory exactly two levels below the root
    (``<root>/<group>/<dataset>/``) that contains at least one ``.json`` file.

    Args:
        datasetRootDir: Root folder that is searched.

    Returns:
        tuple: ``(datasetDirectories, datasetnames)`` - two parallel lists with
        the directory path of each dataset and the directory's own name. Each
        directory appears once, ordered by the sorted paths of the JSON files
        found. Both lists are empty when nothing matches.
    """
    pattern = path.join(datasetRootDir, '*', '*', '*.json')
    jsonFiles = sorted(glob.glob(pattern))

    # A directory holding several JSON files must still be listed only once,
    # otherwise the same dataset would be processed repeatedly.
    # dict.fromkeys de-duplicates while keeping the sorted order.
    datasetDirectories = list(dict.fromkeys(path.dirname(jsonFile) for jsonFile in jsonFiles))
    datasetnames = [path.basename(path.normpath(directory)) for directory in datasetDirectories]
    return datasetDirectories, datasetnames

In [4]:
# Training runs on the CPU. To use a GPU when one is available, switch to:
#   device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

# Root folder of the toy datasets. Set the GNN_DATASET_ROOT environment variable
# to run on another machine; the original location remains the default.
datasetRootDir = os.environ.get('GNN_DATASET_ROOT', '/home/andrew/GNN_Sandbox/GraphToyDatasets_v3')
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

In [5]:
# Train one GCN per dataset, then store plots, summaries and the best model
# in <datasetDir>/ThesisPlots/<modelTag>/.
now = time.time()

# Settings shared by every dataset
batchSize = 1024
hiddenDim = 16          # second constructor argument of GCN_OnlyNodeFeatures
learningRate = 0.005
epochs = 30
seed = 0                # fixed so that a rerun reproduces the same numbers
modelTag = 'GCN_OnlyNodeFeatures_normBoth'
print(f'Device: {device}')

for i, (datasetName, datasetDir) in enumerate(zip(datasetNames, datasetDirs)):
    # Re-seed per dataset: sampling order and weight initialisation then do not
    # depend on which datasets were processed before this one.
    torch.manual_seed(seed)

    dataset = ToyDGLDataset_v2(datasetName, datasetDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # trainEpochs reads val_dataloader / test_dataloader as notebook-level names,
    # so these variable names must stay as they are.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=batchSize, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=batchSize, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=batchSize, drop_last=False)

    # Create the model with given dimensions
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, hiddenDim, dataset.num_graph_classes, norm='both').to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)
    loss_fn = nn.CrossEntropyLoss()

    # train
    print(f'Beginning training on dataset {datasetName}')
    results, bestmodel = trainEpochs(model, device, train_dataloader, optimizer, loss_fn, batchSize, epochs)
    results.printBestResult()

    # trainEpochs may return None when no epoch improved on the initial
    # validation accuracy; fall back to the final model so saving cannot crash.
    if bestmodel is None:
        bestmodel = model

    # save results
    outputFolder = path.join(datasetDir, "ThesisPlots", modelTag)
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)

    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(bestmodel.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')


end = time.time()
elapsed = end - now
print(f'{elapsed} seconds elapsed')

Device: cpu
Done loading data from cached files.
Beginning training on dataset Toy0_v3_1_0NodeCountMeanDiff
Epoch: 0, Loss: 0.7778, Validation Loss: 0.6940, Train: 0.500, Validation: 0.506, Test: 0.496, AUC: 0.496
Epoch: 5, Loss: 0.6950, Validation Loss: 0.6943, Train: 0.498, Validation: 0.502, Test: 0.505, AUC: 0.496
Epoch: 10, Loss: 0.6951, Validation Loss: 0.6932, Train: 0.499, Validation: 0.503, Test: 0.496, AUC: 0.496
Epoch: 15, Loss: 0.6955, Validation Loss: 0.6961, Train: 0.498, Validation: 0.502, Test: 0.506, AUC: 0.496
Epoch: 20, Loss: 0.6961, Validation Loss: 0.6945, Train: 0.498, Validation: 0.502, Test: 0.506, AUC: 0.496
Epoch: 25, Loss: 0.6959, Validation Loss: 0.7059, Train: 0.502, Validation: 0.497, Test: 0.494, AUC: 0.496
Best epoch: 
Epoch: 0, Loss: 0.7778, Validation Loss: 0.6940, Train: 0.500, Validation: 0.506, Test: 0.496, AUC: 0.496

The training took 158 seconds (2.63 minutes).
------------------(1/27) models trained------------------

Done loading data from cach

Epoch: 20, Loss: 0.4472, Validation Loss: 0.4408, Train: 0.816, Validation: 0.814, Test: 0.818, AUC: 0.886
Epoch: 25, Loss: 0.4400, Validation Loss: 0.4364, Train: 0.822, Validation: 0.821, Test: 0.824, AUC: 0.886
Best epoch: 
Epoch: 21, Loss: 0.4448, Validation Loss: 0.4456, Train: 0.831, Validation: 0.829, Test: 0.833, AUC: 0.886

The training took 197 seconds (3.28 minutes).
------------------(9/27) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_v3_0_0percentDiff
Epoch: 0, Loss: 8.1433, Validation Loss: 0.7109, Train: 0.501, Validation: 0.494, Test: 0.506, AUC: 0.509
Epoch: 5, Loss: 0.6999, Validation Loss: 0.6986, Train: 0.501, Validation: 0.500, Test: 0.509, AUC: 0.511
Epoch: 10, Loss: 0.6959, Validation Loss: 0.6960, Train: 0.498, Validation: 0.508, Test: 0.506, AUC: 0.510
Epoch: 15, Loss: 0.6943, Validation Loss: 0.6983, Train: 0.502, Validation: 0.497, Test: 0.494, AUC: 0.510
Epoch: 20, Loss: 0.6941, Validation Loss: 0.

Epoch: 5, Loss: 0.6539, Validation Loss: 0.6499, Train: 0.653, Validation: 0.655, Test: 0.657, AUC: 0.727
Epoch: 10, Loss: 0.6297, Validation Loss: 0.6258, Train: 0.672, Validation: 0.672, Test: 0.685, AUC: 0.744
Epoch: 15, Loss: 0.6049, Validation Loss: 0.6021, Train: 0.697, Validation: 0.698, Test: 0.710, AUC: 0.779
Epoch: 20, Loss: 0.5749, Validation Loss: 0.5698, Train: 0.736, Validation: 0.735, Test: 0.746, AUC: 0.822
Epoch: 25, Loss: 0.5436, Validation Loss: 0.5405, Train: 0.761, Validation: 0.762, Test: 0.766, AUC: 0.860
Best epoch: 
Epoch: 28, Loss: 0.5168, Validation Loss: 0.5132, Train: 0.791, Validation: 0.793, Test: 0.798, AUC: 0.885

The training took 252 seconds (4.20 minutes).
------------------(18/27) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_v3_1_0NodeCountMeanDiff
Epoch: 0, Loss: 5.0886, Validation Loss: 0.6975, Train: 0.501, Validation: 0.505, Test: 0.501, AUC: 0.501
Epoch: 5, Loss: 0.6964, Validation Lo

Best epoch: 
Epoch: 29, Loss: 0.5676, Validation Loss: 0.5582, Train: 0.705, Validation: 0.711, Test: 0.713, AUC: 0.793

The training took 200 seconds (3.33 minutes).
------------------(26/27) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy2_v3_1_9NodeCountMeanDiff
Epoch: 0, Loss: 0.8998, Validation Loss: 0.6713, Train: 0.565, Validation: 0.569, Test: 0.567, AUC: 0.606
Epoch: 5, Loss: 0.4973, Validation Loss: 0.5059, Train: 0.740, Validation: 0.733, Test: 0.734, AUC: 0.829
Epoch: 10, Loss: 0.4953, Validation Loss: 0.5053, Train: 0.747, Validation: 0.742, Test: 0.743, AUC: 0.829
Epoch: 15, Loss: 0.4962, Validation Loss: 0.5093, Train: 0.734, Validation: 0.728, Test: 0.728, AUC: 0.829
Epoch: 20, Loss: 0.4977, Validation Loss: 0.5043, Train: 0.745, Validation: 0.738, Test: 0.740, AUC: 0.829
Epoch: 25, Loss: 0.4975, Validation Loss: 0.5061, Train: 0.746, Validation: 0.740, Test: 0.739, AUC: 0.840
Best epoch: 
Epoch: 27, Loss: 0.4973,