In [1]:
import copy
import glob
import math
import time
from os import path
from pathlib import Path

import dgl
import numpy as np
import torch
import torch.nn as nn
from dgl.data import DGLDataset
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from SimpleGCN_OnlyNodeFeatures import GCN_OnlyNodeFeatures
from ToyDGLDataset import ToyDGLDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList
from TrainResults import TrainResults

Using backend: pytorch


In [2]:
# The evaluation function
def eval(model, device, dataloader):
    """Run `model` over every batch of `dataloader` and summarise its predictions.

    Note: this function shadows the builtin `eval` and leaves `model` in
    evaluation mode (`model.eval()`); callers that continue training must
    switch back with `model.train()` themselves.

    Args:
        model: network called as `model(batched_graph, node_features)`.
        device: device the model, graphs and labels are moved to.
        dataloader: iterable yielding `(batched_graph, labels)` pairs.

    Returns:
        dict with plain-Python values:
            "y_true":        labels of all samples, in iteration order
            "y_logits":      raw model outputs per sample
            "y_scoreClass1": softmax score of class index 1 per sample
            "y_pred":        argmax over the logits per sample
            "acc":           fraction of samples where y_pred equals y_true
    """
    model = model.to(device)
    model.eval()

    label_chunks = []
    logit_chunks = []
    for graph_batch, label_batch in dataloader:
        graph_batch = graph_batch.to(device)
        label_batch = label_batch.to(device)
        node_features = GetNodeFeatureVectors(graph_batch)

        # Forward pass only; no autograd bookkeeping is needed here.
        with torch.no_grad():
            batch_logits = model(graph_batch, node_features)

        label_chunks.append(label_batch.detach().cpu())
        logit_chunks.append(batch_logits.detach().cpu())

    # Stitch the per-batch tensors back into one array per quantity.
    targets = torch.cat(label_chunks, dim=0).numpy()
    logits = torch.cat(logit_chunks, dim=0)

    class1_scores = nn.functional.softmax(logits, dim=1)[:, 1]
    predictions = logits.numpy().argmax(1)

    hit_count = (predictions == targets).sum().item()
    sample_count = len(targets)

    return {
        "y_true": targets.tolist(),
        "y_logits": logits.tolist(),
        "y_scoreClass1": class1_scores.tolist(),
        "y_pred": predictions.tolist(),
        "acc": hit_count / sample_count,
    }

In [3]:
def train(model, device, dataloader, optimizer, loss_fn, epochs,
          val_loader=None, test_loader=None):
    """Train `model` for `epochs` epochs and keep the best model by validation accuracy.

    After every epoch the model is evaluated on the training, validation and
    test loaders and the outcome is recorded in a `TrainResults` object.

    Args:
        model: network called as `model(batched_graph, node_features)`.
        device: device the model and the batches are moved to.
        dataloader: training loader yielding `(batched_graph, labels)` pairs;
            it is also used for the per-epoch training-set evaluation.
        optimizer: optimizer that updates the parameters of `model`.
        loss_fn: callable `loss_fn(pred, labels)` returning the loss tensor.
        epochs: number of passes over `dataloader`.
        val_loader: validation loader. When None, the notebook-level global
            `val_dataloader` is used (the original behaviour).
        test_loader: test loader. When None, the notebook-level global
            `test_dataloader` is used (the original behaviour).

    Returns:
        `(results, bestModel)`: the `TrainResults` object and a deep copy of
        the model taken whenever `results.best_val_acc` improved.
    """
    print(f'Device: {device}')
    model = model.to(device)

    # Fall back to the loaders defined at notebook level for older call sites.
    if val_loader is None:
        val_loader = val_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    results = TrainResults()
    bestModel = None
    # Start below any possible accuracy so that a snapshot is taken even when
    # the validation accuracy is 0; otherwise bestModel could remain None.
    bestValAcc = float('-inf')

    for epoch in range(epochs):
        # eval() puts the model into evaluation mode, so training mode has to
        # be re-enabled at the start of every epoch, not just once.
        model.train()

        for batched_graph, labels in dataloader:
            batched_graph = batched_graph.to(device)
            labels = labels.to(device)
            nodeFeatVec = GetNodeFeatureVectors(batched_graph)

            # forward
            pred = model(batched_graph, nodeFeatVec)

            # compute loss
            loss = loss_fn(pred, labels)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate on the loader that was actually trained on, not on a global.
        train_result = eval(model, device, dataloader)
        val_result = eval(model, device, val_loader)
        test_result = eval(model, device, test_loader)

        # The recorded loss is the loss of the last batch of this epoch.
        results.addResult(loss.tolist(), train_result, val_result, test_result)
        if epoch % 5 == 0:
            results.printLastResult()

        if results.best_val_acc > bestValAcc:
            bestValAcc = results.best_val_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [4]:
def getAllDatasetNames(datasetRootDir):
    """Find all dataset directories two levels below `datasetRootDir`.

    A dataset directory is any directory `<root>/<group>/<dataset>` that
    contains at least one `.json` file. Each directory is reported once, even
    if it holds several `.json` files, so no dataset is processed twice.

    Args:
        datasetRootDir: root directory that is searched.

    Returns:
        `(datasetDirectories, datasetnames)`: two parallel lists, where
        `datasetnames[i]` is the last path component of
        `datasetDirectories[i]`. The order follows the sorted `.json` paths.
        Both lists are empty when nothing matches.
    """
    # The pattern has a fixed depth (no '**'), so glob's `recursive` flag
    # would have no effect and is not passed.
    pattern = path.join(datasetRootDir, '*', '*', '*.json')

    datasetDirectories = []
    seenDirectories = set()
    for jsonFile in sorted(glob.glob(pattern)):
        directory = path.dirname(jsonFile)
        if directory in seenDirectories:
            continue
        seenDirectories.add(directory)
        datasetDirectories.append(directory)

    datasetnames = [path.basename(path.normpath(directory))
                    for directory in datasetDirectories]
    return datasetDirectories, datasetnames

In [5]:
# Device selection: training is pinned to the CPU. The commented-out line
# is the currently disabled alternative that would pick CUDA when available.
#device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
datasetRootDir = '/home/andrew/GNN_Sandbox/GraphToyDatasets'
# Parallel lists: datasetNames[i] is the name of the dataset stored in datasetDirs[i].
datasetDirs, datasetNames = getAllDatasetNames(datasetRootDir)

In [6]:
# Train one GCN_OnlyNodeFeatures model per discovered dataset and store the
# results (plots, summary, pickle, best model weights) next to the dataset.
# `time` is imported in the import cell at the top of the notebook.

# perf_counter() is monotonic, so the measured duration cannot be distorted
# by system clock adjustments during the long run.
now = time.perf_counter()

# Hyperparameters shared by every dataset.
batch_size = 32
hidden_dim = 16
learning_rate = 0.005
epochs = 30

for i, (datasetName, datasetDir) in enumerate(zip(datasetNames, datasetDirs)):
    dataset = ToyDGLDataset(datasetName, datasetDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    val_sampler = SubsetRandomSampler(splitIndices['valid'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    # These three names must stay at notebook level: train() may read
    # val_dataloader and test_dataloader as globals.
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=batch_size, drop_last=False)
    val_dataloader = GraphDataLoader(dataset, sampler=val_sampler, batch_size=batch_size, drop_last=False)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, batch_size=batch_size, drop_last=False)

    # Create the model with given dimensions
    model = GCN_OnlyNodeFeatures(dataset.dim_nfeats, hidden_dim, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    # train
    print(f'Beginning training on dataset {datasetName}')
    results, bestmodel = train(model, device, train_dataloader, optimizer, loss_fn, epochs)
    results.printBestResult()

    # save results
    outputFolder = path.join(datasetDir, 'GCN_OnlyNodeFeatures')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder)
    results.pickledump(outputFolder)

    # save the best model for inference. (when loading for inference -> model.eval()!! )
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    if bestmodel is None:
        # train() can return None when no epoch improved on its starting
        # validation accuracy; report it instead of failing on None.state_dict().
        print(f'No best model available for dataset {datasetName}; model.pt was not written')
    else:
        torch.save(bestmodel.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')

end = time.perf_counter()
elapsed = end - now
print(elapsed)

Done loading data from cached files.
Beginning training on dataset Toy0_000
Device: cpu
Epoch: 0, Loss: 5.8054, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.500
Epoch: 5, Loss: 0.7004, Train: 0.493, Valid: 0.450, Test: 0.400, AUC: 0.500
Epoch: 10, Loss: 0.8559, Train: 0.479, Valid: 0.475, Test: 0.450, AUC: 0.500
Epoch: 15, Loss: 0.7377, Train: 0.457, Valid: 0.475, Test: 0.500, AUC: 0.380
Epoch: 20, Loss: 0.7698, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.380
Epoch: 25, Loss: 0.6315, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.380
Best epoch: 
Epoch: 12, Loss: 0.7203, Train: 0.471, Valid: 0.525, Test: 0.500, AUC: 0.380
------------------(1/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy0_001
Device: cpu
Epoch: 0, Loss: 0.6511, Train: 0.514, Valid: 0.550, Test: 0.550, AUC: 0.480
Epoch: 5, Loss: 0.8539, Train: 0.593, Valid: 0.575, Test: 0.550, AUC: 0.470
Epoch: 10, Loss: 0.6087, Train: 0.500, Valid: 0.500, Test: 0.50

------------------(12/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_100
Device: cpu
Epoch: 0, Loss: 0.5103, Train: 0.771, Valid: 0.748, Test: 0.752, AUC: 0.838
Epoch: 5, Loss: 0.4573, Train: 0.718, Valid: 0.725, Test: 0.698, AUC: 0.895
Epoch: 10, Loss: 0.3062, Train: 0.797, Valid: 0.789, Test: 0.810, AUC: 0.907
Epoch: 15, Loss: 0.1813, Train: 0.886, Valid: 0.886, Test: 0.863, AUC: 0.933
Epoch: 20, Loss: 0.2184, Train: 0.868, Valid: 0.866, Test: 0.863, AUC: 0.933
Epoch: 25, Loss: 0.2046, Train: 0.916, Valid: 0.910, Test: 0.912, AUC: 0.950
Best epoch: 
Epoch: 29, Loss: 0.2153, Train: 0.921, Valid: 0.915, Test: 0.917, AUC: 0.951
------------------(13/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy1_101
Device: cpu
Epoch: 0, Loss: 0.7325, Train: 0.502, Valid: 0.484, Test: 0.530, AUC: 0.321
Epoch: 5, Loss: 0.4098, Train: 0.839, Valid: 0.820, Test: 0.858, AUC: 0.924
Ep

Epoch: 25, Loss: 0.1868, Train: 0.962, Valid: 0.971, Test: 0.960, AUC: 0.997
Best epoch: 
Epoch: 29, Loss: 0.1184, Train: 0.975, Valid: 0.980, Test: 0.985, AUC: 0.998
------------------(24/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy3_000
Device: cpu
Epoch: 0, Loss: 9.3309, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.500
Epoch: 5, Loss: 0.6886, Train: 0.786, Valid: 0.600, Test: 0.750, AUC: 0.780
Epoch: 10, Loss: 0.3116, Train: 0.679, Valid: 0.675, Test: 0.700, AUC: 0.840
Epoch: 15, Loss: 0.6815, Train: 0.857, Valid: 0.750, Test: 0.800, AUC: 0.880
Epoch: 20, Loss: 0.4265, Train: 0.864, Valid: 0.775, Test: 0.850, AUC: 0.910
Epoch: 25, Loss: 0.3130, Train: 0.864, Valid: 0.825, Test: 0.850, AUC: 0.920
Best epoch: 
Epoch: 27, Loss: 0.2192, Train: 0.871, Valid: 0.850, Test: 0.850, AUC: 0.930
------------------(25/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy3_001

Epoch: 15, Loss: 0.1446, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0860, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0542, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 8, Loss: 0.3779, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(36/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy4_100
Device: cpu
Epoch: 0, Loss: 0.1548, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 5, Loss: 0.0023, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0005, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0003, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0001, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 0, Loss: 0.1548, Train: 1.000, Valid: 1.000, Test: 1

Epoch: 5, Loss: 0.2124, Train: 0.990, Valid: 0.995, Test: 0.993, AUC: 1.000
Epoch: 10, Loss: 0.0247, Train: 0.996, Valid: 0.999, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0265, Train: 0.997, Valid: 0.999, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0097, Train: 0.997, Valid: 0.999, Test: 1.000, AUC: 1.000
Epoch: 25, Loss: 0.0162, Train: 0.997, Valid: 0.999, Test: 1.000, AUC: 1.000
Best epoch: 
Epoch: 12, Loss: 0.0199, Train: 0.998, Valid: 1.000, Test: 1.000, AUC: 1.000
------------------(48/56) models trained------------------

Done loading data from cached files.
Beginning training on dataset Toy6_000
Device: cpu
Epoch: 0, Loss: 1.6342, Train: 0.500, Valid: 0.500, Test: 0.500, AUC: 0.190
Epoch: 5, Loss: 0.1195, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 10, Loss: 0.0280, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 15, Loss: 0.0185, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1.000
Epoch: 20, Loss: 0.0133, Train: 1.000, Valid: 1.000, Test: 1.000, AUC: 1