In [1]:
%matplotlib inline
import dgl
import copy
import glob
import pprint
import numpy as np
import torch
import torch.nn as nn
import awkward as ak
import networkx as nx
import matplotlib.pyplot as plt
from os import path
from pathlib import Path
from trainresults import TrainResults
from train_eval_func import train, evaluate
from copy import deepcopy
from dgl.data import DGLDataset
from dgl.dataloading import GraphDataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from TauGraphDatasetInfo import TauGraphDatasetInfo
from CustomGCNs import CustomGCN_OnlyNFeatSumMsg, CustomGCN_OnlyNFeatMeanMsg
from TauGraphDataset import TauGraphDataset, GetNodeFeatureVectors, GetEdgeFeatureVectors, GetNeighborNodes, GetEdgeList

# Global matplotlib styling: 20pt base font and LaTeX-based text rendering
# (text.usetex needs a working LaTeX installation on the machine).
plt.rcParams.update({'font.size': 20, 'text.usetex': True})

# Shared plotting constants for later cells.
lw = 2
xyLabelFontSize = 20
xLabelPad, yLabelPad = 10, 15

# Pretty-printer instance for readable dumps of nested structures.
pp = pprint.PrettyPrinter()

Using backend: pytorch


In [2]:
# training
def trainEpochs(model, device, dataloader, optimizer, loss_fn, batchsize, nEpochs):
    """Train ``model`` for ``nEpochs`` epochs and keep a copy of the best epoch.

    After every epoch the model is evaluated on the training data
    (``dataloader``) and on the test data, the numbers are stored in a
    ``TrainResults`` object, and a deep copy of the model is taken whenever
    ``results.best_test_acc`` exceeds the best value seen so far.

    NOTE: the test data is read from the notebook-level variable
    ``test_dataloader``; it must be defined before this function is called.

    Args:
        model: the network to train (modified in place by ``train``).
        device: device identifier forwarded to ``train`` / ``evaluate``.
        dataloader: loader with the training samples; also used to compute
            the per-epoch training metrics.
        optimizer: optimizer forwarded to ``train``.
        loss_fn: loss function forwarded to ``train``.
        batchsize: batch size forwarded to ``train``.
        nEpochs: number of epochs to run.

    Returns:
        Tuple ``(results, bestModel)``. ``bestModel`` is a deep copy of the
        model taken at the epoch with the highest recorded test accuracy, or
        ``None`` if no epoch ever produced a test accuracy above 0.0
        (including ``nEpochs == 0``).
    """
    results = TrainResults()
    bestModel = None
    bestTestAcc = 0.0

    for _ in range(nEpochs):
        # One optimisation pass over the training data.
        epochLoss = train(model, device, dataloader, optimizer, loss_fn, batchsize, results)

        # Evaluate on the loader that was passed in instead of relying on a
        # global with the same content.
        train_result = evaluate(model, device, dataloader)
        test_result = evaluate(model, device, test_dataloader)

        results.addEpochResult(epochLoss, train_result, test_result)
        results.printLastResult()

        # Snapshot the model only when a new best test accuracy was reached.
        # The threshold has to be updated here; otherwise every epoch would
        # overwrite the snapshot and the "best" model would simply be the
        # last one.
        if results.best_test_acc > bestTestAcc:
            bestTestAcc = results.best_test_acc
            bestModel = copy.deepcopy(model)

    return results, bestModel

In [3]:
def getDatasetNames(datasetDir):
    """Collect dataset directories and their names from ``*.json`` files.

    Looks for JSON files directly inside ``datasetDir`` (the pattern contains
    no ``**``, so the ``recursive`` flag does not widen the search) and, for
    every match in sorted order, records the directory containing the file
    and that directory's last path component.

    Args:
        datasetDir: directory that is searched for ``*.json`` files.

    Returns:
        Tuple ``(datasetDirectories, datasetnames)`` of two equally long
        lists with one entry per JSON file found; a directory holding
        several JSON files therefore appears several times.
    """
    jsonFiles = sorted(glob.glob(datasetDir + '/*.json', recursive=True))

    datasetDirectories = []
    datasetnames = []
    for jsonFile in jsonFiles:
        parentDir = path.dirname(jsonFile)
        datasetDirectories.append(parentDir)
        # Last component of the normalised directory path is the dataset name.
        datasetnames.append(path.normpath(parentDir).split(path.sep)[-1])

    return datasetDirectories, datasetnames

In [4]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'device: {device}')

# Folder that is scanned for datasets; the matching directories and their
# names are printed one list per line.
datasetDir = '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets'
datasetDirs, datasetNames = getDatasetNames(datasetDir)
print(datasetDirs, datasetNames, sep='\n')

device: cpu
['/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets']
['Graphs_DYJetsToLL_M-50_genuineTaus_and_jets']


# Open the first discovered dataset and print its properties.
datasetName, datasetDir = datasetNames[0], datasetDirs[0]
dataset = TauGraphDataset(datasetName, datasetDir)
dataset.printProperties()

# Look at the first sample: the graph itself, its label, and whatever
# GetNodeFeatureVectors returns for that graph.
firstSample = dataset[0]
graph, label = firstSample
print(graph)
print(f'Label: {label}')
nodeFeatureVectors = GetNodeFeatureVectors(graph)
print(nodeFeatureVectors)

In [5]:
import time
now = time.time()

batchSize = 1024
print(f'Device: {device}')

# Train one model per discovered dataset directory.
for i, currentDir in enumerate(datasetDirs):
    trainingStart = time.time()
    currentName = datasetNames[i]

    # Load the dataset and build loaders that sample from its train/test split.
    dataset = TauGraphDataset(currentName, currentDir)
    splitIndices = dataset.get_split_indices()

    train_sampler = SubsetRandomSampler(splitIndices['train'])
    test_sampler = SubsetRandomSampler(splitIndices['test'])

    loaderOptions = dict(batch_size=batchSize, drop_last=False)
    train_dataloader = GraphDataLoader(dataset, sampler=train_sampler, **loaderOptions)
    test_dataloader = GraphDataLoader(dataset, sampler=test_sampler, **loaderOptions)

    # Model sized from the dataset (node-feature dimension in, 16 hidden
    # units, number of graph classes out), plus optimizer and loss.
    model = CustomGCN_OnlyNFeatSumMsg(dataset.dim_nfeats, 16, dataset.num_graph_classes).to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.CrossEntropyLoss()
    epochs = 50

    # Run the training and report the best epoch.
    print(f'Beginning training on dataset {datasetNames[i]}')
    results, bestmodel = trainEpochs(model, device, train_dataloader, optimizer, loss_fn, batchSize, epochs)
    results.printBestResult()

    # All artefacts go into a sub-folder of the dataset directory.
    outputFolder = path.join(currentDir, 'Output_CustomGCN_OnlyNFeatSumMsg')
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    # Wall-clock time for this dataset, measured before the results are written.
    trainingElapsed = time.time() - trainingStart
    results.savePlots(outputFolder)
    results.dumpSummary(outputFolder, trainingElapsed)
    results.pickledump(outputFolder)

    # Persist the weights of the best model for later inference.
    # Remember to call model.eval() after loading them for inference, see
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict
    torch.save(bestmodel.state_dict(), path.join(outputFolder, 'model.pt'))

    print(f'------------------({i+1}/{len(datasetDirs)}) models trained------------------\n')


# Total wall-clock time of the whole cell.
end = time.time()
elapsed = end - now
print(f'{elapsed} seconds elapsed')

Device: cpu
Done loading data from cached files.
Beginning training on dataset Graphs_DYJetsToLL_M-50_genuineTaus_and_jets
Epoch: 0, Loss: 1.1228, Train: 0.835, Test: 0.837, AUC: 0.937
Epoch: 1, Loss: 0.3697, Train: 0.889, Test: 0.889, AUC: 0.944
Epoch: 2, Loss: 0.3207, Train: 0.891, Test: 0.891, AUC: 0.948
Epoch: 3, Loss: 0.2976, Train: 0.893, Test: 0.894, AUC: 0.952
Epoch: 4, Loss: 0.2866, Train: 0.896, Test: 0.896, AUC: 0.953
Epoch: 5, Loss: 0.2813, Train: 0.889, Test: 0.889, AUC: 0.953
Epoch: 6, Loss: 0.2792, Train: 0.894, Test: 0.894, AUC: 0.953
Epoch: 7, Loss: 0.2769, Train: 0.896, Test: 0.896, AUC: 0.953
Epoch: 8, Loss: 0.2773, Train: 0.884, Test: 0.884, AUC: 0.953
Epoch: 9, Loss: 0.2761, Train: 0.898, Test: 0.898, AUC: 0.955
Epoch: 10, Loss: 0.2725, Train: 0.897, Test: 0.898, AUC: 0.955
Epoch: 11, Loss: 0.2706, Train: 0.897, Test: 0.898, AUC: 0.955
Epoch: 12, Loss: 0.2710, Train: 0.898, Test: 0.899, AUC: 0.956
Epoch: 13, Loss: 0.2696, Train: 0.898, Test: 0.898, AUC: 0.956
Epoch