In [1]:
%matplotlib inline

import glob
import uproot
import pprint
import numpy as np
import awkward as ak
import matplotlib.pyplot as plt
from os import path
from tqdm import tqdm
from trainresults import TrainResults
# Global matplotlib settings for every figure: 20 pt font, text.usetex enabled.
plt.rcParams.update({
    'font.size': 20,
    'text.usetex': True,
})
# Shared styling constants (line width, axis-label font size, axis-label paddings).
lw, xyLabelFontSize = 2, 20
xLabelPad, yLabelPad = 10, 15
# Pretty printer used for inspecting lists of paths.
pp = pprint.PrettyPrinter()


In [2]:
def getParentFolderName(absoluteFileName):
    """Return the name of the directory that directly contains the given path.

    Only the last component of the containing directory is returned, e.g.
    '/a/b/c/result.pkl' -> 'c'. A path without a directory part yields ''.
    """
    parentDir, _ = path.split(absoluteFileName)
    return path.split(parentDir)[1]

In [3]:
import re
# Raw string: "\d" and "\." are regex escapes, not Python string escapes. In a
# non-raw literal they are invalid escape sequences and trigger a warning.
_NUMBER_PATTERN = re.compile(r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?")


def getNumbersFromString(myString):
    """Return every number-like substring of ``myString``, in order of appearance.

    Matches optionally signed integers and decimals, thousands groups separated
    by commas (``1,000``) and an optional exponent (``2.5e3``).

    Parameters
    ----------
    myString : str
        Text to scan, e.g. a folder name such as ``..._MsgPasses_1_Dropout_0.5``.

    Returns
    -------
    list of str
        The matched substrings (not converted to numbers); empty if none match.
    """
    return _NUMBER_PATTERN.findall(myString)

def pad(mystr, spaces=5):
    """Left-align ``mystr`` in a field of ``spaces`` characters, filling with blanks.

    Values already at least ``spaces`` wide are returned without truncation.
    """
    fieldSpec = f' <{spaces}'
    return format(mystr, fieldSpec)

In [None]:
def getResultLatexTable(fileList):
    """Build one ' & '-separated table row per pickled training result.

    Column order of every row:
    the first three numbers found in the name of the folder containing the
    pickle, then training loss, validation loss, ROC AUC, training accuracy,
    validation accuracy, test accuracy and wall time. The per-epoch series are
    read at index ``best_val_acc_epoch``; the wall time is
    ``int(trainingDuration) / 60`` (presumably seconds converted to minutes).

    Parameters
    ----------
    fileList : iterable of str
        Paths of pickles that ``TrainResults.loadPickle`` can load.

    Returns
    -------
    list of str
        One formatted row per input file, in input order.

    Raises
    ------
    IndexError
        If the containing folder name holds fewer than three numbers.
    """
    def buildRow(pickleFile):
        res = TrainResults.loadPickle(pickleFile)
        best = res.best_val_acc_epoch
        metricCells = [
            pad(f'{res.epochloss[best]:.4f}'),
            pad(f'{res.epoch_val_loss[best]:.4f}'),
            pad(f'{res.roc_auc:.3f}'),
            pad(f'{res.train_acc[best]:.3f}'),
            pad(f'{res.val_acc[best]:.3f}'),
            pad(f'{res.test_acc[best]:.3f}'),
            pad(f'{int(res.trainingDuration)/60:.2f}'),
        ]
        # Hyper-parameters are recovered from the numbers in the folder name.
        folderNumbers = getNumbersFromString(getParentFolderName(pickleFile))
        paramCells = [pad(number) for number in folderNumbers]
        return ' & '.join([paramCells[0], paramCells[1], paramCells[2], *metricCells])

    return [buildRow(pickleFile) for pickleFile in tqdm(fileList)]

In [None]:
# Root directory that is searched recursively for stored training results.
folder = '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets'
# Keep every trainresult.pkl below the root whose path mentions the sample name.
files = [
    candidate
    for candidate in glob.glob(folder + "/**/trainresult.pkl", recursive=True)
    if "DYJetsToLL_M-50" in candidate
]
# Directory of each result file and the last component of that directory.
directoryNames = list(map(path.dirname, files))
baseFolderNames = list(map(path.basename, directoryNames))

#test = list(zip(files, baseFolderNames))
#pp.pprint(test)


#pp.pprint(directoryNames)
#pp.pprint(baseFolderNames)


In [None]:
# Group the discovered result pickles by model variant. Sorting the paths makes
# the row order within each table deterministic.
GCN_SUM_FILES = sorted(s for s in files if "CustomGCN_OnlyNFeatSumMsg" in s)
GCN_MEAN_FILES = sorted(s for s in files if "CustomGCN_OnlyNFeatMeanMsg" in s)
MPGNN_FILES = sorted(s for s in files if "MPGNN" in s and "Mean" not in s)
MPGNN_MEAN_FILES = sorted(s for s in files if "MPGNN_Mean" in s)

# The tables are written next to the result folders, directly below `folder`.
outputFile = path.join(folder, 'FormattedResultsTables.txt')

separator = "###############################################"
# Metric columns in the order in which getResultLatexTable emits them
# (losses, then AUC, then the accuracies, then the wall time).
metricColumns = "TrainLoss, ValidLoss, AUC, TrainAcc, ValAcc, TestAcc, Walltime t in mins"
gcnHeader = "NHiddenFeats, MsgPasses, Dropout, " + metricColumns
mpgnnHeader = "NHiddenFeats, EHiddenFeats, MsgPasses, " + metricColumns

# (section title, column header, result files) for each table.
tableSections = [
    ("CUSTOM GCN ONLY N FEAT SUM MSG", gcnHeader, GCN_SUM_FILES),
    ("CUSTOM GCN ONLY N FEAT MEAN MSG", gcnHeader, GCN_MEAN_FILES),
    ("MPGNN SUM MSG", mpgnnHeader, MPGNN_FILES),
    ("MPGNN MEAN MSG", mpgnnHeader, MPGNN_MEAN_FILES),
]

allResultsList = []
for sectionTitle, sectionHeader, sectionFiles in tableSections:
    allResultsList.append(sectionTitle)
    allResultsList.append(separator)
    allResultsList.append(sectionHeader)
    allResultsList.extend(getResultLatexTable(sectionFiles))
    # The trailing newline leaves a blank line between consecutive tables.
    allResultsList.append(separator + "\n")

In [None]:
# Store all assembled table lines, newline-separated, in the output text file.
with open(outputFile, mode='w') as tableFile:
    tableText = "\n".join(allResultsList)
    tableFile.write(tableText)

In [None]:
# Echo the assembled tables, one list entry per line.
print(*allResultsList, sep="\n")

In [4]:
# Same discovery as in the earlier cell, repeated so this part of the notebook
# can be run without executing the table cells first.
folder = '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets'
allPickles = glob.glob(folder + "/**/trainresult.pkl", recursive=True)
# Keep only the paths that mention the sample name.
files = list(filter(lambda pickleFile: "DYJetsToLL_M-50" in pickleFile, allPickles))
directoryNames = [path.dirname(pickleFile) for pickleFile in files]
baseFolderNames = [path.basename(resultDir) for resultDir in directoryNames]

In [7]:
# Show the directory that contains each discovered result file.
pp.pprint(list(map(path.dirname, files)))

['/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets/Output_CustomGCN_OnlyNFeatMeanMsg_NHiddenFeat_32_MsgPasses_1_Dropout_0',
 '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets/Output_CustomGCN_OnlyNFeatMeanMsg_NHiddenFeat_32_MsgPasses_2_Dropout_0',
 '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets/Output_CustomGCN_OnlyNFeatSumMsg_NHiddenFeat_16_MsgPasses_1_Dropout_0.5',
 '/ceph/aissac/ntuple_for_graphs/prod_2018_v2_processed_v5_THESIS/trimmed_200000_and_cut_puppiWeightNoLep_greater_0_and_deltaR_smaller_0point5/Graphs_DYJetsToLL_M-50_genuineTaus_and_jets/Output_MPGNN_NHiddenFeats_16_EHi

In [9]:
# Reload every stored training result and let it save its plots into the
# directory that holds the pickle.
for file in tqdm(files):
    plotDirectory = path.dirname(file)
    r = TrainResults.loadPickle(file)
    r.savePlots(plotDirectory)

100%|██████████| 94/94 [14:22<00:00,  9.17s/it]
