In [70]:
import numpy as np
from sklearn import metrics
from array import array
from ROOT import TFile, TH1D, TCanvas

# Base directory of the analysis area; the path to scores.root is built from it below.
# NOTE(review): this is a hard-coded absolute path on one machine — consider making it configurable.
WORKDIR = "/home/choij/workspace/ChargedHiggsAnalysis"
# Directory name placed directly under DenseNeuralNet/ (presumably the analysis channel — confirm).
CHANNEL = "Skim3Mu"
# Signal and background labels; combined as "{SIG}_vs_{BKG}" in the score-file path.
SIG = "MHc-130_MA-90"
BKG = "ttX"

In [71]:
# Open the ROOT file with the per-model scores and fetch its "Events" tree.
scoresPath = f"{WORKDIR}/DenseNeuralNet/{CHANNEL}/{SIG}_vs_{BKG}/root/scores.root"
f = TFile.Open(scoresPath)
# Fail fast with a readable message instead of a null-pointer error further down.
if not f or f.IsZombie():
    raise OSError(f"cannot open score file: {scoresPath}")
tree = f.Get("Events")
if not tree:
    raise KeyError(f"tree 'Events' not found in {scoresPath}")

# One single-element float buffer per model, bound to branch "score-model<idx>".
# tree.GetEntry(i) overwrites element [0] of every bound buffer in place.
scores = {}
for idx in range(18):
    key = f"model{idx}"
    scores[key] = array('f', [0.])
    tree.SetBranchAddress(f"score-{key}", scores[key])

# Single-element unsigned-char buffers for the per-event flags
# (presumably boolean branches in the tree — confirm against the file schema).
trainMask = array("B", [False])
tree.SetBranchAddress("trainMask", trainMask)
validMask = array("B", [False])
tree.SetBranchAddress("validMask", validMask)
testMask = array("B", [False])
tree.SetBranchAddress("testMask", testMask)
signalMask = array("B", [False])
# Left as the final expression so the notebook displays the returned status code.
tree.SetBranchAddress("signalMask", signalMask)

4

In [72]:
def checkCompatibility(idx, cut):
    """Check whether a model's train and test score distributions agree.

    The score of model ``idx`` is histogrammed separately for entries with
    ``trainMask`` set and for entries with ``testMask`` set (10000 bins on
    [0, 1]); the two histograms are compared with ``TH1.KolmogorovTest``.

    Args:
        idx: model index; selects the ``scores["model<idx>"]`` buffer.
        cut: threshold that the Kolmogorov test value must exceed.

    Returns:
        True when the Kolmogorov test value is strictly greater than ``cut``.
    """
    hTrain = TH1D("hTrain", "", 10000, 0., 1.)
    hTest = TH1D("hTest", "", 10000, 0., 1.)
    modelKey = f"model{idx}"

    nEntries = tree.GetEntries()
    for entry in range(nEntries):
        # Loading an entry updates the buffers bound with SetBranchAddress.
        tree.GetEntry(entry)
        if trainMask[0]:
            hTrain.Fill(scores[modelKey][0])
        if testMask[0]:
            hTest.Fill(scores[modelKey][0])

    isCompatible = hTrain.KolmogorovTest(hTest) > cut
    # Release the Python references to both histograms before returning.
    del hTrain, hTest

    return isCompatible

In [73]:
def getAUC(idx, whichset):
    """Compute the ROC AUC of one model on one data split.

    Args:
        idx: model index; selects the ``scores["model<idx>"]`` buffer.
        whichset: one of ``"train"``, ``"valid"`` or ``"test"``.

    Returns:
        The area under the ROC curve built from the selected entries, with
        ``signalMask`` as the true label. ``None`` (after printing a message)
        when ``whichset`` is not a recognised split name.
    """
    # Validate once, before touching the tree, so a bad split name is reported
    # even when the tree has no entries.
    if whichset not in ("train", "valid", "test"):
        print(f"Wrong input {whichset}")
        return None

    splitMask = {"train": trainMask, "valid": validMask, "test": testMask}[whichset]
    scoreBuffer = scores[f"model{idx}"]

    predictions = []
    answers = []
    for i in range(tree.GetEntries()):
        # Loading an entry updates the buffers bound with SetBranchAddress.
        tree.GetEntry(i)
        if not splitMask[0]:
            continue
        predictions.append(scoreBuffer[0])
        answers.append(signalMask[0])

    fpr, tpr, _ = metrics.roc_curve(answers, predictions, pos_label=1)
    return metrics.auc(fpr, tpr)

In [74]:
# Pick the model with the highest test-set AUC among those whose train and
# test score distributions are compatible (Kolmogorov test value above 0.1).
bestModelIdx = -1  # sentinel: stays -1 when no model is selected
bestAUC = 0.
for idx in range(18):
    if not checkCompatibility(idx, 0.1):
        continue

    testAUC = getAUC(idx, "test")
    print(f"model-{idx} with testAUC = {testAUC:.3f}")
    # Strict comparison: on a tie the earlier model is kept.
    if bestAUC < testAUC:
        bestModelIdx = idx
        bestAUC = testAUC

if bestModelIdx < 0:
    # Avoid printing the sentinel as if it were a real model ("model--1").
    print("best model: none (no compatible model with test AUC above 0)")
else:
    print(f"best model: model-{bestModelIdx} with test AUC {bestAUC:.3f}")

model-0 with testAUC = 0.791
model-1 with testAUC = 0.783
model-2 with testAUC = 0.786
model-3 with testAUC = 0.772
model-4 with testAUC = 0.766
model-5 with testAUC = 0.776
model-6 with testAUC = 0.764
model-7 with testAUC = 0.788
model-8 with testAUC = 0.778
model-9 with testAUC = 0.792
model-10 with testAUC = 0.792
model-11 with testAUC = 0.783
model-12 with testAUC = 0.781
model-13 with testAUC = 0.781
model-14 with testAUC = 0.783
model-15 with testAUC = 0.782
model-16 with testAUC = 0.791
model-17 with testAUC = 0.797
best model: model-17 with test AUC 0.797
