In [4]:
import awkward as ak
import numpy as np
import pandas as pd
from tabulate import tabulate

from apps.prod4a_merge_study import BestCut, EventSelection, PFOSelection, SplitSample, ShowerMergeQuantities
from python.analysis import Master, Plots

# --- configuration -----------------------------------------------------------
# nEvents is only referenced by the commented-out `_nEvents=nEvents` argument
# below, so the full file is currently read.
nEvents = 500
events = Master.Data("work/ROOTFiles/Prod4a_1GeV_BeamSim_00.root", True)#, _nEvents=nEvents)
cuts = "1GeV/analysedCuts.csv"
# Alternative 6 GeV configuration (swap with the two assignments above):
# events = Master.Data("work/ROOTFiles/Prod4a_6GeV_BeamSim_00.root", True)#, _nEvents=nEvents)
# cuts = "csv/cuts/analysedCuts-6GeV.csv"

# --- selections --------------------------------------------------------------
# The return values of EventSelection/PFOSelection are discarded here.
# NOTE(review): presumably both filter `events` in place - confirm in
# apps.prod4a_merge_study.
EventSelection(events)
PFOSelection(events)
# start_showers is used below as a pair of masks (it is unpacked into
# np.logical_or and indexed with 0 and 1); to_merge is a mask over the PFOs.
start_showers, to_merge = SplitSample(events)

# Older call pattern, kept for reference:
# start_showers = EventSelection(events)
#start_showers = PFOSelection(events, start_showers)
mask = np.logical_or(events.recoParticles.cnnScore > 0.64, np.logical_or(*start_showers)) # select PFOs which pass CNN selection or are start showers
events.Filter([mask])
# Apply the same mask to the bookkeeping arrays so that they stay aligned with
# the PFOs that survive events.Filter.
start_showers = [start_showers[i][mask] for i in range(2)]
to_merge = to_merge[mask]
#to_merge = np.logical_not(np.logical_or(*start_showers))

# --- merge quantities --------------------------------------------------------
quantities = ShowerMergeQuantities(events, to_merge, cuts)
quantities.bestCut = "purity"

# NOTE(review): n_merge is not used anywhere in the cells visible here.
n_merge = -1

print("calculating score...")
def SortByStartingShower(data):
    """Pair up the values for the two start showers along a new innermost axis.

    Each element of ``data[0]`` and ``data[1]`` is first wrapped in a
    length-1 list (``ak.unflatten`` with a count of 1 on the last axis); the
    two results are then concatenated along that last axis, so every entry
    becomes ``[value for shower 0, value for shower 1]``.

    Only ``data[0]`` and ``data[1]`` are read.
    """
    first, second = (ak.unflatten(data[index], 1, -1) for index in (0, 1))
    return ak.concatenate([first, second], -1)

def ClosestQuantity(q : ak.Array, mask : ak.Array):
    """Index along the last axis of the smallest selected value of ``q``.

    Entries of ``q`` where ``mask`` is False are replaced by a large sentinel
    before the minimum is searched for. The result keeps the reduced axis
    (``keepdims=True``) and holds -1 wherever the minimum equals the
    sentinel, i.e. where nothing was selected.

    Values of ``q`` at or above the sentinel cannot be told apart from
    masked-out entries.
    """
    sentinel = 9999999
    candidates = ak.where(mask, q, sentinel)
    nearest = ak.argmin(candidates, -1, keepdims=True)
    nothing_selected = ak.min(candidates, -1, keepdims=True) == sentinel
    return ak.where(nothing_selected, -1, nearest)

#* retrieve quantities and find which start shower is closest to each PFO for each variable
quantities.Evaluate(events, start_showers)
print("evaluated quantities")
# The same `mask` is passed to all three ClosestQuantity calls, so for a given
# PFO the three results are either all -1 (nothing passed the cuts) or all a
# start-shower index (0 or 1).
mask = SortByStartingShower(quantities.mask) # PFOs we want to merge after cut based selection is done
alpha = ClosestQuantity(SortByStartingShower(quantities.alpha), mask) # can use this to determine which starting shower the PFO is closest to in angle
x = ClosestQuantity(SortByStartingShower(quantities.delta_x), mask) # can use this to determine which starting shower the PFO is closest to in space
phi = ClosestQuantity(SortByStartingShower(quantities.delta_phi), mask) # can use this to determine which starting shower the PFO direction is most aligned to
print(mask)

#* figure out which is the common start shower between all variables
# if min phi, alpha and x are all the same then merge to that shower
# if two are the same, merge to the most common shower
# if none agree (shouldn't be possible)
#! should replace this with calculating the mode of the scores
# Majority vote via the sum of the three indices:
#   sum == -3        -> all three are -1, PFO is not merged (left as -3)
#   sum == 0 or 1    -> at least two votes for start shower 0 -> score 0
#   sum == 2 or 3    -> at least two votes for start shower 1 -> score 1
# The order of the remapping below matters: 1 -> 0 has to happen before 2 -> 1.
scores_raw = ak.sum(ak.concatenate([phi, x, alpha], -1), -1)
scores = ak.where(scores_raw == 1, 0, scores_raw) # [1, 0, 0]
scores = ak.where(scores == 2, 1, scores) # [1, 1, 0]
scores = ak.where(scores == 3, 1, scores) # [1, 1, 1]



'BeamMCFilter' executed in 9.1469s
number of dalitz decays: 32
'ApplyBeamFilter' executed in 6.4342s


'EventSelection' executed in 67.5707s




'PFOSelection' executed in 8.1073s


'SplitSample' executed in 12.3763s
calculating score...


'Evaluate' executed in 89.9578s
evaluated quantities
[[[False, False], [False, False], [False, ... [False, False], [False, False]]]


In [5]:
def ShowerMergingPFOPerformance(start_showers, to_merge, scores, quantities):
    """Print PFO-level performance figures for the shower merging.

    A PFO is "signal" when its backtracked true particle number equals that of
    one of the two start showers, and "merged" when its score is not -3.

    Args:
        start_showers: the two start-shower masks (unpacked into
            ``np.logical_or``).
        to_merge: mask selecting the PFOs that are candidates for merging.
        scores: per-PFO score; -3 means not merged, 0 / 1 is the index of the
            start shower the PFO was assigned to.
        quantities: object providing ``null`` (applied on top of ``to_merge``)
            and ``mask`` (unpacked into ``np.logical_or``).

    Reads the notebook-level ``events`` object. Returns nothing; results are
    printed.

    NOTE(review): the ratios below are not guarded; with no merged signal PFO
    (or an empty signal/background/merged/unmerged category) they divide by
    zero.
    """
    # false negative - showers we should have merged but didn't
    # false positive - showers we merged but shouldn't have
    # true positive - showers we should have merged and did
    # true negative - showers we shouldn't have merged and didn't
    # mismatch - of the showers merged, which were assigned to the wrong start shower

    all_showers = np.logical_or(*start_showers)
    s_num = events.trueParticlesBT.number[all_showers]
    tm_num = events.trueParticlesBT.number[to_merge]

    signals = [s_num[:, i] == tm_num for i in range(2)]
    signal_all = np.logical_or(*signals)[quantities.null] # showers we should have merged
    background = np.logical_not(signal_all) # showers we shouldn't have merged

    merged = scores != -3 # PFOs actually merged
    not_merged = np.logical_not(merged)

    print(ak.count(merged))
    print(ak.count(signal_all))

    tp = np.logical_and(merged, signal_all) # true positive, signal pfos merged
    tn = np.logical_not(np.logical_or(merged, signal_all)) # true negative, background not merged

    xor = np.logical_xor(merged, signal_all)

    fp = np.logical_and(xor, signal_all == False) # false positive, background PFOs merged
    fn = np.logical_and(xor, signal_all == True) # false negative, signal PFOs not merged

    nSignal = ak.count(signal_all[signal_all])
    nBackground = ak.count(background[background])
    nMerged = ak.count(merged[merged])
    nUnmerged = ak.count(not_merged[not_merged])

    signal_num = events.trueParticlesBT.number[to_merge][quantities.null][signal_all]
    target_num = events.trueParticlesBT.number[to_merge][quantities.null][tp]
    # Translate the start-shower index (0 / 1) into that shower's true particle
    # number. Both substitutions are keyed on the untouched `scores`: doing
    # them one after the other on the partially translated array would
    # re-translate any true particle number of shower 0 that happens to be 1.
    actual_num = ak.where(scores == 0, s_num[:, 0], ak.where(scores == 1, s_num[:, 1], scores))
    actual_num = actual_num[tp]

    # drop events without any merged signal PFO before comparing
    actual_num = actual_num[ak.num(actual_num) > 0]
    target_num = target_num[ak.num(target_num) > 0]

    # despite its name, `mismatch` is True where the assignment is correct
    mismatch = ak.ravel(actual_num == target_num)
    mismatch_rate = ak.count(mismatch[mismatch == False]) / ak.count(mismatch)
    print(f"mismatch (%): {100 * mismatch_rate}")

    print(f"number of signal PFOs before cutting: {nSignal}")
    print(f"number of background PFOs before cutting: {nBackground}")
    table = [
        ["performance metric", "number of PFOs", "percentage (signal/background)", "percentage (merged, unmerged)"],
        ["signal PFOs merged and correctly matched", int(ak.count(tp[tp]) * (1 - mismatch_rate)), 100 * (1 - mismatch_rate) * ak.count(tp[tp])/nSignal, 100 * (1 - mismatch_rate) * ak.count(tp[tp])/nMerged],
        ["signal PFOs merged and incorrectly matched", int(ak.count(tp[tp]) * mismatch_rate), 100 * mismatch_rate * ak.count(tp[tp])/nSignal, 100 * mismatch_rate * ak.count(tp[tp])/nMerged],
        ["background PFOs merged (false positive)", ak.count(fp[fp]), 100 * ak.count(fp[fp])/nBackground, 100 * ak.count(fp[fp])/nMerged],
        ["signal PFOs not merged (false negative)", ak.count(fn[fn]), 100 * ak.count(fn[fn])/nSignal, 100 * ak.count(fn[fn])/nUnmerged],
        ["background PFOs not merged (true negatives)", ak.count(tn[tn]), 100 * ak.count(tn[tn])/nBackground, 100 * ak.count(tn[tn])/nUnmerged],
        ["signal PFOs correctly matched", "-", 100 * (1-mismatch_rate), 100 * (1-mismatch_rate)]
        ]
    #["signal PFOs merged (true positives)", ak.count(tp[tp]), 100 * ak.count(tp[tp])/nSignal, 100 * ak.count(tp[tp])/nMerged],

    print(f"scores: {ak.count(scores)}")
    print(f"to_merge: {ak.count(to_merge[quantities.null])}")

    print(tabulate(table, floatfmt=".2f", tablefmt="fancy_grid"))

    mask = quantities.mask
    cut_signal = signal_all[np.logical_or(*mask)]
    print(ak.count(ak.ravel(cut_signal[cut_signal])))
    # WARNING: copies every local above into the notebook namespace and so
    # overwrites same-named globals (e.g. `mask`). Kept because later cells may
    # read the leaked names; remove once nothing depends on it.
    globals().update(locals()) #! debugging only!

ShowerMergingPFOPerformance(start_showers, to_merge, scores, quantities)

135289
135289
mismatch (%): 1.8633540372670807
number of signal PFOs before cutting: 1539
number of background PFOs before cutting: 133750
scores: 135289
to_merge: 135289
╒═════════════════════════════════════════════╤════════════════╤════════════════════════════════╤═══════════════════════════════╕
│ performance metric                          │ number of PFOs │ percentage (signal/background) │ percentage (merged, unmerged) │
├─────────────────────────────────────────────┼────────────────┼────────────────────────────────┼───────────────────────────────┤
│ signal PFOs merged and correctly matched    │ 158            │ 10.266406757634826             │ 95.18072289156625             │
├─────────────────────────────────────────────┼────────────────┼────────────────────────────────┼───────────────────────────────┤
│ signal PFOs merged and incorrectly matched  │ 3              │ 0.1949317738791423             │ 1.8072289156626506            │
├─────────────────────────────────────────────┼──

In [6]:
def ShowerMergingEventPerformance(start_showers, to_merge, scores, quantities):
    """Print event-level performance figures for the shower merging.

    A PFO is "signal" when its backtracked true particle number equals that of
    one of the two start showers, and "merged" when its score is not -3. The
    PFO flags are counted along the last axis and the function reports in how
    many entries signal and/or background PFOs were merged.

    Args:
        start_showers: the two start-shower masks (unpacked into
            ``np.logical_or``).
        to_merge: mask selecting the PFOs that are candidates for merging.
        scores: per-PFO score; -3 means not merged.
        quantities: object providing ``null`` (applied on top of ``to_merge``).

    Reads the notebook-level ``events`` object. Returns nothing; results are
    printed. Percentages are reported as nan when their denominator is zero.
    """
    def count_true(flags, axis=None):
        # number of True entries, overall or along `axis`
        return ak.count(flags[flags], axis)

    def percent(part, whole):
        # share of `part` in `whole` in percent; nan instead of dividing by zero
        return 100 * part / whole if whole != 0 else float("nan")

    all_showers = np.logical_or(*start_showers)
    s_num = events.trueParticlesBT.number[all_showers]
    tm_num = events.trueParticlesBT.number[to_merge]

    signal = np.logical_or(*[tm_num == s_num[:, i] for i in range(2)])[quantities.null] # showers we should have merged
    merged = scores != -3 # PFOs actually merged

    nMerged = count_true(merged, -1)
    nSignal = count_true(signal, -1)

    tp = np.logical_and(merged, signal) # true positive
    nTp = count_true(tp, -1)

    fp = np.logical_and(merged, np.logical_not(signal)) # false positive
    nFp = count_true(fp, -1)

    # every reduction is evaluated once and reused by the prints and the table
    n = ak.count(events.eventNum)
    n_t = count_true(nSignal > 0)
    n_m = count_true(nMerged > 0)
    n_signal = count_true(nTp > 0)
    n_background = count_true(nFp > 0)
    n_both = count_true(np.logical_and(nFp > 0, nTp > 0))
    n_signal_only = count_true(np.logical_and(nFp == 0, nTp > 0))
    n_background_only = count_true(np.logical_and(nFp > 0, nTp == 0))

    print(f"number of events: {n}")
    print(f"number of events with PFOs to merge: {n_t}")
    print(f"number of events where we merge: {n_m}")
    print(f"number of events where we merge signal: {n_signal}")
    print(f"number of events where we merge background: {n_background}")
    print(f"number of events where we merge signal and background {n_both}")
    print(f"number of events where we merge only signal {n_signal_only}")
    print(f"number of events where we merge only background {n_background_only}")

    table = [
        ["performance metric", "number of events", "total efficiency", "merging efficiency"],
        ["signal merged", n_signal, percent(n_signal, n_t), percent(n_signal, n_m)],
        ["only signal merged", n_signal_only, percent(n_signal_only, n_t), percent(n_signal_only, n_m)],
        ["signal and background merged", n_both, percent(n_both, n_t), percent(n_both, n_m)]
        ]

    print(tabulate(table, floatfmt=".2f", tablefmt="fancy_grid"))

ShowerMergingEventPerformance(start_showers, to_merge, scores, quantities)

number of events: 707
number of events with PFOs to merge: 487
number of events where we merge: 118
number of events where we merge signal: 116
number of events where we merge background: 5
number of events where we merge signal and background 3
number of events where we merge only signal 113
number of events where we merge only background 2
╒══════════════════════════════╤══════════════════╤════════════════════╤════════════════════╕
│ performance metric           │ number of events │ total efficiency   │ merging efficiency │
├──────────────────────────────┼──────────────────┼────────────────────┼────────────────────┤
│ signal merged                │ 116              │ 23.81930184804928  │ 98.30508474576271  │
├──────────────────────────────┼──────────────────┼────────────────────┼────────────────────┤
│ only signal merged           │ 113              │ 23.20328542094456  │ 95.76271186440678  │
├──────────────────────────────┼──────────────────┼────────────────────┼────────────────────