In [3]:
import awkward as ak
import numpy as np
import pandas as pd
from tabulate import tabulate

from apps.prod4a_merge_study import (BestCut, EventSelection, PFOSelection,
                                     ShowerMergeQuantities, SplitSample)
from python.analysis import Master, Plots

# Load the sample and apply the selections. The return values of EventSelection
# and PFOSelection are discarded, so they presumably modify `events` in place
# (TODO confirm against apps.prod4a_merge_study).
events = Master.Data("work/ROOTFiles/v3/Prod4a_1GeV_BeamSim_00.root")
# CSV of analysed cuts, handed to ShowerMergeQuantities below
cuts = "csv/cuts/analysedCuts-1GeV.csv"
# cuts = "work/2023/Prod4a_1GeV_BeamSim_analysis/prod4a_merge_study/analysedCuts.csv"
EventSelection(events)
PFOSelection(events)
# start_showers is used below as a pair of masks (one per start shower) and
# to_merge as a mask of the remaining PFOs which are candidates for merging
start_showers, to_merge = SplitSample(events)

# Disabled: optional pre-filter on the CNN score, kept for reference
# mask = np.logical_or(events.recoParticles.cnnScore > 0.64, np.logical_or(*start_showers)) # select PFOs which pass CNN selection or are start showers
# events.Filter([mask])
# start_showers = [start_showers[i][mask] for i in range(2)]
# to_merge = to_merge[mask]

quantities = ShowerMergeQuantities(events, to_merge, cuts)
# presumably selects which cut set from the CSV is applied - TODO confirm
quantities.bestCut = "purity"

# NOTE(review): not referenced in the cells visible here; may be used further down the notebook
n_merge = -1

print("calculating score...")
def SortByStartingShower(data):
    """Stack the two per-start-shower arrays along a new innermost axis.

    ``data[0]`` and ``data[1]`` each get every innermost value wrapped into a
    length-1 list, and the two results are concatenated along the last axis, so
    position 0 of the new axis comes from ``data[0]`` and position 1 from
    ``data[1]``.

    Args:
        data: indexable holding (at least) two awkward arrays of equal structure;
            only indices 0 and 1 are read.

    Returns:
        awkward array with one extra innermost axis of length 2.
    """
    first, second = (ak.unflatten(data[k], 1, -1) for k in (0, 1))
    return ak.concatenate([first, second], -1)

def ClosestQuantity(q : ak.Array, mask : ak.Array):
    """Index of the smallest selected value of ``q`` along the last axis.

    Entries of ``q`` where ``mask`` is False are replaced by a large sentinel
    before the minimum is searched, so only selected entries can win.

    Args:
        q: quantity to minimise.
        mask: boolean array with the same structure as ``q``.

    Returns:
        awkward array (last axis kept with length 1) holding the argmin index,
        or -1 wherever the minimum equals the sentinel, i.e. nothing was
        selected along that axis.
    """
    sentinel = 9999999 # stands in for "not selected"
    candidates = ak.where(mask, q, sentinel)
    closest = ak.argmin(candidates, -1, keepdims=True)
    nothing_selected = ak.min(candidates, -1, keepdims=True) == sentinel
    return ak.where(nothing_selected, -1, closest)

#* evaluate the merge quantities, then find which start shower each PFO is closest to, per variable
quantities.Evaluate(events, start_showers)
print("evaluated quantities")
mask = SortByStartingShower(quantities.mask) # PFOs we want to merge after cut based selection is done

# per variable: index of the closest start shower (ClosestQuantity returns -1 where nothing is selected)
#   alpha - which starting shower the PFO is closest to in angle
#   x     - which starting shower the PFO is closest to in space
#   phi   - which starting shower the PFO direction is most aligned to
alpha, x, phi = (
    ClosestQuantity(SortByStartingShower(q), mask)
    for q in (quantities.alpha, quantities.delta_x, quantities.delta_phi)
)
print(mask)

#* figure out which is the common start shower between all variables
# the three indices are summed per PFO and the sum is mapped to a shower index:
#   sum 1 ([1, 0, 0])                    -> 0 (two of three variables prefer shower 0)
#   sum 2 ([1, 1, 0]) or 3 ([1, 1, 1])   -> 1 (at least two variables prefer shower 1)
#   any other sum (0, or -3 when all three are -1) is left unchanged
#! should replace this with calculating the mode of the scores
scores = ak.sum(ak.concatenate([phi, x, alpha], -1), -1)
scores = ak.where(scores == 1, 0, ak.where((scores == 2) | (scores == 3), 1, scores))





calculating score...


evaluated quantities
[[[False, False], [False, False], [False, ... [False, False], [False, False]]]


In [4]:
def CountMask(mask : ak.Array, axis : int = None):
    """Count the entries of a boolean array which are True.

    Args:
        mask: boolean awkward array.
        axis: axis to count along, forwarded to ``ak.count``; None counts over
            the whole array.

    Returns:
        Whatever ``ak.count`` returns for the selected (True) entries.
    """
    selected = mask[mask] # keep only the True entries
    return ak.count(selected, axis = axis)

def ShowerMergingPFOPerformance(start_showers, to_merge, scores, quantities):
    """Print PFO-level performance metrics of the shower merging.

    A PFO is "signal" if its backtracked true particle number equals that of one
    of the two start showers, and "merged" if its score is not -3. The function
    prints a confusion-matrix style table plus the rate at which merged signal
    PFOs were assigned to the wrong start shower.

    Reads the notebook-level global ``events``.

    Args:
        start_showers: pair of boolean masks (unpacked into ``np.logical_or``),
            one per start shower; two start showers per event are assumed.
        to_merge: boolean mask of the PFOs which are candidates for merging.
        scores: per-candidate merge decision: 0 or 1 is the index of the start
            shower the PFO is merged into, -3 means not merged.
        quantities: object whose ``mask`` attribute holds two boolean masks.

    Returns:
        None. Every local variable is exported to the notebook globals at the
        end (debugging aid, see the last line).
    """
    # false negative - showers we should have merged but didn't
    # false positive - showers we merged but shouldn't have
    # true positive - showers we should have merged and did
    # true negative - showers we shouldn't have merged and didn't
    # mismatch - of the showers merged, which were assigned to the wrong start shower

    #! not using SignalBackground method here to be explicit when defining signal and background masks
    all_showers = np.logical_or(*start_showers)
    s_num = events.trueParticlesBT.number[all_showers]
    tm_num = events.trueParticlesBT.number[to_merge]

    signals = [s_num[:, i] == tm_num for i in range(2)]
    signal_all = np.logical_or(*signals) # showers we should have merged
    background = np.logical_not(signal_all) # showers we shouldn't have merged

    merged = scores != -3 # PFOs actually merged
    not_merged = ~merged

    print(ak.count(merged))
    print(ak.count(signal_all))
    n = ak.count(signal_all) # total number of candidate PFOs

    tp = merged & signal_all # true positive, signal pfos merged
    nTp = CountMask(tp)
    tn = ~(merged | signal_all) # true negative, background not merged
    nTn = CountMask(tn)

    xor = merged != signal_all

    fp = xor & (signal_all == False) # false positive, background PFOs merged
    nFp = CountMask(fp)
    fn = xor & (signal_all == True) # false negative, signal PFOs not merged
    nFn = CountMask(fn)

    nSignal = CountMask(signal_all)
    nBackground = CountMask(background)
    nMerged = CountMask(merged)
    nUnmerged = CountMask(not_merged)

    signal_num = tm_num[signal_all]
    target_num = tm_num[tp] # true particle number each merged signal PFO belongs to
    # True particle number of the start shower each PFO was assigned to. Both
    # substitutions are keyed on `scores` in a single nested where: applying them
    # one after the other would re-test the already substituted values, so a PFO
    # assigned to shower 0 whose true particle number happens to be 1 would be
    # overwritten with the number of shower 1.
    actual_num = ak.where(scores == 0, s_num[:, 0], ak.where(scores == 1, s_num[:, 1], scores))
    actual_num = actual_num[tp]

    # drop events without any true positive
    actual_num = actual_num[ak.num(actual_num) > 0]
    target_num = target_num[ak.num(target_num) > 0]

    mismatch = ak.ravel(actual_num == target_num) # True where the assignment is correct
    # NOTE(review): this is a division by zero when no signal PFO was merged
    mismatch_rate = ak.count(mismatch[mismatch == False]) / ak.count(mismatch)
    matched_rate = 1 - mismatch_rate
    print(f"mismatch (%): {100 * mismatch_rate}")

    print(f"number of signal PFOs before cutting: {nSignal}")
    print(f"number of background PFOs before cutting: {nBackground}")
    table = [
        ["PFO performance metric"                     , "number of PFOs"        , "total efficiency (%)"     , "signal/background efficiency (%)", "merged/unmerged efficiency (%)" ],
        ["signal PFOs merged and correctly matched"   , int(nTp * matched_rate) , 100 * nTp * matched_rate/n , 100 * matched_rate * nTp/nSignal  , 100 * matched_rate * nTp/nMerged ],
        ["signal PFOs merged and incorrectly matched" , int(nTp * mismatch_rate), 100 * nTp * mismatch_rate/n, 100 * mismatch_rate * nTp/nSignal , 100 * mismatch_rate * nTp/nMerged],
        ["background PFOs merged (false positive)"    , nFp                     , 100 * nFp/n                , 100 * nFp/nBackground             , 100 * nFp/nMerged                ],
        ["signal PFOs not merged (false negative)"    , nFn                     , 100 * nFn/n                , 100 * nFn/nSignal                 , 100 * nFn/nUnmerged              ],
        ["background PFOs not merged (true negatives)", nTn                     , 100 * nTn/n                , 100 * nTn/nBackground             , 100 * nTn/nUnmerged              ],
        ["signal PFOs correctly matched"              , "-"                     , "-"                        , 100 * matched_rate                , 100 * matched_rate               ]
        ]
    #["signal PFOs merged (true positives)", ak.count(tp[tp]), 100 * ak.count(tp[tp])/nSignal, 100 * ak.count(tp[tp])/nMerged],

    print(f"scores: {ak.count(scores)}")
    print(f"to_merge: {ak.count(to_merge)}")

    print(tabulate(table, floatfmt = ".2f", tablefmt = "fancy_grid"))

    # number of signal PFOs selected by either of the two masks in quantities.mask
    mask = quantities.mask
    cut_signal = signal_all[np.logical_or(*mask)]
    print(ak.count(ak.ravel(cut_signal[cut_signal])))
    # Exports every local above into the notebook namespace; local names are
    # kept stable because other cells may read them.
    globals().update(locals()) #! debugging only!

# print the PFO-level performance table for the scores computed above
ShowerMergingPFOPerformance(start_showers, to_merge, scores, quantities)

182964
182964
mismatch (%): 0.6024096385542169
number of signal PFOs before cutting: 1625
number of background PFOs before cutting: 181339
scores: 182964
to_merge: 184384
╒═════════════════════════════════════════════╤════════════════╤═══════════════════════╤══════════════════════════════════╤════════════════════════════════╕
│ PFO performance metric                      │ number of PFOs │ total efficiency (%)  │ signal/background efficiency (%) │ merged/unmerged efficiency (%) │
├─────────────────────────────────────────────┼────────────────┼───────────────────────┼──────────────────────────────────┼────────────────────────────────┤
│ signal PFOs merged and correctly matched    │ 165            │ 0.09018167508362301   │ 10.153846153846153               │ 89.1891891891892               │
├─────────────────────────────────────────────┼────────────────┼───────────────────────┼──────────────────────────────────┼────────────────────────────────┤
│ signal PFOs merged and incorrectly matched

In [5]:
def ShowerMergingEventPerformance(start_showers, to_merge, scores):
    """Print event-level performance metrics of the shower merging.

    A PFO is "signal" if its backtracked true particle number equals that of one
    of the two start showers, and "merged" if its score is not -3. PFOs are
    counted per event and the events are classified by whether signal and/or
    background PFOs were merged.

    Reads the notebook-level global ``events`` and the helper ``CountMask``.

    Args:
        start_showers: pair of boolean masks (unpacked into ``np.logical_or``),
            one per start shower; two start showers per event are assumed.
        to_merge: boolean mask of the PFOs which are candidates for merging.
        scores: per-candidate merge decision, -3 means not merged.

    Returns:
        None, the summary and the table are printed.
    """
    all_showers = np.logical_or(*start_showers)
    s_num = events.trueParticlesBT.number[all_showers]
    tm_num = events.trueParticlesBT.number[to_merge]

    signal = np.logical_or(*[tm_num == s_num[:, i] for i in range(2)]) # showers we should have merged
    merged = scores != -3 # PFOs actually merged

    # per-event PFO counts
    nMerged = CountMask(merged, -1)
    nSignal = CountMask(signal, -1)
    nTp = CountMask(merged & signal, -1) # true positive, signal PFOs merged
    nFp = CountMask(merged & ~signal, -1) # false positive, background PFOs merged

    # per-event classification
    signal_and_background = (nFp > 0) & (nTp > 0)
    signal_only = (nFp == 0) & (nTp > 0)
    background_only = (nFp > 0) & (nTp == 0)

    # event counts, each computed once and reused for the summary and the table
    n = ak.count(events.eventNum)
    n_t = ak.count(nSignal[nSignal > 0]) # events with at least one signal PFO
    n_m = ak.count(nMerged[nMerged > 0]) # events with at least one merged PFO
    n_signal_merged = ak.count(nTp[nTp > 0])
    n_background_merged = ak.count(nFp[nFp > 0])
    n_both = CountMask(signal_and_background)
    n_signal_only = CountMask(signal_only)
    n_background_only = CountMask(background_only)

    print(f"number of events after selection: {n}")
    print(f"number of events with PFOs to merge: {n_t}")
    print(f"number of events where we merge: {n_m}")
    print(f"number of events where we merge signal: {n_signal_merged}")
    print(f"number of events where we merge background: {n_background_merged}")
    print(f"number of events where we merge signal and background {n_both}")
    print(f"number of events where we merge only signal {n_signal_only}")
    print(f"number of events where we merge only background {n_background_only}")

    # "total efficiency" is relative to n_t, "merging efficiency" to n_m
    table = [
        ["performance metric", "number of events", "total efficiency", "merging efficiency"],
        ["signal merged"               , n_signal_merged, 100 * n_signal_merged / n_t, 100 * n_signal_merged / n_m],
        ["only signal merged"          , n_signal_only  , 100 * n_signal_only / n_t  , 100 * n_signal_only / n_m],
        ["signal and background merged", n_both         , 100 * n_both / n_t         , 100 * n_both / n_m]
        ]

    print(tabulate(table, floatfmt=".2f", tablefmt="fancy_grid"))

# print the event-level performance table for the scores computed above
ShowerMergingEventPerformance(start_showers, to_merge, scores)

number of events after selection: 710
number of events with PFOs to merge: 500
number of events where we merge: 130
number of events where we merge signal: 115
number of events where we merge background: 18
number of events where we merge signal and background 3
number of events where we merge only signal 112
number of events where we merge only background 15
╒══════════════════════════════╤══════════════════╤══════════════════╤════════════════════╕
│ performance metric           │ number of events │ total efficiency │ merging efficiency │
├──────────────────────────────┼──────────────────┼──────────────────┼────────────────────┤
│ signal merged                │ 115              │ 23.0             │ 88.46153846153847  │
├──────────────────────────────┼──────────────────┼──────────────────┼────────────────────┤
│ only signal merged           │ 112              │ 22.4             │ 86.15384615384616  │
├──────────────────────────────┼──────────────────┼──────────────────┼────────────────