In [1]:
import itertools

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from rich import print
from tabulate import tabulate

from apps.prod4a_merge_study import BestCut, EventSelection, PFOSelection, SignalBackground, SplitSample, ShowerMergeQuantities
from python.analysis import Master, Plots, vector
# Select matplotlib's "ggplot" style sheet for the figures drawn by this notebook.
plt.style.use('ggplot')

In [2]:
# some quality checks
def count_mask(arr : ak.Array, mask : ak.Array) -> int:
    """Count how many entries of `arr` are selected by `mask`.

    Args:
        arr: array whose entries are counted.
        mask: selection applied as `arr[mask]` before counting.

    Returns:
        The number of selected entries. `ak.count` is called without an axis,
        so the nesting of `arr` is ignored and a single number is returned
        (not an array).
    """
    return ak.count(arr[mask])

def null_mask(arr : ak.Array) -> ak.Array:
    """Build a boolean mask flagging the entries of `arr` that equal -999.

    Args:
        arr: array to test element by element.

    Returns:
        The result of `arr == -999`, i.e. True wherever an entry holds the
        value -999 that this notebook treats as null.
    """
    is_null = (arr == -999)
    return is_null

def n_null(arr : ak.Array) -> int:
    """Return the number of null entries in `arr`.

    The null entries are the ones flagged by `null_mask`; they are counted
    with `count_mask`.
    """
    nulls = null_mask(arr)
    return count_mask(arr, nulls)

class structure:
    """Entry counts of an array, taken at three levels of nesting.

    d0 is `ak.count(arr)`, d1 is `ak.num(arr, 0)` and d2 is `ak.num(arr, 1)`.
    """
    def __init__(self, arr : ak.Array) -> None:
        # total number of entries, ignoring the structure of the array entirely
        self.d0 = ak.count(arr)
        # number of entries along the outermost axis of the array
        self.d1 = ak.num(arr, axis = 0)
        # number of entries inside each outermost entry (one count per entry)
        self.d2 = ak.num(arr, axis = 1)

    def __repr__(self) -> str:
        """Show the three counts on a single line."""
        return f"d0 : {self.d0}, d1 : {self.d1}, d2 : {self.d2}"


In [4]:
def check_quality(events : Master.Data, masks: list = []):
    """Print consistency checks for the reco, cheated and true energy ntuples.

    The three energy arrays are read from `events`, filtered by every mask in
    `masks` (applied one after another, in list order) and then compared: the
    `structure` of each array is printed, followed by whether the three arrays
    agree at each level, and finally a table with the total and the null
    (-999) entry counts of each array.

    Args:
        events: data object providing `recoParticles.energy`,
            `trueParticlesBT.cheated_energy` and `trueParticlesBT.energy`.
        masks: masks applied successively to all three arrays. The default
            list is only iterated over, never modified.

    Returns:
        None; every result is printed.
    """
    reco = events.recoParticles.energy
    cheated = events.trueParticlesBT.cheated_energy
    true = events.trueParticlesBT.energy

    # each mask acts on the output of the previous one, so the order matters
    for mask in masks:
        reco = reco[mask]
        cheated = cheated[mask]
        true = true[mask]

    # check structure of ntuples
    s_r = structure(reco)
    s_c = structure(cheated)
    s_t = structure(true)

    print(s_r)
    print(s_c)
    print(s_t)
    print(f"do the nutples have the same number of entries: {(s_r.d0 == s_c.d0) & (s_c.d0 == s_t.d0)}")
    # `&` binds tighter than `==`, so both comparisons must be parenthesised;
    # unparenthesised, this would parse as `s_r.d1 == (s_c.d1 & s_c.d1) == s_t.d1`.
    print(f"do the ntuples have the same number of sub entries: {(s_r.d1 == s_c.d1) & (s_c.d1 == s_t.d1)}")
    # d2 holds one count per outer entry, hence the reduction with ak.all
    print(f"do the ntuples have the same number of sub sub entries: {ak.all(s_r.d2 == s_c.d2) and ak.all(s_c.d2 == s_t.d2)}")

    null_r = n_null(reco)
    null_c = n_null(cheated)
    null_t = n_null(true)

    # first row is the header, then one row per quantity
    table = [
        ["count", "reco", "cheated", "true"],
        ["total", s_r.d0, s_c.d0, s_t.d0],
        ["null", null_r, null_c, null_t],
    ]

    print(tabulate(table, tablefmt = "fancy_grid"))

# Load the input file and call ListNTuples for the strings "energy" and "starte"
# (presumably prints the ntuple names matching each string - confirm in Master).
# NOTE(review): the path is relative, so it only resolves when the kernel's working
# directory contains work/ROOTFiles.
events = Master.Data("work/ROOTFiles/Prod4a_1GeV_BeamSim_00_a.root")
events.io.ListNTuples("energy")
events.io.ListNTuples("starte")

# Fetch the two ntuples by name; the trailing comments record the units of each.
reco_daughter_PFP_true_byHits_EnergyByHits = events.io.Get("reco_daughter_PFP_true_byHits_EnergyByHits") # MeV
reco_daughter_PFP_true_byHits_startE = events.io.Get("reco_daughter_PFP_true_byHits_startE") # GeV
# GeV to MeV conversion for reco_daughter_PFP_true_byHits_startE:
# non-negative values are multiplied by 1000, every negative value is replaced by -999
# (the value null_mask tests for).
reco_daughter_PFP_true_byHits_startE = ak.where(reco_daughter_PFP_true_byHits_startE >= 0, reco_daughter_PFP_true_byHits_startE * 1000, -999)

# NOTE(review): this fetches the same ntuple as reco_daughter_PFP_true_byHits_EnergyByHits
# above a second time; the variable could presumably be reused - confirm io.Get has no side effects.
events.trueParticlesBT.cheated_energy = events.io.Get("reco_daughter_PFP_true_byHits_EnergyByHits")
# Store the converted (MeV) array on trueParticlesBT. The attribute name looks like the
# name-mangled form of a private `__energy` attribute, i.e. this bypasses the public
# interface - presumably so that `trueParticlesBT.energy` returns the MeV values; TODO confirm.
events.trueParticlesBT._TrueParticleDataBT__energy = reco_daughter_PFP_true_byHits_startE

# The return values of EventSelection and PFOSelection are discarded, so they presumably
# modify `events` in place - TODO confirm. The two results of SplitSample are used as
# masks in the following cell.
EventSelection(events)
PFOSelection(events)
start_showers, to_merge = SplitSample(events)

In [5]:
# Disabled checks kept from earlier runs.
# NOTE(review): if the second one is re-enabled, wrap the mask in a list - check_quality
# iterates over its `masks` argument.
#check_quality(events)
#check_quality(events, events.recoParticles.energy != -999)
#check_quality(events, to_merge)
# Of the three values returned by SignalBackground only `sa` is used in this cell (as a mask).
s ,b, sa = SignalBackground(events, start_showers, to_merge)
#check_quality(events)
# Check the entries selected by either of the start_showers masks.
# NOTE(review): the unpacking assumes start_showers holds exactly two masks - a third
# positional argument would be taken by np.logical_or as its `out` array; confirm against SplitSample.
check_quality(events, [np.logical_or(*start_showers)])
# Check with to_merge applied first and sa applied to the already-masked arrays.
check_quality(events, [to_merge, sa])