In [None]:
# Study Selections of Eta Mesons

In [None]:
import ROOT
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import sys
import multiprocessing as mp
import uproot
import pandas as pd
import pickle
import h5py
import gzip
import math
import timeit




# Latest Eta Production
# Relative path of the ntuple that the next cell opens with both uproot and ROOT.
infile = "../NTuples/eta_production_nodirt_ntuple_batch1.root"


In [None]:
# uproot handle on the ntuple; the trees are read from it further down.
inFile = uproot.open(infile)

# The total POT is stored in bin 1 of the "TOTPOT_Clone" histogram and is read through ROOT.
# (The original notebook also carried a commented-out lookup under the key "TotalPOT".)
inFileROOT = ROOT.TFile.Open(infile, "READ")
try:
    h_tot_pot = inFileROOT.Get("TOTPOT_Clone")
    if not h_tot_pot:
        # A missing key comes back as a null object; fail here with a readable message
        # instead of an opaque error on GetBinContent.
        raise KeyError(f"Histogram 'TOTPOT_Clone' not found in {infile}")
    TOT_POT = h_tot_pot.GetBinContent(1)
finally:
    # Always release the ROOT file, even when the lookup above raised.
    inFileROOT.Close()

# From here on TOT_POT is a *string* in scientific notation (it is pasted into axis labels).
TOT_POT = f"{TOT_POT:.2e}"
print("Total POT", TOT_POT)

def _tree_and_frame(tree_name):
    """Return ``(tree, frame)`` for the tree called ``tree_name`` in ``inFile``.

    ``frame`` is a pandas DataFrame holding every branch listed by ``tree.keys()``.
    """
    tree = inFile[tree_name]
    return tree, tree.arrays(tree.keys(), library="pd")


# Slice-level trees ("slc_truth_tree" and "slc_tree") and the pfp tree
slc_tree, slc_df = _tree_and_frame("slc_truth_tree")
slc_reco_tree, slc_reco_df = _tree_and_frame("slc_tree")
pfp_tree, pfp_df = _tree_and_frame("pfp_tree")

# Shower and track trees (two of each)
shw_tree1, shw_df1 = _tree_and_frame("shower_tree1")
shw_tree2, shw_df2 = _tree_and_frame("shower_tree2")
trk_tree1, trk_df1 = _tree_and_frame("track_tree1")
trk_tree2, trk_df2 = _tree_and_frame("track_tree2")

# Particle and daughter trees (two of each)
particle_tree1, particle_df1 = _tree_and_frame("particle_tree1")
particle_tree2, particle_df2 = _tree_and_frame("particle_tree2")
daughter_tree1, daughter_df1 = _tree_and_frame("daughter_tree1")
daughter_tree2, daughter_df2 = _tree_and_frame("daughter_tree2")

slc_df[:2]

In [None]:
def isTPC(row):
    """Tell whether a row's vertex lies inside the box used as the TPC volume.

    Parameters
    ----------
    row : mapping-like
        Must provide "vtx_x", "vtx_y" and "vtx_z" (e.g. a DataFrame row).

    Returns
    -------
    int
        1 when -200 <= vtx_x <= 200, -200 <= vtx_y <= 200 and 0 <= vtx_z <= 500
        (all bounds inclusive), otherwise 0.
    """
    limits = (("vtx_x", -200, 200), ("vtx_y", -200, 200), ("vtx_z", 0, 500))
    # all() stops at the first coordinate outside its range, like the chained `and`
    inside = all(low <= row[column] <= high for column, low, high in limits)
    return 1 if inside else 0

# Add the row-wise isTPC flag to both slice frames
for slice_frame in (slc_df, slc_reco_df):
    slice_frame["isTPC"] = slice_frame.apply(isTPC, axis=1)
slc_df.head(2)

In [None]:
def map_pdg_counts_to_slices(slc_df_t, other_df, pdg, new_col):
    """Count the rows of ``other_df`` with a given PDG code per slice and store the result.

    Parameters
    ----------
    slc_df_t : pandas.DataFrame
        One row per slice, with columns 'run', 'subrun', 'evt', 'slc'.
        Modified in place: column ``new_col`` is added (or overwritten).
    other_df : pandas.DataFrame
        Frame with the same four key columns plus a 'pdg' column.
    pdg : int
        PDG code to count.
    new_col : str
        Name of the integer count column written to ``slc_df_t``.

    Returns
    -------
    None
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # Keep only the requested species and count its rows per (run, subrun, evt, slc)
    is_species = other_df['pdg'] == pdg
    per_slice = other_df[is_species].groupby(slice_keys).size()

    # Look each slice's key tuple up in the counts; slices with no match get 0
    slice_index = slc_df_t.set_index(slice_keys).index
    matched = slice_index.map(per_slice)
    slc_df_t[new_col] = matched.fillna(0).astype(int)


# (source frame, PDG code, new slc_df column) for every per-slice count added to slc_df
pdg_count_columns = [
    (particle_df1, 111, "npi0"),
    (particle_df1, 221, "neta"),
    (particle_df1, 22, "ngamma"),
    (particle_df1, 13, "nmuminus"),
    (daughter_df1, 22, "ngamma_d"),
]
for source_frame, pdg_code, column_name in pdg_count_columns:
    map_pdg_counts_to_slices(slc_df, source_frame, pdg_code, column_name)
slc_df[:2]

In [None]:
# Display name for each integer interaction-mode code; the list position is the code.
interaction_codes = dict(enumerate([
    "QE",
    "Resonant",
    "DIS",
    "Coherent",
    "Coherent Elastic",
    "Electron scatt.",
    "IMDAnnihilation",
    r"Inverse $\beta$ decay",
    "Glashow resonance",
    "AMNuGamma",
    "MEC",
    "Diffractive",
    "EM",
    "Weak Mix",
]))


# Legend labels (matplotlib mathtext) for the basic topologies, keyed 0..7.
# Keys 0-3 share one pattern and differ only in the interaction-mode prefix.
_basic_mode_labels = [r"%s: $\nu_{\mu}\eta$" % mode for mode in ("RES", "QE", "DIS", "MEC")]
basic_topology_labels = dict(enumerate(_basic_mode_labels + [
    r"$\bar{\nu}_{\mu}\eta$",
    r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",
    r"$\nu_{\mu}\eta \rightarrow 0\pi^{0}$",
    "Other",
]))

# pandas query strings for the basic topologies, keyed like basic_topology_labels.
# They reference the columns pdg, mode, neta and npi0.
basic_topology_selections = {
    0:"pdg == 14.0 and mode == 1.0 and neta > 0",
    1:"pdg == 14.0 and mode == 0.0 and neta > 0",
    2:"pdg == 14.0 and mode == 2.0 and neta > 0",
    3:"pdg == 14.0 and mode == 10.0 and neta > 0",
    4:"pdg == -14.0 and neta > 0",
    5:"(pdg == 12.0 or pdg == -12.0) and neta > 0",
    # Same as key 0 with the extra requirement of no pi0
    6: "pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0",
    # Complement of key 6. Query strings negate with `~`; the C-style `!` is a syntax error.
    7: "~(pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0)"
}


# differentiate between a few interesting eta topologies
# Keys 0-3 are the CC labels for 0..3 pi0 and keys 4-7 the NC ones; all eight share one pattern.
_res_eta_labels = [
    r"RES: $\nu_{\mu}%s\eta \rightarrow %d\pi^{0}$" % (current, n_pi0)
    for current in ("CC", "NC")
    for n_pi0 in range(4)
]

# Keys 8-18, in order
_remaining_labels = [
    r"Not RES: $\nu_{\mu}\eta$",
    r"$\bar{\nu}_{\mu}\eta$",
    r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",
    r"$\nu_{\mu}$ CC $1\pi^{0}$",
    r"$\nu_{\mu}$ NC $1\pi^{0}$",
    r"$\nu_{\mu}$ CC Other",
    r"$\nu_{\mu}$ NC Other",
    r"$\nu$ Other",
    r"Dirt $\nu$",
    "Cosmic",
    r"RES $\nu_{\mu}\eta \rightarrow > 3\pi^{0}$",
]

topology_labels1 = dict(enumerate(_res_eta_labels + _remaining_labels))

# pandas query strings, keyed like topology_labels1.

# Keys 0-7: iscc with npi0 == 0..3, then isnc with npi0 == 0..3
_res_eta_template = "pdg == 14.0 and mode == 1.0 and %s == 1.0 and neta > 0 and npi0 == %d and isTPC == 1"
_res_eta_selections = [
    _res_eta_template % (current_flag, n_pi0)
    for current_flag in ("iscc", "isnc")
    for n_pi0 in range(4)
]

# Keys 11-14: no eta; (iscc, isnc) with exactly one pi0, then (iscc, isnc) with any other pi0 count
_no_eta_template = "pdg == 14.0 and %s == 1.0 and neta == 0 and npi0 %s 1 and isTPC == 1"
_no_eta_selections = [
    _no_eta_template % (current_flag, comparison)
    for comparison in ("==", "!=")
    for current_flag in ("iscc", "isnc")
]

topology_selections1 = dict(enumerate(
    _res_eta_selections
    + [
        "pdg == 14.0 and mode != 1.0 and neta > 0 and isTPC == 1",
        "pdg == -14.0 and neta > 0",
        "(pdg == 12.0 or pdg == -12.0) and neta > 0",
    ]
    + _no_eta_selections
    + [
        "(pdg == 12 or pdg == -12 or pdg == -14) and neta == 0 and isTPC == 1",
        "(pdg == 14 or pdg == -14 or pdg == 12 or pdg == -12) and isTPC == 0",
        "pdg == -1",
        "pdg == 14 and mode == 1.0 and neta > 0 and npi0 > 3 and isTPC == 1",
    ]
))

# matplotlib colour for each topology, keyed like topology_labels1.
# Every topology needs its own colour: the cosmic-cut efficiency plot tells the
# topologies apart by colour alone.
topology_colors1 = {
    0:"magenta",
    1:"purple",
    2:"violet",
    3:"deeppink",
    4:"blue",
    5:"navy",
    6:"royalblue",
    7:"lavender",
    8:"indigo",
    9:"green",
    10:"lime",
    11:"cyan",
    12:"aquamarine",
    13:"red",
    14:"maroon",
    15:"orange",
    16:"coral",
    17:"gray",
    # was "red", which duplicated key 13
    18:"gold"
}



In [None]:
# Label each slc_df row with the key of the topology selection it passes.
# Selections are applied in dictionary order, so a row passing several ends up with
# the last one; rows passing none keep NaN in "TOP".
for num, selection in topology_selections1.items():
    passing_rows = slc_df.query(selection).index
    condition = slc_df.index.isin(passing_rows)
    slc_df.loc[condition, "TOP"] = num

# NOTE(review): this assignment aligns on the row index, so it presumes that row i of
# slc_reco_df and row i of slc_df describe the same slice — confirm.
slc_reco_df["TOP"] = slc_df["TOP"]
slc_reco_df[:2]

# Add Topologies to other DataFrames

In [None]:
slice_keys = ['run', 'subrun', 'evt', 'slc']
df_small_filtered = slc_df[slice_keys + ['TOP']]


def _attach_topology(frame):
    """Left-join the per-slice "TOP" label onto ``frame`` using the slice keys.

    A "TOP" column already present on ``frame`` is dropped first, so running this
    cell again replaces the label instead of producing TOP_x / TOP_y columns
    (which would break every later ``TOP == ...`` query).
    """
    return frame.drop(columns=['TOP'], errors='ignore').merge(
        df_small_filtered, on=slice_keys, how='left'
    )


pfp_df = _attach_topology(pfp_df)
shw_df1 = _attach_topology(shw_df1)
trk_df1 = _attach_topology(trk_df1)
particle_df1 = _attach_topology(particle_df1)
daughter_df1 = _attach_topology(daughter_df1)


particle_df1[:2]

In [None]:
def plot_selection_1D(df, variable, B, units, sel=None, log=0, rangex=None, rangey=None):
    """Draw a histogram of ``variable`` stacked by the "TOP" topology label.

    One bar series is drawn per key of ``topology_selections1`` (colours and legend
    labels come from ``topology_colors1`` / ``topology_labels1``; odd keys are hatched).
    The histogram of all selected rows is overlaid as black points with sqrt(N) error
    bars. The y-axis label includes the module-level ``TOT_POT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``variable`` and a "TOP" column. It is not modified.
    variable : str
        Column to histogram.
    B : int, str or sequence
        Bin specification as accepted by ``numpy.histogram``. The resulting edges are
        shared by every histogram in the figure.
    units : str
        Text placed in square brackets on the x-axis label.
    sel : str, optional
        pandas query string applied to ``df`` before plotting; also shown in the x label.
    log : bool or int, optional
        Truthy for a logarithmic y-axis.
    rangex, rangey : sequence, optional
        Axis limits passed to ``plt.xlim`` / ``plt.ylim``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    selected = df if sel is None else df.query(sel)

    # Histogram of everything; its edges define the binning for the per-topology
    # histograms too, so the stack stays aligned even when B is a bin *count*.
    all_values = selected[variable].dropna().to_numpy()
    counts_all, bin_edges = np.histogram(all_values, bins=B)
    counts_all = counts_all.astype(float)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_widths = np.diff(bin_edges)  # per-bin widths, so non-uniform edges draw correctly

    topology_ids = range(len(topology_selections1))
    all_counts = []
    for num in topology_ids:
        values = selected.loc[selected["TOP"] == num, variable].dropna().to_numpy()
        counts, _ = np.histogram(values, bins=bin_edges)
        all_counts.append(counts.astype(float))

    # Start from an empty current figure
    plt.clf()

    N_prev = np.zeros_like(bin_centers)
    for num in topology_ids:
        hatch = "//" if num % 2 != 0 else ""
        plt.bar(bin_centers, all_counts[num], width=bin_widths,
                bottom=N_prev, color=topology_colors1[num], alpha=0.6, hatch=hatch,
                label=topology_labels1[num])
        # New array each time so the `bottom` already handed to plt.bar is never mutated
        N_prev = N_prev + all_counts[num]

    if sel is not None:
        plt.xlabel(variable + " ("+sel+") ["+units+"]", fontsize=14)
    else:
        plt.xlabel(variable + " ["+units+"]", fontsize=14)

    plt.ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
    plt.errorbar(bin_centers, counts_all, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All")
    if rangex is not None:
        plt.xlim(rangex)

    if rangey is not None:
        plt.ylim(rangey)

    # `ncol` (rather than `ncols`) matches the other legend call in this notebook
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncol=2)
    if log:
        plt.yscale("log")
    plt.show()

In [None]:
# is_clear_cosmic of the reco slices, 6 bins of width 0.5 on [-1, 2], log y-axis.
plot_selection_1D(slc_reco_df, "is_clear_cosmic", np.linspace(-1, 2, 7), "Arbitrary", log=1)

In [None]:
# Per-slice eta count (slc_df "neta"), 6 bins of width 0.5 on [-1, 2], log y-axis.
plot_selection_1D(slc_df, "neta", np.linspace(-1, 2, 7), "Arbitrary", log=1)

In [None]:
# Per-slice pi0 count (slc_df "npi0"), unit-wide bins centred on 0..5, log y-axis.
plot_selection_1D(slc_df, "npi0", np.linspace(-0.5, 5.5, 7), "Arbitrary", log=1)

In [None]:
# trackScore from trk_df1, 8 unit-wide bins on [-2.5, 5.5], log y-axis.
plot_selection_1D(trk_df1, "trackScore", np.linspace(-2.5, 5.5, 9), "Arbitrary", log=1)

In [None]:
# Count the shw_df1 rows of each slice and store the count on slc_reco_df as "npfp"
# (slices with no shw_df1 rows get 0).
# NOTE(review): the column is named npfp but the rows counted come from shw_df1 — confirm.
slice_keys = ['run', 'subrun', 'evt', 'slc']
counts = shw_df1.groupby(slice_keys).size()

reco_slice_index = slc_reco_df.set_index(slice_keys).index
slc_reco_df["npfp"] = reco_slice_index.map(counts).fillna(0).astype(int)
slc_reco_df.head(2)

In [None]:
# npfp in unit-wide bins centred on 1..18; slices with npfp == 0 fall below the first edge.
plot_selection_1D(slc_reco_df, "npfp", np.linspace(0.5, 18.5, 19), "Counts", log=1)

In [None]:
# Non-negative fmatch_time only: 9 bins on [0, 105], log y-axis.
plot_selection_1D(slc_reco_df, "fmatch_time", np.linspace(0, 105, 10), "us", sel="fmatch_time >= 0", log=1)

In [None]:
# Non-positive fmatch_time only: 9 bins on [-10000, 0], log y-axis.
plot_selection_1D(slc_reco_df, "fmatch_time", np.linspace(-10000, 0, 10), "us", sel="fmatch_time <= 0", log=1)

In [None]:
# fmatch_score on [-20, 80], linear y-axis. 100 edges give 99 bins, so the bins are slightly wider than 1.
# NOTE(review): the units label "us" is the same one used for fmatch_time — confirm it is meant for a score.
plot_selection_1D(slc_reco_df, "fmatch_score", np.linspace(-20, 80, 100), "us", log=0)

In [None]:
# fmatch_score restricted to [0, 60], log y-axis. 60 edges give 59 bins.
# NOTE(review): the units label "us" is the same one used for fmatch_time — confirm it is meant for a score.
plot_selection_1D(slc_reco_df, "fmatch_score", np.linspace(0, 60, 60), "us", sel="0 <= fmatch_score <= 60", log=1)

In [None]:
# Non-positive fmatch_score on [-20, 0], log y-axis. 20 edges give 19 bins.
# NOTE(review): the units label "us" is the same one used for fmatch_time — confirm it is meant for a score.
plot_selection_1D(slc_reco_df, "fmatch_score", np.linspace(-20, 0, 20), "us", sel="fmatch_score <= 0", log=1)

In [None]:
def cut_cosmic1(row):
    """Return 1 when ``row["is_clear_cosmic"]`` is greater than 0.9, otherwise 0."""
    return int(row["is_clear_cosmic"] > 0.9)
        
# Flag every reco slice: Cut == 1 marks a slice rejected by cut_cosmic1
slc_reco_df["Cut"] = slc_reco_df.apply(cut_cosmic1, axis=1)

# One horizontal bar per topology at the fraction of its slices that survive the cut
for num in range(len(topology_labels1.keys())):
    N = slc_reco_df.query("TOP == "+str(num)).shape[0]
    Nc = slc_reco_df.query("TOP == "+str(num)+" and Cut == 1").shape[0]

    # A topology with no slices has no defined efficiency. Plot NaN (nothing is drawn,
    # the legend entry is kept) instead of dividing by zero.
    efficiency = (1.0*N - Nc)/N if N > 0 else float("nan")

    plt.errorbar([1], [efficiency], xerr=[1], yerr=[0], c=topology_colors1[num], label=topology_labels1[num])

plt.title("Is Clear Cosmic Cut", fontsize=20)
plt.ylabel("Efficiency", fontsize=14)
plt.ylim([0.4, 1.0])
plt.legend(ncol=2)
plt.show()

In [None]:
slice_keys = ['run', 'subrun', 'evt', 'slc']
df_small_filtered = slc_reco_df[slice_keys + ['Cut']]


def _attach_cut(frame):
    """Left-join the per-slice "Cut" flag onto ``frame`` using the slice keys.

    A "Cut" column already present on ``frame`` is dropped first, so running this
    cell again replaces the flag instead of producing Cut_x / Cut_y columns
    (which would break every later ``Cut == 0`` query).
    """
    return frame.drop(columns=['Cut'], errors='ignore').merge(
        df_small_filtered, on=slice_keys, how='left'
    )


pfp_df = _attach_cut(pfp_df)
shw_df1 = _attach_cut(shw_df1)
trk_df1 = _attach_cut(trk_df1)
particle_df1 = _attach_cut(particle_df1)
daughter_df1 = _attach_cut(daughter_df1)

# NOTE(review): this assignment aligns on the row index, so it presumes slc_df and
# slc_reco_df list the same slices in the same rows — confirm.
slc_df["Cut"] = slc_reco_df["Cut"]
slc_df[:2]

In [None]:
# Copy the Cut flag from each first-set frame to its second-set partner.
# NOTE(review): the copy aligns on the row index, so it presumes that row i of the *2
# frame and row i of the merged *1 frame describe the same object — confirm.
for target_frame, source_frame in (
    (trk_df2, trk_df1),
    (shw_df2, shw_df1),
    (particle_df2, particle_df1),
    (daughter_df2, daughter_df1),
):
    target_frame["Cut"] = source_frame["Cut"]


def _drop_cosmics(frame):
    """Return the rows of ``frame`` whose Cut flag is 0."""
    return frame.query("Cut == 0")


# Cut the Cosmics (slc_df is left untouched)
slc_reco_df = _drop_cosmics(slc_reco_df)
shw_df1 = _drop_cosmics(shw_df1)
shw_df2 = _drop_cosmics(shw_df2)
trk_df1 = _drop_cosmics(trk_df1)
trk_df2 = _drop_cosmics(trk_df2)
pfp_df = _drop_cosmics(pfp_df)
particle_df1 = _drop_cosmics(particle_df1)
particle_df2 = _drop_cosmics(particle_df2)
daughter_df1 = _drop_cosmics(daughter_df1)
daughter_df2 = _drop_cosmics(daughter_df2)


In [None]:
# Same npfp plot as before, now on the slices that passed the cosmic cut (Cut == 0).
plot_selection_1D(slc_reco_df, "npfp", np.linspace(0.5, 18.5, 19), "Counts", log=1)

In [None]:
# Non-positive fmatch_time again, now on the slices that passed the cosmic cut (Cut == 0).
plot_selection_1D(slc_reco_df, "fmatch_time", np.linspace(-10000, 0, 10), "us", sel="fmatch_time <= 0", log=1)


In [None]:
# fmatch_score on [-20, 80] after the cosmic cut, log y-axis. 100 edges give 99 bins.
# NOTE(review): the units label "us" is the same one used for fmatch_time — confirm it is meant for a score.
plot_selection_1D(slc_reco_df, "fmatch_score", np.linspace(-20, 80, 100), "us", log=1)

In [None]:
# Show the first two pfp_df rows that remain after the cosmic cut.
pfp_df[:2]

In [None]:
# t0 values of every pfp_df row whose topology is not 17 ("Cosmic")
t = pfp_df.query("TOP != 17")["t0"].values
print(t)

# np.nanmin / np.nanmax skip NaN entries. The builtin min() / max() can return NaN when
# the array contains one, which would turn every bin edge into NaN.
plot_selection_1D(pfp_df, "t0", np.linspace(np.nanmin(t), np.nanmax(t), 100), "us", sel="TOP != 17", log=0)