In [None]:
# Let's look at characterizing the TPC etas

In [None]:
import ROOT
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import sys
import multiprocessing as mp
import uproot
import pandas as pd
import pickle
import h5py
import gzip
import math
import timeit


# Input ntuple: exactly one `infile` assignment below should be active.
# The commented-out paths are alternative samples kept for quick switching.
#infile = "../NTuples/test_truth.root"
#infile = "../NTuples/full_tpceta_sample.root"

# New Stuff
#infile = "../NTuples/tpcetanocosmic_full_sample_v2.root"
infile = "../NTuples/eta_production_ntuple_test_nodirt.root"
#infile = "../NTuples/production_ntuples_v2.root"

In [None]:
# uproot provides the trees; PyROOT is used only to read the total-POT histogram.
inFile = uproot.open(infile)

inFileROOT = ROOT.TFile.Open(infile, "READ")
if not inFileROOT or inFileROOT.IsZombie():
    raise OSError(f"Could not open {infile} with ROOT")
try:
    #h_tot_pot = inFileROOT.Get("TotalPOT")
    h_tot_pot = inFileROOT.Get("TOTPOT_Clone")
    # Get() hands back a null object for a missing key; fail with a readable message
    if not h_tot_pot:
        raise KeyError(f"Histogram 'TOTPOT_Clone' not found in {infile}")
    TOT_POT = h_tot_pot.GetBinContent(1)
finally:
    # release the ROOT file handle even when the lookup above fails
    inFileROOT.Close()
# From here on TOT_POT is the formatted string that the plot axis labels use.
TOT_POT = f"{TOT_POT:.2e}"
print("Total POT", TOT_POT)

# Tree handles
slc_tree = inFile["slc_truth_tree"]
slc_reco_tree = inFile["slc_tree"]
particle_tree1 = inFile["particle_tree1"]
particle_tree2 = inFile["particle_tree2"]
shw_tree1 = inFile["shower_tree1"]
shw_tree2 = inFile["shower_tree2"]
trk_tree1 = inFile["track_tree1"]
trk_tree2 = inFile["track_tree2"]


# Every branch of every tree is read into a pandas DataFrame
slc_df = slc_tree.arrays(slc_tree.keys(), library="pd")
slc_reco_df = slc_reco_tree.arrays(slc_reco_tree.keys(), library="pd")
shw_df1 = shw_tree1.arrays(shw_tree1.keys(), library="pd")
shw_df2 = shw_tree2.arrays(shw_tree2.keys(), library="pd")
trk_df1 = trk_tree1.arrays(trk_tree1.keys(), library="pd")
trk_df2 = trk_tree2.arrays(trk_tree2.keys(), library="pd")

particle_df1 = particle_tree1.arrays(particle_tree1.keys(), library="pd")
particle_df2 = particle_tree2.arrays(particle_tree2.keys(), library="pd")


slc_df[:2]

In [None]:
def isTPC(row):
    """Flag whether a vertex lies inside the box used as the TPC volume.

    `row` is any mapping-like object exposing "vtx_x", "vtx_y" and "vtx_z".
    Returns 1 when -200 <= x <= 200, -200 <= y <= 200 and 0 <= z <= 500
    (all bounds inclusive), otherwise 0.
    """
    # Coordinates are checked one at a time; the first failure ends the test.
    if not (-200 <= row["vtx_x"] <= 200):
        return 0
    if not (-200 <= row["vtx_y"] <= 200):
        return 0
    if not (0 <= row["vtx_z"] <= 500):
        return 0
    return 1

# Row-wise evaluation of isTPC on the truth slices; the 0/1 result becomes column "isTPC"
slc_df["isTPC"] = slc_df.apply(isTPC, axis="columns")
slc_df.head(2)

In [None]:
def map_pdg_counts_to_slices(slc_df_t, other_df, pdg, new_col):
    """Count, for every slice, the rows of `other_df` that carry a given PDG code.

    Rows of `other_df` with ``pdg == pdg`` are grouped by
    (run, subrun, evt, slc); the group sizes are written into
    ``slc_df_t[new_col]`` as integers, with 0 for slices that have no match.
    `slc_df_t` is modified in place and nothing is returned.
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # rows with the requested PDG code, counted per slice
    n_per_slice = other_df.loc[other_df['pdg'] == pdg].groupby(slice_keys).size()

    # look each slice's key tuple up in the counts; slices without an entry get 0
    slice_index = slc_df_t.set_index(slice_keys).index
    slc_df_t[new_col] = slice_index.map(n_per_slice).fillna(0).astype(int)


# Per-slice number of particle_df1 rows with PDG code 111, stored as "npi0"
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=111, new_col="npi0")
slc_df.head(2)

In [None]:
# Same per-slice counting for PDG codes 221 ("neta"), 22 ("ngamma") and 13 ("nmuminus")
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=221, new_col="neta")
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=22, new_col="ngamma")
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=13, new_col="nmuminus")
slc_df.head(2)

In [None]:
# Quick look at the first two shower rows
shw_df1.head(2)

In [None]:
# Display names for the integer interaction-mode codes; a name's position in the
# list is its code.
interaction_codes = dict(enumerate([
    "QE",                       # 0
    "Resonant",                 # 1
    "DIS",                      # 2
    "Coherent",                 # 3
    "Coherent Elastic",         # 4
    "Electron scatt.",          # 5
    "IMDAnnihilation",          # 6
    r"Inverse $\beta$ decay",   # 7
    "Glashow resonance",        # 8
    "AMNuGamma",                # 9
    "MEC",                      # 10
    "Diffractive",              # 11
    "EM",                       # 12
    "Weak Mix",                 # 13
]))


# Legend text for the coarse topology categories; list position is the category key
# and matches the keys of basic_topology_selections.
basic_topology_labels = dict(enumerate([
    r"RES: $\nu_{\mu}\eta$",                   # 0
    r"QE: $\nu_{\mu}\eta$",                    # 1
    r"DIS: $\nu_{\mu}\eta$",                   # 2
    r"MEC: $\nu_{\mu}\eta$",                   # 3
    r"$\bar{\nu}_{\mu}\eta$",                  # 4
    r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",   # 5
    r"$\nu_{\mu}\eta \rightarrow 0\pi^{0}$",   # 6
    "Other",                                   # 7
]))

# DataFrame.query expressions for the coarse topology categories; keys match
# basic_topology_labels. Entry 7 is the complement of entry 6.
basic_topology_selections = {
    0:"pdg == 14.0 and mode == 1.0 and neta > 0",
    1:"pdg == 14.0 and mode == 0.0 and neta > 0",
    2:"pdg == 14.0 and mode == 2.0 and neta > 0",
    3:"pdg == 14.0 and mode == 10.0 and neta > 0",
    4:"pdg == -14.0 and neta > 0",
    5:"(pdg == 12.0 or pdg == -12.0) and neta > 0",
    6: "pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0",
    # negation must be spelled `not`: `!` is a syntax error in a query expression
    7: "not (pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0)"
}


# Legend text for the detailed eta topologies; list position is the topology key
# shared with topology_selections1 and topology_colors1.
topology_labels1 = dict(enumerate([
    # resonant, charged current, by number of pi0
    r"RES: $\nu_{\mu}CC\eta \rightarrow 0\pi^{0}$",   # 0
    r"RES: $\nu_{\mu}CC\eta \rightarrow 1\pi^{0}$",   # 1
    r"RES: $\nu_{\mu}CC\eta \rightarrow 2\pi^{0}$",   # 2
    r"RES: $\nu_{\mu}CC\eta \rightarrow 3\pi^{0}$",   # 3
    # resonant, neutral current, by number of pi0
    r"RES: $\nu_{\mu}NC\eta \rightarrow 0\pi^{0}$",   # 4
    r"RES: $\nu_{\mu}NC\eta \rightarrow 1\pi^{0}$",   # 5
    r"RES: $\nu_{\mu}NC\eta \rightarrow 2\pi^{0}$",   # 6
    r"RES: $\nu_{\mu}NC\eta \rightarrow 3\pi^{0}$",   # 7
    # everything else
    r"Not RES: $\nu_{\mu}\eta$",                      # 8
    r"$\bar{\nu}_{\mu}\eta$",                         # 9
    r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",          # 10
    r"$\nu_{\mu}$ CC $1\pi^{0}$",                     # 11
    r"$\nu_{\mu}$ NC $1\pi^{0}$",                     # 12
    r"$\nu_{\mu}$ CC Other",                          # 13
    r"$\nu_{\mu}$ NC Other",                          # 14
    r"$\nu$ Other",                                   # 15
    r"Dirt $\nu$",                                    # 16
    "Cosmic",                                         # 17
    r"RES $\nu_{\mu}\eta \rightarrow > 3\pi^{0}$",    # 18
]))

# DataFrame.query expressions defining each detailed topology; keys match
# topology_labels1 / topology_colors1.
topology_selections1 = {
    # keys 0-3: resonant CC eta with 0..3 pi0; keys 4-7: the same for NC
    **{
        4 * group + n_pi0: (
            f"pdg == 14.0 and mode == 1.0 and {current} == 1.0 and neta > 0 "
            f"and npi0 == {n_pi0} and isTPC == 1"
        )
        for group, current in enumerate(("iscc", "isnc"))
        for n_pi0 in range(4)
    },

    8: "pdg == 14.0 and mode != 1.0 and neta > 0 and isTPC == 1",
    9: "pdg == -14.0 and neta > 0",
    10: "(pdg == 12.0 or pdg == -12.0) and neta > 0",
    11: "pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",
    12: "pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",
    13: "pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",
    14: "pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",
    15: "(pdg == 12 or pdg == -12 or pdg == -14) and neta == 0 and isTPC == 1",
    16: "(pdg == 14 or pdg == -14 or pdg == 12 or pdg == -12) and isTPC == 0",
    17: "pdg == -1",
    #18:"(pdg != 14 and pdg != -14 and pdg != 12 and pdg != -12 and pdg != -1)"
    18: "pdg == 14 and mode == 1.0 and neta > 0 and npi0 > 3 and isTPC == 1",
}

# Bar colour for each detailed topology key. Every key gets its own colour so that
# no two layers of the stacked histogram can be confused.
topology_colors1 = {
    0:"magenta",
    1:"purple",
    2:"violet",
    3:"deeppink",
    4:"blue",
    5:"navy",
    6:"royalblue",
    7:"lavender",
    8:"indigo",
    9:"green",
    10:"lime",
    11:"cyan",
    12:"aquamarine",
    13:"red",
    14:"maroon",
    15:"orange",
    16:"coral",
    17:"gray",
    18:"gold"  # was "red", which duplicated key 13
}



In [None]:
# Label every truth slice with the key of the topology selection it satisfies.
# A slice passing several selections keeps the last one applied, so the order of
# topology_selections1 matters; slices passing none stay NaN.
slc_df["TOP"] = np.nan  # reset first so re-running the cell cannot keep stale labels
for num in topology_selections1:  # iterate the real keys; they need not be 0..N-1
    condition = slc_df.index.isin(slc_df.query(topology_selections1[num]).index)
    slc_df.loc[condition, "TOP"] = num

# NOTE(review): this copy aligns on the row index, i.e. it presumes slc_reco_df and
# slc_df describe the same slices in the same order — confirm against the ntuple maker.
slc_reco_df["TOP"] = slc_df["TOP"]
slc_reco_df[:2]

In [None]:
# Attach each truth slice's topology code ("TOP") to the shower and track rows of that slice.
df_small_filtered = slc_df[['run', 'subrun', 'evt', 'slc', 'TOP']]

# Dropping any existing "TOP" keeps the cell re-runnable (a second merge would otherwise
# produce TOP_x / TOP_y). validate= raises if a slice key occurs more than once in slc_df,
# which would otherwise silently duplicate shower/track rows.
shw_df1 = shw_df1.drop(columns='TOP', errors='ignore').merge(
    df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left', validate='many_to_one')
trk_df1 = trk_df1.drop(columns='TOP', errors='ignore').merge(
    df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left', validate='many_to_one')

shw_df1[:2]

In [None]:
# Copy the topology code onto the second shower/track tables.
# NOTE(review): the assignment aligns on row index, so it presumes the *_df2 tables
# are row-for-row parallel to the *_df1 tables — confirm.
shw_df2["TOP"], trk_df2["TOP"] = shw_df1["TOP"], trk_df1["TOP"]

In [None]:
def plot_selection_1D(df, variable, B, units, sel=None, log=0, rangex=None, rangey=None):
    """Show a stacked histogram of ``variable`` split by the "TOP" topology code.

    One bar layer is drawn per key of the notebook-level ``topology_selections1``
    (colour and legend text come from ``topology_colors1`` / ``topology_labels1``),
    and the histogram of all selected rows is overlaid as black points with
    sqrt(N) error bars. Rows whose "TOP" matches no key contribute to the points
    only. The y-axis label quotes the notebook-level ``TOT_POT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the column ``variable`` and a "TOP" column. Not modified.
    variable : str
        Column to histogram.
    B : int or sequence of float
        Bin specification handed to ``numpy.histogram``.
    units : str
        Text placed in square brackets on the x-axis label.
    sel : str, optional
        ``DataFrame.query`` expression applied before histogramming; it is also
        echoed in the x-axis label.
    log : bool or int, optional
        A truthy value switches the y axis to log scale.
    rangex, rangey : sequence, optional
        Axis limits passed to ``set_xlim`` / ``set_ylim``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    selected = df if sel is None else df.query(sel)

    # Bin everything once. The resulting edges are reused for every topology so all
    # layers share the same bins even when B is just a bin count.
    # NaNs are dropped because numpy cannot auto-range over them.
    counts_all, bin_edges = np.histogram(selected[variable].dropna().to_numpy(), bins=B)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    bin_widths = np.diff(bin_edges)  # per-bin widths, so non-uniform binning draws correctly

    fig, ax = plt.subplots()

    stacked = np.zeros_like(bin_centers)  # running top of the stack
    for num in topology_selections1:
        layer_values = selected.loc[selected["TOP"] == num, variable].dropna().to_numpy()
        layer_counts, _ = np.histogram(layer_values, bins=bin_edges)

        # hatch the odd-numbered topologies to help tell neighbouring layers apart
        hatch = "//" if num % 2 != 0 else ""
        ax.bar(bin_centers, layer_counts, width=bin_widths,
               bottom=stacked, color=topology_colors1[num], alpha=0.6, hatch=hatch,
               label=topology_labels1[num])
        stacked += layer_counts

    if sel is not None:
        ax.set_xlabel(variable + " ("+sel+") ["+units+"]", fontsize=14)
    else:
        ax.set_xlabel(variable + " ["+units+"]", fontsize=14)

    ax.set_ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
    ax.errorbar(bin_centers, counts_all, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All")

    if rangex is not None:
        ax.set_xlim(rangex)
    if rangey is not None:
        ax.set_ylim(rangey)

    # legend sits outside the axes, to the right
    ax.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncols=2)
    if log:
        ax.set_yscale("log")
    plt.show()

In [None]:
# trackScore of the shw_df1 rows, stacked by topology code, log y axis
plot_selection_1D(df=shw_df1, variable="trackScore", B=np.linspace(-1, 2, 7),
                  units="Arbitrary", log=1)

In [None]:
# Number of shw_df1 rows in each (run, subrun, evt, slc) group ...
counts = shw_df1.groupby(['run', 'subrun', 'evt', 'slc']).size()

# ... looked up per reco slice and stored as "npfp" (0 where a slice has no rows)
slc_reco_df["npfp"] = (
    slc_reco_df.set_index(['run', 'subrun', 'evt', 'slc'])
    .index
    .map(counts)
    .fillna(0)
    .astype(int)
)
slc_reco_df.head(2)

In [None]:
# "npfp" per reco slice, unit-width bins centred on 0..11
plot_selection_1D(df=slc_reco_df, variable="npfp", B=np.linspace(-0.5, 11.5, 13),
                  units="Counts", log=0)

In [None]:
# "ngamma" per truth slice, unit-width bins centred on 0..5
plot_selection_1D(df=slc_df, variable="ngamma", B=np.linspace(-0.5, 5.5, 7),
                  units="Counts", log=0)

In [None]:
# Per-slice counts of PDG codes 2212 ("nproton") and 2112 ("nneutron")
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=2212, new_col="nproton")
map_pdg_counts_to_slices(slc_df_t=slc_df, other_df=particle_df1, pdg=2112, new_col="nneutron")

In [None]:
# "nproton" per truth slice, unit-width bins centred on 0..5
plot_selection_1D(df=slc_df, variable="nproton", B=np.linspace(-0.5, 5.5, 7),
                  units="Counts", log=0)

In [None]:
# "nneutron" per truth slice, unit-width bins centred on 0..5
plot_selection_1D(df=slc_df, variable="nneutron", B=np.linspace(-0.5, 5.5, 7),
                  units="Counts", log=0)

In [None]:
# "nmuminus" per truth slice, unit-width bins centred on 0..7
plot_selection_1D(df=slc_df, variable="nmuminus", B=np.linspace(-0.5, 7.5, 9),
                  units="Counts", log=0)

In [None]:
# "neta" per truth slice, unit-width bins centred on 0..5
plot_selection_1D(df=slc_df, variable="neta", B=np.linspace(-0.5, 5.5, 7),
                  units="Counts", log=0)

In [None]:
def map_eta_energy_to_slices(slc_df_t, other_df, new_col):
    """Store the mean "genE" of the PDG-221 rows of each slice in ``slc_df_t[new_col]``.

    Rows of `other_df` with ``pdg == 221`` are grouped by
    (run, subrun, evt, slc) and their "genE" values averaged. Slices without
    such a row receive 0.0. `slc_df_t` is modified in place; nothing is returned.

    NOTE(review): the 0.0 filler lands inside any histogram range that starts
    at zero — confirm that is intended when plotting this column.
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # mean genE of the eta rows, per slice
    eta_rows = other_df.loc[other_df['pdg'] == 221]
    mean_energy = eta_rows.groupby(slice_keys)["genE"].mean()

    # look each slice up by its key tuple; missing slices become 0.0
    slice_index = slc_df_t.set_index(slice_keys).index
    slc_df_t[new_col] = slice_index.map(mean_energy).fillna(0).astype(float)

# Mean genE of the PDG-221 rows of each truth slice, stored as "etaE"
map_eta_energy_to_slices(slc_df_t=slc_df, other_df=particle_df1, new_col="etaE")
slc_df.head(2)
#plot_selection_1D(particle_df1, "genE", np.linspace(-0.5, 5.5, 7), "Counts", sel="pdg == 221", log=0)

In [None]:
# "etaE" per truth slice, 29 bins between 0 and 2.5
plot_selection_1D(df=slc_df, variable="etaE", B=np.linspace(0, 2.5, 30),
                  units="GeV", log=0)

In [None]:
def get_nshowers(slc_df_t, shw_df_t, score, new_col):
    """Count, per slice, the rows of `shw_df_t` whose "trackScore" is at most `score`.

    The passing rows are grouped by (run, subrun, evt, slc) and the group sizes
    written to ``slc_df_t[new_col]`` as integers (0 for slices with no passing
    row). `slc_df_t` is modified in place; nothing is returned.
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # rows at or below the track-score threshold, counted per slice
    low_score_rows = shw_df_t.loc[shw_df_t['trackScore'] <= score]
    n_per_slice = low_score_rows.groupby(slice_keys).size()

    # look each slice up by its key tuple; missing slices become 0
    slice_index = slc_df_t.set_index(slice_keys).index
    slc_df_t[new_col] = slice_index.map(n_per_slice).fillna(0).astype(int)


# Rows of shw_df1 with trackScore <= 0.0, counted per reco slice as "nshowers"
get_nshowers(slc_df_t=slc_reco_df, shw_df_t=shw_df1, score=0.0, new_col="nshowers")
slc_reco_df.head(2)


In [None]:
# Row-wise isTPC on the reco slices; note the column here is named "inTPC", not "isTPC"
slc_reco_df["inTPC"] = slc_reco_df.apply(isTPC, axis="columns")
slc_reco_df.head(2)

In [None]:
# "nshowers" for reco slices with inTPC == 1, unit-width bins centred on 0..7
plot_selection_1D(df=slc_reco_df, variable="nshowers", B=np.linspace(-0.5, 7.5, 9),
                  units="Counts", sel="inTPC == 1", log=0)

In [None]:
# NOTE(review): this call repeats the previous cell with identical arguments
plot_selection_1D(df=slc_reco_df, variable="nshowers", B=np.linspace(-0.5, 7.5, 9),
                  units="Counts", sel="inTPC == 1", log=0)

In [None]:
def get_nshowers_extra(slc_df_t, shw_df_t, score, cut, new_col):
    """Count, per slice, the rows of `shw_df_t` passing `cut` with "trackScore" <= `score`.

    Parameters
    ----------
    slc_df_t : pandas.DataFrame
        Slice table with run/subrun/evt/slc columns; receives the new column in place.
    shw_df_t : pandas.DataFrame
        Table with run/subrun/evt/slc and "trackScore" columns plus whatever
        columns `cut` refers to.
    score : float
        Upper bound (inclusive) on "trackScore".
    cut : str
        ``DataFrame.query`` expression applied before the track-score threshold.
    new_col : str
        Name of the integer column written to `slc_df_t` (0 for slices with no
        passing row).

    Returns
    -------
    None
    """
    # evaluate the query once and reuse the result for the track-score mask
    passing_cut = shw_df_t.query(cut)
    filtered = passing_cut[passing_cut['trackScore'] <= score]

    # number of surviving rows in each slice
    counts = filtered.groupby(['run', 'subrun', 'evt', 'slc']).size()

    # look each slice up by its key tuple; slices without an entry get 0
    slc_df_t[new_col] = slc_df_t.set_index(['run', 'subrun', 'evt', 'slc']).index.map(counts).fillna(0).astype(int)

# As "nshowers", but only rows of shw_df1 with bestplane_energy > 0.02 are counted
get_nshowers_extra(slc_df_t=slc_reco_df, shw_df_t=shw_df1, score=0.0,
                   cut="bestplane_energy > 0.02", new_col="nshowersEcut")
slc_reco_df.head(2)

In [None]:
# "nshowersEcut" for reco slices with inTPC == 1, unit-width bins centred on 0..7
plot_selection_1D(df=slc_reco_df, variable="nshowersEcut", B=np.linspace(-0.5, 7.5, 9),
                  units="Counts", sel="inTPC == 1", log=0)

In [None]:
def get_purity(df, top):
    """Return the fraction of rows in `df` whose "TOP" column equals `top`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "TOP" column.
    top : int
        Topology code to count.

    Returns
    -------
    float
        N(TOP == top) / N(all rows); NaN when `df` has no rows, since the
        fraction is undefined there.
    """
    N_tot = df.shape[0]
    if N_tot == 0:
        # an empty selection has no defined purity; avoid ZeroDivisionError
        return float("nan")
    N_top = df.query("TOP == "+str(top)).shape[0]
    return N_top / N_tot

# Fraction of all truth slices labelled with topology 0
print("Top 0 Purity", get_purity(df=slc_df, top=0))
