In [None]:
import ROOT
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import sys
import multiprocessing as mp
import uproot
import pandas as pd
import pickle
import h5py
import gzip
import math
import timeit


# Path of the input ntuple ROOT file opened by the loading cell below.
# Alternative samples are kept commented out; enable exactly one `infile` line.
#infile = "../NTuples/test_truth.root"
#infile = "../NTuples/full_tpceta_sample.root"

# Sample currently in use.
infile = "../NTuples/tpcetanocosmic_full_sample_v2.root"
#infile = "../NTuples/production_ntuples_v2.root"

In [None]:
# Open the ntuple with uproot; every tree below is read through this handle.
inFile = uproot.open(infile)

# The total-POT histogram is read through PyROOT. TFile.Open and TFile.Get
# return null objects on failure, so check both explicitly instead of failing
# later with an obscure error, and close the file even if reading fails.
inFileROOT = ROOT.TFile.Open(infile, "READ")
if not inFileROOT or inFileROOT.IsZombie():
    raise OSError("Could not open ROOT file: " + str(infile))
try:
    #h_tot_pot = inFileROOT.Get("TotalPOT")
    h_tot_pot = inFileROOT.Get("TOTPOT_Clone")
    if not h_tot_pot:
        raise KeyError("Histogram 'TOTPOT_Clone' not found in " + str(infile))
    # Bin 1 is read as the POT value.
    TOT_POT = h_tot_pot.GetBinContent(1)
finally:
    inFileROOT.Close()

# From here on TOT_POT is a formatted string; later cells embed it in axis labels.
TOT_POT = f"{TOT_POT:.2e}"
print("Total POT", TOT_POT)

# Tree handles.
slc_tree = inFile["slc_truth_tree"]
slc_reco_tree = inFile["slc_tree"]
particle_tree1 = inFile["particle_tree1"]
particle_tree2 = inFile["particle_tree2"]
shw_tree1 = inFile["shower_tree1"]
shw_tree2 = inFile["shower_tree2"]
trk_tree1 = inFile["track_tree1"]
trk_tree2 = inFile["track_tree2"]

# Read every branch of each tree into a pandas DataFrame.
slc_df = slc_tree.arrays(slc_tree.keys(), library="pd")
slc_reco_df = slc_reco_tree.arrays(slc_reco_tree.keys(), library="pd")
shw_df1 = shw_tree1.arrays(shw_tree1.keys(), library="pd")
shw_df2 = shw_tree2.arrays(shw_tree2.keys(), library="pd")
trk_df1 = trk_tree1.arrays(trk_tree1.keys(), library="pd")
trk_df2 = trk_tree2.arrays(trk_tree2.keys(), library="pd")

particle_df1 = particle_tree1.arrays(particle_tree1.keys(), library="pd")
particle_df2 = particle_tree2.arrays(particle_tree2.keys(), library="pd")

# Preview the first two slices.
slc_df[:2]

In [None]:
# Preview the first two rows of the first particle table.
particle_df1.head(2)

In [None]:
# Preview the first two rows of the second particle table.
particle_df2.head(2)

In [None]:
def map_pdg_counts_to_slices(slc_df_t, other_df, pdg, new_col):
    """Add a per-slice count of `other_df` rows with a given PDG code.

    Rows of `other_df` whose 'pdg' equals `pdg` are counted per
    (run, subrun, evt, slc) key, and the count is written into
    `slc_df_t[new_col]` in place. Slices with no matching row get 0.

    Parameters
    ----------
    slc_df_t : pandas.DataFrame
        Slice table with 'run', 'subrun', 'evt', 'slc' columns; modified in place.
    other_df : pandas.DataFrame
        Table with the same four key columns plus a 'pdg' column.
    pdg : int
        PDG code to count.
    new_col : str
        Name of the integer column to create or overwrite on `slc_df_t`.

    Returns
    -------
    None
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # Keep only the rows carrying the requested PDG code.
    matching = other_df[other_df['pdg'] == pdg]

    # Number of matching rows for each slice key.
    per_slice = matching.groupby(slice_keys).size()

    # Look up every slice's key in the counts; slices without a match come
    # back as NaN and are turned into 0 before the integer cast.
    key_index = slc_df_t.set_index(slice_keys).index
    looked_up = key_index.map(per_slice)
    slc_df_t[new_col] = looked_up.fillna(0).astype(int)


# Count PDG 111 entries per slice into the "npi0" column, then preview.
map_pdg_counts_to_slices(slc_df, particle_df1, pdg=111, new_col="npi0")
slc_df.head(2)

In [None]:
def get_slice_neta(row):
    """Count rows of the global `particle_df1` with pdg 221 in this row's event.

    Matching uses run, subrun and evt only (not the slice index). Intended for
    row-wise use with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping
        Must provide "run", "subrun" and "evt".

    Returns
    -------
    int
        Number of matching rows whose "pdg" equals 221.
    """
    event_query = " and ".join(
        f"{key} == {row[key]!s}" for key in ("run", "subrun", "evt")
    )
    event_particles = particle_df1.query(event_query)
    return sum(event_particles["pdg"].values == 221)

# Count PDG 221 entries per slice into the "neta" column; the commented line
# is the row-wise alternative based on get_slice_neta.
map_pdg_counts_to_slices(slc_df, particle_df1, pdg=221, new_col="neta")
#slc_df["neta"] = slc_df.apply(get_slice_neta, axis=1)
slc_df.head(2)

In [None]:
def isTPC(row):
    """Return 1 if the row's vertex is inside the box, otherwise 0.

    The box is -200 <= vtx_x <= 200, -200 <= vtx_y <= 200 and
    0 <= vtx_z <= 500, with all bounds inclusive.

    Parameters
    ----------
    row : mapping
        Must provide "vtx_x", "vtx_y" and "vtx_z".

    Returns
    -------
    int
        1 when all three coordinates are within bounds, else 0.
    """
    # Reject as soon as one coordinate is out of range.
    if not (-200 <= row["vtx_x"] <= 200):
        return 0
    if not (-200 <= row["vtx_y"] <= 200):
        return 0
    if not (0 <= row["vtx_z"] <= 500):
        return 0
    return 1

# Flag slices whose vertex lies inside the box tested by isTPC()
# (-200..200 in x and y, 0..500 in z, bounds inclusive). Column-wise range
# checks give the same 0/1 values as applying isTPC row by row, without a
# Python-level call per slice. Series.between is inclusive on both ends by
# default, and NaN coordinates evaluate to 0 just as in isTPC().
slc_df["isTPC"] = (
    slc_df["vtx_x"].between(-200, 200)
    & slc_df["vtx_y"].between(-200, 200)
    & slc_df["vtx_z"].between(0, 500)
).astype(int)
slc_df[:2]

In [None]:
# Straight-line distance between each particle's start and end point.
delta_x = particle_df2["end_x"] - particle_df2["start_x"]
delta_y = particle_df2["end_y"] - particle_df2["start_y"]
delta_z = particle_df2["end_z"] - particle_df2["start_z"]
particle_df2["range"] = (delta_x**2 + delta_y**2 + delta_z**2)**0.5

# Distribution of that distance, drawn as an outline on a log count axis.
plt.hist(particle_df2["range"].values, bins=100, histtype="step", linewidth=2)
plt.yscale('log')
plt.xlabel("Particle Range [cm]", fontsize=14)
plt.ylabel("Counts", fontsize=14)
plt.show()

In [None]:
# Display names keyed by integer interaction code.
# NOTE(review): presumably these are the values of the "mode" column used in
# the selection strings below (0 = QE, 1 = Resonant, 2 = DIS, 10 = MEC agree
# with the labels there) -- confirm against the ntuple producer.
interaction_codes = {
    0: "QE",
    1: "Resonant",
    2: "DIS",
    3: "Coherent",
    4: "Coherent Elastic",
    5: "Electron scatt.",
    6: "IMDAnnihilation",
    7: r"Inverse $\beta$ decay",
    8: "Glashow resonance",
    9: "AMNuGamma",
    10: "MEC",
    11: "Diffractive",
    12: "EM",
    13: "Weak Mix"
}


# Legend labels (matplotlib mathtext) for the basic topologies; keys pair
# one-to-one with the query strings in basic_topology_selections.
basic_topology_labels = {
    0:r"RES: $\nu_{\mu}\eta$",
    1:r"QE: $\nu_{\mu}\eta$",
    2:r"DIS: $\nu_{\mu}\eta$",
    3:r"MEC: $\nu_{\mu}\eta$",
    4:r"$\bar{\nu}_{\mu}\eta$",
    5:r"$\nu_{e}\eta$"+ " or "+ r"$\bar{\nu}_{e}\eta$",
    6:r"$\nu_{\mu}\eta \rightarrow 0\pi^{0}$",
    7:"Other"

}

# DataFrame.query expressions selecting each basic topology; keys pair
# one-to-one with basic_topology_labels.
basic_topology_selections = {
    0:"pdg == 14.0 and mode == 1.0 and neta > 0",
    1:"pdg == 14.0 and mode == 0.0 and neta > 0",
    2:"pdg == 14.0 and mode == 2.0 and neta > 0",
    3:"pdg == 14.0 and mode == 10.0 and neta > 0",
    4:"pdg == -14.0 and neta > 0",
    5:"(pdg == 12.0 or pdg == -12.0) and neta > 0",
    6: "pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0",
    # Complement of selection 6. The query grammar has no `!` prefix operator
    # (only `!=`), so the negation is written with `~`.
    7: "~(pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0)"
}


# Legend labels (matplotlib mathtext) for the detailed topology scheme, which
# differentiates between a few interesting eta topologies. Keys pair
# one-to-one with topology_selections1 and topology_colors1.
topology_labels1 = {

    # Resonant nu_mu CC eta, split by number of pi0.
    0:r"RES: $\nu_{\mu}CC\eta \rightarrow 0\pi^{0}$",
    1:r"RES: $\nu_{\mu}CC\eta \rightarrow 1\pi^{0}$",
    2:r"RES: $\nu_{\mu}CC\eta \rightarrow 2\pi^{0}$",
    3:r"RES: $\nu_{\mu}CC\eta \rightarrow 3\pi^{0}$",
    
    # Resonant nu_mu NC eta, split by number of pi0.
    4:r"RES: $\nu_{\mu}NC\eta \rightarrow 0\pi^{0}$",
    5:r"RES: $\nu_{\mu}NC\eta \rightarrow 1\pi^{0}$",
    6:r"RES: $\nu_{\mu}NC\eta \rightarrow 2\pi^{0}$",
    7:r"RES: $\nu_{\mu}NC\eta \rightarrow 3\pi^{0}$",

    # Remaining eta categories, then categories without an eta.
    8:r"Not RES: $\nu_{\mu}\eta$",
    9:r"$\bar{\nu}_{\mu}\eta$",
    10:r"$\nu_{e}\eta$"+ " or "+ r"$\bar{\nu}_{e}\eta$",
    11:r"$\nu_{\mu}$ CC $1\pi^{0}$",
    12:r"$\nu_{\mu}$ NC $1\pi^{0}$",
    13:r"$\nu_{\mu}$ CC Other",
    14:r"$\nu_{\mu}$ NC Other",
    15:r"$\nu$ Other",
    16:r"Dirt $\nu$",
    17:"Cosmic",
    18:r"RES $\nu_{\mu}\eta \rightarrow > 3\pi^{0}$"
       
}

# DataFrame.query expressions for the detailed topology scheme; keys pair
# one-to-one with topology_labels1 and topology_colors1. The expressions use
# the columns pdg, mode, iscc, isnc, neta, npi0 and isTPC.
# NOTE(review): entries 9 and 10 do not require isTPC == 1, so a slice with
# isTPC == 0 can satisfy both 9 (or 10) and 16. Cells that query each entry
# independently would count such slices twice -- confirm whether that is intended.
topology_selections1 = {
    
    0:"pdg == 14.0 and mode == 1.0 and iscc == 1.0 and neta > 0 and npi0 == 0 and isTPC == 1",
    1:"pdg == 14.0 and mode == 1.0 and iscc == 1.0 and neta > 0 and npi0 == 1 and isTPC == 1",
    2:"pdg == 14.0 and mode == 1.0 and iscc == 1.0 and neta > 0 and npi0 == 2 and isTPC == 1",
    3:"pdg == 14.0 and mode == 1.0 and iscc == 1.0 and neta > 0 and npi0 == 3 and isTPC == 1",
    
    4:"pdg == 14.0 and mode == 1.0 and isnc == 1.0 and neta > 0 and npi0 == 0 and isTPC == 1",
    5:"pdg == 14.0 and mode == 1.0 and isnc == 1.0 and neta > 0 and npi0 == 1 and isTPC == 1",
    6:"pdg == 14.0 and mode == 1.0 and isnc == 1.0 and neta > 0 and npi0 == 2 and isTPC == 1",
    7:"pdg == 14.0 and mode == 1.0 and isnc == 1.0 and neta > 0 and npi0 == 3 and isTPC == 1",

    8:"pdg == 14.0 and mode != 1.0 and neta > 0 and isTPC == 1",
    9:"pdg == -14.0 and neta > 0",
    10:"(pdg == 12.0 or pdg == -12.0) and neta > 0",
    11:"pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",
    12:"pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",
    13:"pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",
    14:"pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",
    15:"(pdg == 12 or pdg == -12 or pdg == -14) and neta == 0 and isTPC == 1",
    16:"(pdg == 14 or pdg == -14 or pdg == 12 or pdg == -12) and isTPC == 0",
    17:"pdg == -1",
    # Earlier definition of entry 18, kept for reference.
    #18:"(pdg != 14 and pdg != -14 and pdg != 12 and pdg != -12 and pdg != -1)"
    18:"pdg == 14 and mode == 1.0 and neta > 0 and npi0 > 3 and isTPC == 1"
    
}

# Matplotlib colour name for each detailed topology; keys pair one-to-one
# with topology_labels1 and topology_selections1.
# NOTE(review): keys 13 and 18 are both "red"; in the stacked plots they
# differ only by the hatch applied to odd keys.
topology_colors1 = {
    0:"magenta", 
    1:"purple",
    2:"violet",
    3:"deeppink",
    4:"blue",
    5:"navy",
    6:"royalblue",
    7:"lavender",
    8:"indigo",
    9:"green",
    10:"lime",
    11:"cyan",
    12:"aquamarine",
    13:"red",
    14:"maroon",
    15:"orange",
    16:"coral",
    17:"gray",
    18:"red"
}

# Slice subsets for basic topologies 0 through 5, one frame per topology.
top0, top1, top2, top3, top4, top5 = (
    slc_df.query(basic_topology_selections[key]) for key in range(6)
)

In [None]:
# Distinct values present in the "mode" column.
distinct_modes = set(slc_df["mode"].values)
all_modes = list(distinct_modes)
print(all_modes)

In [None]:
# plot the pi0 branching fraction
#
# Stacked distribution of the true primary pi0 count for the basic eta
# topologies 0-5, with the distribution of all neta > 0 slices overlaid.
# Everything is divided by the total number of slices N.

all_vals = slc_df.query("neta > 0")["npi0"].values
top0_vals = top0["npi0"].values
top1_vals = top1["npi0"].values
top2_vals = top2["npi0"].values
top3_vals = top3["npi0"].values
top4_vals = top4["npi0"].values
top5_vals = top5["npi0"].values

# Unit-wide bins centred on 0, 1, 2 and 3.
B = np.linspace(-0.5, 3.5, 5)
print(B)

# np.histogram returns the same bin contents plt.hist would, without drawing
# histograms that would only have to be cleared again. Counts are cast to
# float to match what plt.hist returned.
counts, bin_edges = np.histogram(all_vals, bins=B)
counts = counts.astype(float)

topology_counts = [
    np.histogram(vals, bins=B)[0].astype(float)
    for vals in (top0_vals, top1_vals, top2_vals, top3_vals, top4_vals, top5_vals)
]
counts0, counts1, counts2, counts3, counts4, counts5 = topology_counts

N = slc_df.shape[0]

bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2  # calculate bin centers
bin_width = bin_edges[1] - bin_edges[0]  # calculate bin width

# Stack the topologies; `stack_bottom` carries the running top of the stack
# so each layer starts where the previous one ended.
bar_colors = ['blue', 'red', 'green', 'orange', 'purple', 'yellow']
stack_bottom = np.zeros_like(bin_centers)
for key, (layer_counts, bar_color) in enumerate(zip(topology_counts, bar_colors)):
    layer_fraction = layer_counts*(1.0/N)
    plt.bar(bin_centers, layer_fraction, width=bin_width,
            bottom=stack_bottom, color=bar_color, alpha=0.6, label=basic_topology_labels[key])
    stack_bottom = stack_bottom + layer_fraction

plt.xlabel("True Number of Primary pi0", fontsize=14)
plt.ylabel("Slice Fraction", fontsize=14)
# NOTE(review): the black points are the neta > 0 slices (all_vals), although
# the legend entry reads "All Slices"; label left unchanged -- confirm wording.
plt.errorbar(bin_centers, counts*(1.0/N),
             xerr=bin_width/2, yerr=np.sqrt(counts)/N, c="black", fmt="o", label="All Slices")
# Display the plot
plt.legend()
plt.show()

In [None]:
PDG = 22
def get_slice_Nprim_pdg(row):
    """Count rows of the global `particle_df1` with pdg == PDG in this row's event.

    The PDG code is read from the module-level ``PDG`` at call time. Matching
    uses run, subrun and evt only (not the slice index). Intended for row-wise
    use with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping
        Must provide "run", "subrun" and "evt".

    Returns
    -------
    int
        Number of matching rows whose "pdg" equals ``PDG``.
    """
    event_query = " and ".join(
        f"{key} == {row[key]!s}" for key in ("run", "subrun", "evt")
    )
    event_particles = particle_df1.query(event_query)
    return sum(event_particles["pdg"].values == PDG)

# Count PDG 22 entries per slice into the "ngamma" column; the commented line
# is the row-wise alternative based on get_slice_Nprim_pdg.
#slc_df["ngamma"] = slc_df.apply(get_slice_Nprim_pdg, axis=1)
map_pdg_counts_to_slices(slc_df, particle_df1, pdg=22, new_col="ngamma")
slc_df.head(2)

In [None]:

# Stacked histogram of the true primary pi0 count, one layer per entry of
# topology_selections1, with the distribution of all slices overlaid.
all_counts = []
all_norms = []

# Unit-wide bins centred on 0..4.
B = np.linspace(-0.5, 4.5, 6)

# np.histogram returns the same bin contents plt.hist would, without drawing
# histograms that would only have to be cleared again.
counts_all, bin_edges_all = np.histogram(slc_df["npi0"].values, bins=B)

for num in range(len(topology_selections1)):
    temp = slc_df.query(topology_selections1[num])
    counts, bin_edges = np.histogram(temp["npi0"].values, bins=B)
    all_counts.append(counts)
    all_norms.append(temp.shape[0])

bin_centers = (bin_edges_all[:-1] + bin_edges_all[1:]) / 2  # calculate bin centers
bin_width = bin_edges_all[1] - bin_edges_all[0]  # calculate bin width

# N_prev is the running top of the stack, used as the bottom of the next layer.
N_prev = np.zeros_like(bin_centers)
for num in range(len(topology_selections1)):
    # Hatch odd-numbered topologies so neighbouring layers are distinguishable.
    h = "//" if num % 2 != 0 else ""

    plt.bar(bin_centers, all_counts[num], width=bin_width,
            bottom=N_prev, color=topology_colors1[num], alpha=0.6, hatch=h, label=topology_labels1[num])
    N_prev += all_counts[num]


#plt.xlabel("True Number of Primary Gammas", fontsize=14)
plt.xlabel("True Number of Primary pi0", fontsize=14)
plt.ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
# The horizontal bar spans the bin: half the bin width on each side.
plt.errorbar(bin_centers, counts_all,
             xerr=bin_width/2, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All Slices")

# Display the plot
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncols=2)
#plt.ylim([0, 6000])
#plt.yscale('log')
plt.show()

In [None]:

# Stacked histogram of the slice vertex x position, one layer per entry of
# topology_selections1, with the distribution of all slices overlaid.
all_counts = []
all_norms = []

# 19 equal bins between -300 and 300.
B = np.linspace(-300, 300, 20)

# np.histogram returns the same bin contents plt.hist would, without drawing
# histograms that would only have to be cleared again.
counts_all, bin_edges_all = np.histogram(slc_df["vtx_x"].values, bins=B)

for num in range(len(topology_selections1)):
    temp = slc_df.query(topology_selections1[num])
    counts, bin_edges = np.histogram(temp["vtx_x"].values, bins=B)
    all_counts.append(counts)
    all_norms.append(temp.shape[0])

bin_centers = (bin_edges_all[:-1] + bin_edges_all[1:]) / 2  # calculate bin centers
bin_width = bin_edges_all[1] - bin_edges_all[0]  # calculate bin width

# N_prev is the running top of the stack, used as the bottom of the next layer.
N_prev = np.zeros_like(bin_centers)
for num in range(len(topology_selections1)):
    # Hatch odd-numbered topologies so neighbouring layers are distinguishable.
    h = "//" if num % 2 != 0 else ""

    plt.bar(bin_centers, all_counts[num], width=bin_width,
            bottom=N_prev, color=topology_colors1[num], alpha=0.6, hatch=h, label=topology_labels1[num])
    N_prev += all_counts[num]


#plt.xlabel("True Number of Primary Gammas", fontsize=14)
plt.xlabel("True Slice VTX X [cm]", fontsize=14)
plt.ylabel("Counts/bin", fontsize=14)
# The horizontal bar must span the bin. It was hard-coded to 0.5, which is
# only correct for unit-wide bins; these bins are about 31.6 wide.
plt.errorbar(bin_centers, counts_all,
             xerr=bin_width/2, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All Slices")

# Display the plot
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncols=2)
#plt.ylim([0, 200])
#plt.yscale('log')
plt.show()

In [None]:
# Let's look at some reco stuff
slc_reco_df["neta"] = slc_df["neta"]
slc_reco_df["npi0"] = slc_df["npi0"]
slc_reco_df["isTPC"] = slc_df["isTPC"]

slc_reco_df[:2]

In [None]:
# Write each slice's topology key into "TOP". Selections are applied in key
# order, so when a slice satisfies several selections the highest key wins;
# slices matching none keep a missing value.
for num, selection in topology_selections1.items():
    selected_index = slc_df.query(selection).index
    in_topology = slc_df.index.isin(selected_index)
    slc_df.loc[in_topology, "TOP"] = num

# Propagate the topology to the reco table (aligned on the index).
slc_reco_df["TOP"] = slc_df["TOP"]
slc_reco_df.head(2)

In [None]:
# Stacked histogram of the reco neutrino score, one layer per "TOP" value,
# with the distribution of all reco slices overlaid.
B = np.linspace(-1.5, 1.5, 11)

all_counts = []
all_norms = []

# np.histogram returns the same bin contents plt.hist would, without drawing
# histograms that would only have to be cleared again.
counts_all, bin_edges_all = np.histogram(slc_reco_df["nu_score"].values, bins=B)

for num in range(len(topology_selections1)):
    temp = slc_reco_df.query("TOP == "+str(num))
    counts, bin_edges = np.histogram(temp["nu_score"].values, bins=B)
    all_counts.append(counts)
    all_norms.append(temp.shape[0])

bin_centers = (bin_edges_all[:-1] + bin_edges_all[1:]) / 2  # calculate bin centers
bin_width = bin_edges_all[1] - bin_edges_all[0]  # calculate bin width

# N_prev is the running top of the stack, used as the bottom of the next layer.
N_prev = np.zeros_like(bin_centers)
for num in range(len(topology_selections1)):
    # Hatch odd-numbered topologies so neighbouring layers are distinguishable.
    h = "//" if num % 2 != 0 else ""

    plt.bar(bin_centers, all_counts[num], width=bin_width,
            bottom=N_prev, color=topology_colors1[num], alpha=0.6, hatch=h, label=topology_labels1[num])
    N_prev += all_counts[num]


#plt.xlabel("True Number of Primary Gammas", fontsize=14)
plt.xlabel("Neutrino Score", fontsize=14)
plt.ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.errorbar(bin_centers, counts_all, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All Slices")

# Display the plot
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncols=2)
#plt.ylim([0, 200])
#plt.yscale('log')
plt.show()

In [None]:

def plot_selection_1D(df, sel, variable, B):
    """Plot `variable` for the rows of `df` passing `sel`, stacked by topology.

    One bar layer is drawn per key of the module-level ``topology_selections1``
    (rows whose "TOP" column equals that key), coloured and labelled from
    ``topology_colors1`` and ``topology_labels1``. The histogram of all
    selected rows is overlaid as black points with sqrt(count) error bars.
    Rows whose "TOP" is missing enter only the overlay, not the stack. The
    y axis is logarithmic and its label embeds the module-level ``TOT_POT``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing `variable` and a "TOP" column.
    sel : str
        ``DataFrame.query`` expression applied to `df` before plotting.
    variable : str
        Name of the column to histogram.
    B : int or sequence of float
        Bin specification passed to ``numpy.histogram`` for the overlay. The
        resulting edges are reused for every topology layer, so an integer
        bin count also yields consistent bins across layers.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    new_df = df.query(sel)

    # Histogram of everything that passed the selection; its edges define the
    # binning for all layers. np.histogram yields the same bin contents as
    # plt.hist without drawing anything.
    counts_all, bin_edges_all = np.histogram(new_df[variable].values, bins=B)
    bin_centers = (bin_edges_all[:-1] + bin_edges_all[1:]) / 2
    bin_width = bin_edges_all[1] - bin_edges_all[0]

    # Per-topology bin contents.
    all_counts = []
    for num in range(len(topology_selections1)):
        temp = new_df.query("TOP == "+str(num))
        counts, _ = np.histogram(temp[variable].values, bins=bin_edges_all)
        all_counts.append(counts)

    # Start from an empty current figure, as before.
    plt.clf()

    # N_prev is the running top of the stack, used as the next layer's bottom.
    N_prev = np.zeros_like(bin_centers)
    for num in range(len(topology_selections1)):
        # Hatch odd-numbered topologies so neighbouring layers are distinguishable.
        h = "//" if num % 2 != 0 else ""

        plt.bar(bin_centers, all_counts[num], width=bin_width,
                bottom=N_prev, color=topology_colors1[num], alpha=0.6, hatch=h, label=topology_labels1[num])
        N_prev += all_counts[num]

    plt.xlabel(variable + "("+sel+")", fontsize=14)
    plt.ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
    plt.errorbar(bin_centers, counts_all, yerr=np.sqrt(counts_all), c="black", fmt="o", label="All")

    # Display the plot
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', ncols=2)
    plt.yscale("log")
    plt.show()


In [None]:
# Slice keys plus the topology assigned to each slice.
df_small_filtered = slc_df[['run', 'subrun', 'evt', 'slc', 'TOP']]

# Attach the topology to every shower by matching on the slice keys. A "TOP"
# column left over from a previous run of this cell is dropped first;
# otherwise the merge would create TOP_x / TOP_y and later "TOP" queries
# would fail. errors='ignore' makes the drop a no-op on the first run.
shw_df1 = shw_df1.drop(columns='TOP', errors='ignore').merge(
    df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left')

shw_df1[:2]

In [None]:

# Shower trackScore, stacked by topology, for showers with an assigned topology.
plot_selection_1D(shw_df1, sel="TOP < 1000", variable="trackScore", B=np.linspace(-1, 2, 7))

In [None]:
# Attach the slice topology to every particle by matching on the slice keys.
# A "TOP" column left over from a previous run of this cell is dropped first;
# otherwise the merge would create TOP_x / TOP_y and later "TOP" queries
# would fail. errors='ignore' makes the drop a no-op on the first run.
particle_df1 = particle_df1.drop(columns='TOP', errors='ignore').merge(
    df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left')

particle_df1[:2]

In [None]:
# Topology key of every pdg == 13 particle, one unit-wide bin per key.
plot_selection_1D(particle_df1, sel="pdg == 13", variable="TOP", B=np.linspace(-0.5,  20.5, 22))


In [None]:
def get_slice_nmuon(row):
    """Count rows of the global `particle_df1` with pdg 13 in this row's event.

    Matching uses run, subrun and evt only (not the slice index). Intended for
    row-wise use with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping
        Must provide "run", "subrun" and "evt".

    Returns
    -------
    int
        Number of matching rows whose "pdg" equals 13.
    """
    event_query = " and ".join(
        f"{key} == {row[key]!s}" for key in ("run", "subrun", "evt")
    )
    event_particles = particle_df1.query(event_query)
    return sum(event_particles["pdg"].values == 13)

# Count PDG 13 entries per slice into the "nmuon" column; the commented line
# is the row-wise alternative based on get_slice_nmuon.
#slc_df["nmuon"] = slc_df.apply(get_slice_nmuon, axis=1)
map_pdg_counts_to_slices(slc_df, particle_df1, pdg=13, new_col="nmuon")

# Per-slice muon count, stacked by topology.
plot_selection_1D(slc_df, sel="neta < 1000", variable="nmuon", B=np.linspace(-0.5,  5.5, 7))