In [None]:
# Study the photons associated with eta meson production

In [None]:
import ROOT
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import sys
import multiprocessing as mp
import uproot
import pandas as pd
import pickle
import h5py
import gzip
import math
import timeit


# Path of the input ntuple ROOT file read by the cells below.
# Alternative samples are kept commented out; enable exactly one assignment.
#infile = "../NTuples/test_truth.root"
#infile = "../NTuples/full_tpceta_sample.root"

# Newer samples
infile = "../NTuples/tpcetanocosmic_full_sample_v2.root"
#infile = "../NTuples/production_ntuples_v2.root"

In [None]:
# The same file is opened twice: with uproot for tree -> DataFrame conversion,
# and with PyROOT to read the POT histogram.
inFile = uproot.open(infile)

inFileROOT = ROOT.TFile.Open(infile, "READ")
# (alternative histogram key, currently unused: "TotalPOT")
h_tot_pot = inFileROOT.Get("TOTPOT_Clone")
# Pull the number out before closing the file, then keep it as a
# two-decimal scientific-notation string.
TOT_POT = h_tot_pot.GetBinContent(1)
inFileROOT.Close()
TOT_POT = format(TOT_POT, ".2e")
print("Total POT", TOT_POT)

# Slice-level trees.
slc_tree = inFile["slc_truth_tree"]
slc_reco_tree = inFile["slc_tree"]
slc_df = slc_tree.arrays(slc_tree.keys(), library="pd")
slc_reco_df = slc_reco_tree.arrays(slc_reco_tree.keys(), library="pd")

# Particle trees.
particle_tree1 = inFile["particle_tree1"]
particle_tree2 = inFile["particle_tree2"]
particle_df1 = particle_tree1.arrays(particle_tree1.keys(), library="pd")
particle_df2 = particle_tree2.arrays(particle_tree2.keys(), library="pd")

# Shower trees.
shw_tree1 = inFile["shower_tree1"]
shw_tree2 = inFile["shower_tree2"]
shw_df1 = shw_tree1.arrays(shw_tree1.keys(), library="pd")
shw_df2 = shw_tree2.arrays(shw_tree2.keys(), library="pd")

# Track trees.
trk_tree1 = inFile["track_tree1"]
trk_tree2 = inFile["track_tree2"]
trk_df1 = trk_tree1.arrays(trk_tree1.keys(), library="pd")
trk_df2 = trk_tree2.arrays(trk_tree2.keys(), library="pd")

slc_df.head(2)

In [None]:
def isTPC(row):
    """Return 1 when the row's vertex is inside the box used as the TPC, else 0.

    The box is -200 <= vtx_x <= 200, -200 <= vtx_y <= 200 and
    0 <= vtx_z <= 500, all bounds inclusive. ``row`` only needs to support
    ``row["vtx_x"]``-style lookups (e.g. a pandas Series or a dict).
    """
    # Coordinates are checked in x, y, z order and the first failure returns.
    if not (-200 <= row["vtx_x"] <= 200):
        return 0
    if not (-200 <= row["vtx_y"] <= 200):
        return 0
    if not (0 <= row["vtx_z"] <= 500):
        return 0
    return 1

# Flag every truth slice and every reco slice, row by row, with isTPC.
slc_df["isTPC"] = slc_df.apply(isTPC, axis="columns")
slc_reco_df["isTPC"] = slc_reco_df.apply(isTPC, axis="columns")
slc_df.head(2)

In [None]:
# Preview the first two reco-slice rows.
slc_reco_df.head(2)

In [None]:
def map_pdg_counts_to_slices(slc_df_t, other_df, pdg, new_col):
    """Write a per-slice particle count into ``slc_df_t[new_col]`` (in place).

    Rows of ``other_df`` whose ``pdg`` column equals ``pdg`` are counted per
    (run, subrun, evt, slc). Each row of ``slc_df_t`` receives the count for
    its own key, or 0 when no such particle exists. Returns None.
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # Number of matching particles per slice, indexed by the slice key.
    per_slice = other_df.loc[other_df['pdg'] == pdg].groupby(slice_keys).size()

    # Look up every slice's key in the counts; keys with no entry become 0.
    slice_index = slc_df_t.set_index(slice_keys).index
    slc_df_t[new_col] = slice_index.map(per_slice).fillna(0).astype(int)


# Per-slice multiplicities from particle_df1: PDG code -> new slc_df column.
for _pdg_code, _count_column in ((111, "npi0"), (221, "neta"), (22, "ngamma"), (13, "nmuminus")):
    map_pdg_counts_to_slices(slc_df, particle_df1, _pdg_code, _count_column)
slc_df.head(2)

In [None]:
# Interaction-mode code (0..13, taken from the list position) -> display label.
interaction_codes = dict(enumerate([
    "QE",                       # 0
    "Resonant",                 # 1
    "DIS",                      # 2
    "Coherent",                 # 3
    "Coherent Elastic",         # 4
    "Electron scatt.",          # 5
    "IMDAnnihilation",          # 6
    r"Inverse $\beta$ decay",   # 7
    "Glashow resonance",        # 8
    "AMNuGamma",                # 9
    "MEC",                      # 10
    "Diffractive",              # 11
    "EM",                       # 12
    "Weak Mix",                 # 13
]))


# Plot labels (matplotlib mathtext) for the basic topologies; the key is the list position.
basic_topology_labels = dict(enumerate([
    r"RES: $\nu_{\mu}\eta$",                     # 0
    r"QE: $\nu_{\mu}\eta$",                      # 1
    r"DIS: $\nu_{\mu}\eta$",                     # 2
    r"MEC: $\nu_{\mu}\eta$",                     # 3
    r"$\bar{\nu}_{\mu}\eta$",                    # 4
    r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",     # 5
    r"$\nu_{\mu}\eta \rightarrow 0\pi^{0}$",     # 6
    "Other",                                     # 7
]))

# pandas ``DataFrame.query`` expressions, one per basic topology key.
# Each expression refers to the slice columns pdg, mode, neta and npi0.
basic_topology_selections = {
    0:"pdg == 14.0 and mode == 1.0 and neta > 0",
    1:"pdg == 14.0 and mode == 0.0 and neta > 0",
    2:"pdg == 14.0 and mode == 2.0 and neta > 0",
    3:"pdg == 14.0 and mode == 10.0 and neta > 0",
    4:"pdg == -14.0 and neta > 0",
    5:"(pdg == 12.0 or pdg == -12.0) and neta > 0",
    6: "pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0",
    # Complement of selection 6. Negation in a query string is spelled
    # ``not`` (or ``~``); a leading ``!`` is a syntax error.
    7: "not (pdg == 14.0 and mode == 1.0 and npi0 == 0 and neta > 0)"
}


# Labels (matplotlib mathtext) for the detailed eta topologies.
# Keys 0-3 are the RES CC eta labels with 0..3 pi0 and keys 4-7 the RES NC
# ones; they are generated. Keys 8-18 are listed explicitly, in order.
topology_labels1 = dict(enumerate(
    [
        r"RES: $\nu_{\mu}" + current + r"\eta \rightarrow " + str(n_pi0) + r"\pi^{0}$"
        for current in ("CC", "NC")
        for n_pi0 in range(4)
    ]
    + [
        r"Not RES: $\nu_{\mu}\eta$",                     # 8
        r"$\bar{\nu}_{\mu}\eta$",                        # 9
        r"$\nu_{e}\eta$ or $\bar{\nu}_{e}\eta$",         # 10
        r"$\nu_{\mu}$ CC $1\pi^{0}$",                    # 11
        r"$\nu_{\mu}$ NC $1\pi^{0}$",                    # 12
        r"$\nu_{\mu}$ CC Other",                         # 13
        r"$\nu_{\mu}$ NC Other",                         # 14
        r"$\nu$ Other",                                  # 15
        r"Dirt $\nu$",                                   # 16
        "Cosmic",                                        # 17
        r"RES $\nu_{\mu}\eta \rightarrow > 3\pi^{0}$",   # 18
    ]
))

# pandas ``DataFrame.query`` expressions for the detailed topologies.
# Keys 0-3 (iscc) and 4-7 (isnc) differ only in the current flag and the
# required pi0 count, so they are generated. Keys 8-18 are listed in order.
topology_selections1 = dict(enumerate(
    [
        f"pdg == 14.0 and mode == 1.0 and {current_flag} == 1.0 and neta > 0 and npi0 == {n_pi0} and isTPC == 1"
        for current_flag in ("iscc", "isnc")
        for n_pi0 in range(4)
    ]
    + [
        "pdg == 14.0 and mode != 1.0 and neta > 0 and isTPC == 1",                       # 8
        "pdg == -14.0 and neta > 0",                                                     # 9
        "(pdg == 12.0 or pdg == -12.0) and neta > 0",                                    # 10
        "pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",        # 11
        "pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 == 1 and isTPC == 1",        # 12
        "pdg == 14.0 and iscc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",        # 13
        "pdg == 14.0 and isnc == 1.0 and neta == 0 and npi0 != 1 and isTPC == 1",        # 14
        "(pdg == 12 or pdg == -12 or pdg == -14) and neta == 0 and isTPC == 1",          # 15
        "(pdg == 14 or pdg == -14 or pdg == 12 or pdg == -12) and isTPC == 0",           # 16
        "pdg == -1",                                                                     # 17
        # Former catch-all for key 18, no longer used:
        #   "(pdg != 14 and pdg != -14 and pdg != 12 and pdg != -12 and pdg != -1)"
        "pdg == 14 and mode == 1.0 and neta > 0 and npi0 > 3 and isTPC == 1",            # 18
    ]
))

# matplotlib colour name per detailed-topology key (key = list position).
# Note that keys 13 and 18 are both "red".
topology_colors1 = dict(enumerate([
    "magenta",      # 0
    "purple",       # 1
    "violet",       # 2
    "deeppink",     # 3
    "blue",         # 4
    "navy",         # 5
    "royalblue",    # 6
    "lavender",     # 7
    "indigo",       # 8
    "green",        # 9
    "lime",         # 10
    "cyan",         # 11
    "aquamarine",   # 12
    "red",          # 13
    "maroon",       # 14
    "orange",       # 15
    "coral",        # 16
    "gray",         # 17
    "red",          # 18
]))



In [None]:
# Label each truth slice with the key of the selection it satisfies.
# Selections are applied in dictionary order, so where two selections both
# match a slice the later key overwrites the earlier one. Slices matching
# no selection are left without a value (NaN) in "TOP".
for num, selection in topology_selections1.items():
    condition = slc_df.index.isin(slc_df.query(selection).index)
    slc_df.loc[condition, "TOP"] = num

# Copy the labels to the reco slices; the assignment is aligned on the row index.
# NOTE(review): assumes slc_df and slc_reco_df describe the same slices row for row - confirm.
slc_reco_df["TOP"] = slc_df["TOP"]
slc_reco_df.head(2)

In [None]:
# Attach the slice topology label to every shower, track and particle of the
# "1" trees with a left join on the slice key.
# Re-running this cell merges a second time, so "TOP" would then already
# exist on the left-hand frames.
slice_keys = ['run', 'subrun', 'evt', 'slc']
df_small_filtered = slc_df[slice_keys + ['TOP']]

shw_df1 = shw_df1.merge(df_small_filtered, how='left', on=slice_keys)
trk_df1 = trk_df1.merge(df_small_filtered, how='left', on=slice_keys)
particle_df1 = particle_df1.merge(df_small_filtered, how='left', on=slice_keys)
# particle_df2 is not merged here; it receives "TOP" from particle_df1 later.

shw_df1.head(2)

In [None]:
# Preview the first two rows of the second shower frame.
shw_df2.head(2)

In [None]:
# Give each "2" frame the topology label of its "1" partner, aligned on the row index.
# NOTE(review): assumes the "1" and "2" frames hold the same rows in the same order - confirm.
for _target_df, _source_df in ((shw_df2, shw_df1), (trk_df2, trk_df1), (particle_df2, particle_df1)):
    _target_df["TOP"] = _source_df["TOP"]

In [None]:
# Topology key (see topology_selections1) kept for the rest of the notebook.
top = 0

# Keep only rows of the chosen topology. Each result is an explicit copy
# because later cells add columns to these frames; assigning into a bare
# query() result triggers pandas' SettingWithCopyWarning.
# This cell overwrites its inputs, so changing `top` requires re-running
# the notebook from the loading cell.
topology_cut = f"TOP == {top}"

particle_df1 = particle_df1.query(topology_cut).copy()
particle_df2 = particle_df2.query(topology_cut).copy()
shw_df1 = shw_df1.query(topology_cut).copy()
shw_df2 = shw_df2.query(topology_cut).copy()
trk_df1 = trk_df1.query(topology_cut).copy()
trk_df2 = trk_df2.query(topology_cut).copy()

print("Grabbed topology "+str(top))

In [None]:
# Preview the first two rows of the first particle frame.
particle_df1.head(2)

In [None]:
# Preview the first two rows of the second particle frame.
particle_df2.head(2)

In [None]:
# Bring the start and end coordinates over from particle_df2, aligned on the row index.
for _coord in ("start_x", "start_y", "start_z", "end_x", "end_y", "end_z"):
    particle_df1[_coord] = particle_df2[_coord]

In [None]:
def tpc_xy(xy):
    """Return 1 if an x or y coordinate satisfies -200 <= xy <= 200, else 0."""
    return 1 if -200 <= xy <= 200 else 0


def tpc_z(z):
    """Return 1 if a z coordinate satisfies 0 <= z <= 500, else 0."""
    return 1 if 0 <= z <= 500 else 0


def is_true_cont(row):
    """Return 1 if both the start and the end point of ``row`` are inside the box, else 0.

    ``row`` must provide start_x/start_y/start_z and end_x/end_y/end_z.
    A point is inside when its x and y pass ``tpc_xy`` and its z passes
    ``tpc_z``. A NaN coordinate fails every comparison and gives 0.
    """
    sx, sy, sz = row["start_x"], row["start_y"], row["start_z"]
    ex, ey, ez = row["end_x"], row["end_y"], row["end_z"]

    start_inside = tpc_xy(sx) and tpc_xy(sy) and tpc_z(sz)
    # The end-point z must be tested too. Testing the start z twice would
    # let particles that leave the box along z count as contained.
    end_inside = tpc_xy(ex) and tpc_xy(ey) and tpc_z(ez)

    return 1 if (start_inside and end_inside) else 0
    
# Flag each particle, row by row, with the result of is_true_cont.
particle_df1["cont"] = particle_df1.apply(is_true_cont, axis="columns")
particle_df1.head(2)

In [None]:
# Photons (pdg 22) flagged as contained. An explicit copy is taken because
# later cells add columns to this frame; assigning into a bare query()
# result triggers pandas' SettingWithCopyWarning.
contained_gammas = particle_df1.query("pdg == 22 and cont == 1").copy()
contained_gammas.head(2)

In [None]:
# Copy trackScore from the first shower frame to the second, aligned on the row index.
shw_df2["trackScore"] = shw_df1.loc[:, "trackScore"]
shw_df2.head(2)

In [None]:
# Name of the shw_df2 column that get_thing reads when no `var` is given.
# Cells set this just before calling contained_gammas.apply(get_thing, axis=1).
VAR = ""


def get_thing(row, var=None):
    """Return a shower quantity from ``shw_df2`` for the particle in ``row``.

    The shower is looked up by matching run, subrun, evt and slc to ``row``
    and ``bestmatch_G4ID`` to ``row["G4ID"]``. The value of the requested
    column in the first matching row is returned.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "G4ID", "run", "subrun", "evt" and "slc".
    var : str, optional
        Column of ``shw_df2`` to read. When omitted, the module-level ``VAR``
        is read at call time, which is how ``DataFrame.apply`` uses it.

    Returns
    -------
    The matched value, or the sentinel -999999 when the lookup fails
    (for example when no shower matches).

    Notes
    -----
    Every call runs a query over all of ``shw_df2``, so applying this to many
    rows is slow.
    """
    column = VAR if var is None else var

    g4id = row["G4ID"]
    r = row["run"]
    sr = row["subrun"]
    e = row["evt"]
    slc = row["slc"]
    q = "run == " + str(r) + " and subrun == "+str(sr) + " and evt == "+str(e) + " and slc == "+str(slc) + " and bestmatch_G4ID == "+str(g4id)

    try:
        return shw_df2.query(q)[column].values[0]
    except Exception:
        # Best-effort lookup: any ordinary failure (no match, bad column,
        # unparsable query) gives the sentinel. KeyboardInterrupt and
        # SystemExit are not caught, so a long apply() can be interrupted.
        return -999999


# Select the column get_thing reads, then fill it in for every contained gamma.
VAR = "conv_gap"

contained_gammas[VAR] = contained_gammas.apply(get_thing, axis="columns")
contained_gammas.head(2)

In [None]:
# Conversion-gap distribution. The cut conv_gap >= 0 also removes the
# -999999 sentinel that get_thing returns for failed lookups.
matched_gaps = contained_gammas.query("conv_gap >= 0.0")["conv_gap"].values

plt.hist(matched_gaps, bins=50, histtype="step", linewidth=2, label=topology_labels1[top])
plt.title("True Contained Gammas")
plt.xlabel("Reconstructed Conversion Gap [cm]", fontsize=14)
plt.ylabel(f"Counts/bin/{TOT_POT} POT", fontsize=14)
plt.legend()
plt.show()

In [None]:
# Copy bestplane_energy from the first shower frame to the second, aligned on the row index.
shw_df2["bestplane_energy"] = shw_df1.loc[:, "bestplane_energy"]

shw_df2.head(2)

In [None]:
# Select the column get_thing reads, then fill it in for every contained gamma.
VAR = "bestplane_energy"
contained_gammas[VAR] = contained_gammas.apply(get_thing, axis="columns")

contained_gammas.head(4)

In [None]:
# True vs. reconstructed energy for gammas with a positive best-plane energy.
# The cut also removes the -999999 sentinel from failed lookups.
matched_energy = contained_gammas.query("bestplane_energy > 0")
x = matched_energy["genE"].values
y = matched_energy["bestplane_energy"].values

plt.title("True Contained Gammas")
# Label with the topology actually selected (`top`), not a fixed key 0.
hist = plt.hist2d(x, y, bins=100, label=topology_labels1[top], density=False, cmap='rainbow')
plt.colorbar(hist[3], label='Counts')
plt.xlabel("True genE [GeV]", fontsize=14)
plt.ylabel("Best Plane Energy [GeV]", fontsize=14)
#plt.legend()
plt.show()

In [None]:
# `cm` is not used in this cell; the import is kept in case other cells rely on it.
from matplotlib import cm

# Same comparison as the full-range plot, restricted to 0 < bestplane_energy < 0.7.
zoomed_energy = contained_gammas.query("0 < bestplane_energy < 0.7")
x = zoomed_energy["genE"].values
y = zoomed_energy["bestplane_energy"].values

plt.title("True Contained Gammas")
cmap = plt.colormaps['rainbow'].copy()  # Copy the colormap to modify it
cmap.set_under(color='white')  # bins below vmin (i.e. empty bins) are drawn white
# Label with the topology actually selected (`top`), matching the legend proxy below.
hist = plt.hist2d(x, y, bins=50, label=topology_labels1[top], density=False, cmap=cmap, vmin=0.001)
# Invisible proxy artist so the legend shows the topology label.
plt.scatter([], [], color='white', label=topology_labels1[top])
# y = x reference line.
plt.plot([0, 0.7], [0, 0.7], linestyle="--", c="r", linewidth=2)
plt.colorbar(hist[3], label='Counts')
plt.xlabel("True genE [GeV]", fontsize=14)
plt.ylabel("Best Plane Energy [GeV]", fontsize=14)
plt.legend(loc="lower right")
plt.show()

In [None]:
# genE of the gammas whose bestplane_energy is negative.
unmatched_genE = contained_gammas.query("bestplane_energy < 0")["genE"].values
plt.hist(unmatched_genE, bins=100)
plt.show()

In [None]:
# Start coordinates of the gammas whose bestplane_energy is negative, one histogram per axis.
unmatched_gammas = contained_gammas.query("bestplane_energy < 0")
for _start_column, _axis_label in (("start_x", "X"), ("start_y", "Y"), ("start_z", "Z")):
    plt.hist(unmatched_gammas[_start_column].values, bins=50, histtype="step", label=_axis_label)
plt.xlabel("TPC Start Position [cm]")
plt.legend()
plt.show()

In [None]:
# Fraction of contained gammas whose bestplane_energy is negative.
N_missing = contained_gammas.query("bestplane_energy < 0").shape[0]

N_cont = contained_gammas.shape[0]

# Guard the empty selection so the cell does not raise ZeroDivisionError.
if N_cont > 0:
    print((1.0*N_missing)/N_cont)
else:
    print("No contained gammas selected; missing fraction is undefined")

In [None]:
# Select the column get_thing reads, then fill it in for every contained gamma.
VAR = "trackScore"

contained_gammas[VAR] = contained_gammas.apply(get_thing, axis="columns")
contained_gammas.head(2)

In [None]:
# Track-score distribution. The window -5 < trackScore < 2 also removes the
# -999999 sentinel from failed lookups.
selected_scores = contained_gammas.query("-5 < trackScore < 2")["trackScore"].values

# Label with the topology actually selected (`top`), not a fixed key 0.
plt.hist(selected_scores,
         bins=20, histtype="step", linewidth=2, color="blue", label=topology_labels1[top])

plt.title("Primary Gammas", fontsize=20)
plt.xlabel("Track Score", fontsize=14)
plt.ylabel("Counts/bin/"+str(TOT_POT)+" POT", fontsize=14)
#plt.yscale("log")
plt.legend(ncol=2)
plt.show()