In [None]:
# Let's explore the neutrino pile-up, normalized by the number of events

In [None]:
import ROOT
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import sys
import multiprocessing as mp
import uproot
import pandas as pd
import pickle
import h5py
import gzip
import math
import timeit

# Input ntuple selection: `infile` is the single path consumed by the loading
# cell below. Earlier samples are kept commented out so they can be swapped in.
# The paths are relative, so they resolve against the kernel's working directory.

# First attempt
#infile = "../NTuples/eta_production_ntuple_test_nodirt.root"

# Latest Eta Production
#infile = "../NTuples/eta_production_nodirt_ntuple_batch1.root"

# MC CV Sample --> Low Stats
#infile = "../NTuples/mc_production_with_fmatch_ntuple.root"

# MC CV Sample --> High Stats
#infile = "../NTuples/mc_production_with_fmatch_ntuple_more_stats.root"
infile = "../NTuples/mc_production_with_fmatch_ntuple_more_stats_v2.root"

In [None]:
# uproot handle used for every tree -> DataFrame conversion below.
inFile = uproot.open(infile)

# The POT histogram is read through PyROOT. Fail with a clear message when the
# file or the histogram is missing (PyROOT hands back a null proxy instead of
# raising), and always close the TFile, even if the read fails.
inFileROOT = ROOT.TFile.Open(infile, "READ")
if not inFileROOT or inFileROOT.IsZombie():
    raise OSError(f"Could not open ROOT file: {infile}")
try:
    #h_tot_pot = inFileROOT.Get("TotalPOT")
    h_tot_pot = inFileROOT.Get("TOTPOT_Clone")
    if not h_tot_pot:
        raise KeyError(f"Histogram 'TOTPOT_Clone' not found in {infile}")
    # Bin 1 is read before the file is closed, while the histogram is still valid.
    TOT_POT = h_tot_pot.GetBinContent(1)
finally:
    inFileROOT.Close()

# From here on TOT_POT is a formatted *string*; it is only used in axis labels.
TOT_POT = f"{TOT_POT:.2e}"
print("Total POT", TOT_POT)

# Tree handles.
slc_tree = inFile["slc_truth_tree"]
slc_reco_tree = inFile["slc_tree"]
pfp_tree = inFile["pfp_tree"]

cosmic_tree1 = inFile["cosmic_tree1"]
cosmic_tree2 = inFile["cosmic_tree2"]

particle_tree1 = inFile["particle_tree1"]
particle_tree2 = inFile["particle_tree2"]

# Read every branch of every tree into a pandas DataFrame.
pfp_df = pfp_tree.arrays(pfp_tree.keys(), library="pd")

slc_df = slc_tree.arrays(slc_tree.keys(), library="pd")
slc_reco_df = slc_reco_tree.arrays(slc_reco_tree.keys(), library="pd")


particle_df1 = particle_tree1.arrays(particle_tree1.keys(), library="pd")
particle_df2 = particle_tree2.arrays(particle_tree2.keys(), library="pd")

cosmic_df1 = cosmic_tree1.arrays(cosmic_tree1.keys(), library="pd")
cosmic_df2 = cosmic_tree2.arrays(cosmic_tree2.keys(), library="pd")

slc_df[:2]

In [None]:
def isTPC(row):
    """Flag whether the vertex stored in ``row`` lies inside the box used as the TPC.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide "vtx_x", "vtx_y" and "vtx_z".

    Returns
    -------
    int
        1 when -200 <= vtx_x <= 200, -200 <= vtx_y <= 200 and
        0 <= vtx_z <= 500 (all bounds inclusive), otherwise 0.
    """
    # Guard clauses: bail out on the first coordinate outside its range.
    if not (-200 <= row["vtx_x"] <= 200):
        return 0
    if not (-200 <= row["vtx_y"] <= 200):
        return 0
    if not (0 <= row["vtx_z"] <= 500):
        return 0
    return 1

# Add the inTPC flag (row by row, via isTPC) to both slice tables.
for frame in (slc_df, slc_reco_df):
    frame["inTPC"] = frame.apply(isTPC, axis=1)
slc_df.head(2)

In [None]:
def map_pdg_counts_to_slices(slc_df_t, other_df, pdg, new_col):
    """Count, per slice, the rows of ``other_df`` with a given PDG code.

    Parameters
    ----------
    slc_df_t : DataFrame
        Slice table with 'run', 'subrun', 'evt', 'slc' columns. Modified in
        place: ``new_col`` is added (or overwritten).
    other_df : DataFrame
        Table with the same four key columns plus a 'pdg' column.
    pdg : number
        PDG code to select in ``other_df``.
    new_col : str
        Name of the integer count column written to ``slc_df_t``.

    Returns
    -------
    None
    """
    slice_keys = ['run', 'subrun', 'evt', 'slc']

    # Keep only the rows carrying the requested PDG code.
    matching = other_df.loc[other_df['pdg'] == pdg]

    # Number of matching rows for each (run, subrun, evt, slc) combination.
    per_slice = matching.groupby(slice_keys).size()

    # Look each slice up in the counts; slices without any match get 0.
    slice_index = slc_df_t.set_index(slice_keys).index
    slc_df_t[new_col] = slice_index.map(per_slice).fillna(0).astype(int)


# One count column per PDG code of interest, taken from particle_df1.
for pdg_code, column in ((111, "npi0"), (221, "neta"), (22, "ngamma"), (13, "nmuminus")):
    map_pdg_counts_to_slices(slc_df, particle_df1, pdg_code, column)

slc_df.head(2)

In [None]:
# Integer topology code -> legend label (LaTeX where needed).
# The same codes are written to the "TOP" column and used as keys below.
topology_labels = {
    0:r"$\nu_{\mu} CC$",
    1:r"$\nu_{\mu} NC$",
    2:r"$\nu_{e} CC$",
    3:r"$\nu_{e} NC$",
    4:r"$\bar{\nu}_{\mu}$",
    5:r"$\bar{\nu}_{e}$",
    6:r"DIRT $\nu$",
    7:"Cosmic",
}

# Integer topology code -> pandas query string selecting that topology.
# The strings reference the pdg / iscc / isnc columns and the inTPC flag.
# Codes 0-5 require inTPC == 1; code 6 takes the same four PDG codes with
# inTPC == 0.
# NOTE(review): "pdg == -1" presumably marks slices with no neutrino truth
# match (hence "Cosmic") -- confirm against the ntuple maker.
topology_selections = {
    0:"pdg == 14.0 and iscc == 1.0 and inTPC == 1",
    1:"pdg == 14.0 and isnc == 1.0 and inTPC == 1",
    2:"pdg == 12.0 and iscc == 1.0 and inTPC == 1",
    3:"pdg == 12.0 and isnc == 1.0 and inTPC == 1",
    4:"pdg == -14.0 and inTPC == 1",
    5:"pdg == -12.0 and inTPC == 1",
    6:"(pdg == 14.0 or pdg == 12.0 or pdg == -14.0 or pdg == -12.0 ) and inTPC == 0",
    7:"pdg == -1",
}

In [None]:
# Stamp each slice in slc_df with the code of the selection it satisfies.
# Slices matching no selection keep a missing TOP value.
for num, selection in topology_selections.items():
    passing = slc_df.query(selection).index
    condition = slc_df.index.isin(passing)
    slc_df.loc[condition, "TOP"] = num

# Series assignment aligns on the row index.
# NOTE(review): this assumes slc_reco_df and slc_df describe the same slices
# row for row -- confirm.
slc_reco_df["TOP"] = slc_df["TOP"]
slc_reco_df.head(2)

In [None]:
# Add topology labels to other dataframes
df_small_filtered = slc_df[['run', 'subrun', 'evt', 'slc', 'TOP']]

# Drop any TOP column left by a previous run of this cell before merging.
# Without this, re-running the cell yields TOP_x / TOP_y instead of TOP and
# later "TOP == ..." selections break. On a first run the drop is a no-op.
pfp_df = (
    pfp_df
    .drop(columns='TOP', errors='ignore')
    .merge(df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left')
)
#cosmic_df1 = cosmic_df1.merge(df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left')
particle_df1 = (
    particle_df1
    .drop(columns='TOP', errors='ignore')
    .merge(df_small_filtered, on=['run', 'subrun', 'evt', 'slc'], how='left')
)
# Index-aligned copy; plain assignment is already safe to repeat.
slc_reco_df["TOP"] = slc_df["TOP"]
particle_df1[:2]

In [None]:
# Build the table of unique (run, subrun, evt) combinations found in slc_df.
runs, subruns, evts = (slc_df[key].values for key in ("run", "subrun", "evt"))

data = {'run': runs, 'subrun': subruns, 'evt': evts}

# One row per slice first, then collapsed to one row per event.
event_df = pd.DataFrame(data).drop_duplicates()

# Number of unique events: the normalisation used by the plots below.
N_EVENTS = len(event_df)

print("N_EVENTS:", N_EVENTS)


# Get the Event Normalization
#N_EVENTS = slc_df.groupby(['run', 'subrun']).size()


In [None]:
def map_top_to_events(evt_df_t, other_df, top, new_col):
    """Count, per event, the rows of ``other_df`` with a given topology code.

    Parameters
    ----------
    evt_df_t : DataFrame
        Event table with 'run', 'subrun', 'evt' columns. Modified in place:
        ``new_col`` is added (or overwritten).
    other_df : DataFrame
        Table with the same three key columns plus a 'TOP' column.
    top : number
        Topology code to select in ``other_df``.
    new_col : str
        Name of the integer count column written to ``evt_df_t``.

    Returns
    -------
    None
    """
    event_keys = ['run', 'subrun', 'evt']

    # Keep only the rows of the requested topology.
    matching = other_df.loc[other_df['TOP'] == top]

    # Number of matching rows for each (run, subrun, evt) combination.
    per_event = matching.groupby(event_keys).size()

    # Look each event up in the counts; events without any match get 0.
    event_index = evt_df_t.set_index(event_keys).index
    evt_df_t[new_col] = event_index.map(per_event).fillna(0).astype(int)


# Per-event slice count for every topology code, stored in event_df under the
# code's string form ("0" ... "7"), using all slices in slc_df.
for num in topology_labels:
    map_top_to_events(event_df, slc_df, num, str(num))

event_df.head(2)

In [None]:
# Before any beam window selection

# Unit-width bins centred on the integer multiplicities 1..30: that needs 31
# edges (0.5, 1.5, ..., 30.5). With 32 edges the bin width is 30/31, the
# integers drift across the edges and one bin is always empty.
# Multiplicity 0 lies below the first edge and is therefore not drawn.
B = np.linspace(0.5, 30.5, 31)

print(B)

for num in range(len(topology_labels.keys())):
    d = event_df[str(num)].values
    # Weight each event by 1/N_EVENTS so bin contents are fractions of events.
    w = np.ones_like(d)*(1.0/N_EVENTS)
    plt.hist(d, bins=B, weights=w, histtype="step", linewidth=2, label=topology_labels[num])

plt.legend()

plt.xlabel("Slice Multiplicity [Counts]", fontsize=14)
plt.ylabel("Counts/Event/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.yscale("log")
plt.title("No Beam Window Selection", fontsize=20)
plt.show()


In [None]:
# Flash-match time of cosmic-labelled slices (TOP == 7) versus all the others,
# restricted to 0 < fmatch_time < 5.
B = np.linspace(0, 5, 20)
for selection, label in (
    ("TOP == 7 and (0 < fmatch_time < 5)", "Cosmic"),
    ("TOP != 7 and (0 < fmatch_time < 5)", r"$\nu$"),
):
    plt.hist(slc_reco_df.query(selection)["fmatch_time"], bins=B,
             histtype="step", linewidth=2, label=label)

plt.xlabel("Flash Match Time", fontsize=14)
plt.ylabel("Slice Count/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.legend()
plt.show()

In [None]:
# Peek at the first two reco slices.
slc_reco_df.head(2)

In [None]:
# Same flash-match time comparison, keeping only slices with is_clear_cosmic == 0.
B = np.linspace(0, 5, 20)
for selection, label in (
    ("TOP == 7 and (0 < fmatch_time < 5) and (is_clear_cosmic == 0.0)", "Cosmic"),
    ("TOP != 7 and (0 < fmatch_time < 5) and (is_clear_cosmic == 0.0)", r"$\nu$"),
):
    plt.hist(slc_reco_df.query(selection)["fmatch_time"], bins=B,
             histtype="step", linewidth=2, label=label)

plt.xlabel("Flash Match Time", fontsize=14)
plt.ylabel("Slice Count/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.legend()
plt.show()

In [None]:
# Clear-cosmic cut: keep the reco slices with is_clear_cosmic == 0.
slc_reco_df_clear = slc_reco_df.query("is_clear_cosmic == 0.0")

# Recount per event; this overwrites the "0" ... "7" columns of event_df.
for num in topology_labels:
    map_top_to_events(event_df, slc_reco_df_clear, num, str(num))

event_df.head(2)

In [None]:
# Slice multiplicity per event after the clear-cosmic cut (no beam window yet)

# Unit-width bins with edges 0.5, 1.5, ..., 30.5. np.arange(0.5, 30.5, 1)
# excludes its stop value, so it ended at 29.5 and silently dropped
# multiplicity 30; linspace includes both end points.
# B is reused by the later multiplicity cells (bin centres and histograms).
B = np.linspace(0.5, 30.5, 31)

print(B)

for num in range(len(topology_labels.keys())):
    d = event_df[str(num)].values
    # Weight each event by 1/N_EVENTS so bin contents are fractions of events.
    w = np.ones_like(d)*(1.0/N_EVENTS)
    plt.hist(d, bins=B, weights=w, histtype="step", linewidth=2, label=topology_labels[num])

plt.legend()

plt.xlabel("Slice Multiplicity [Counts]", fontsize=14)
plt.ylabel("Counts/Event/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.yscale("log")
plt.title("Clear Cosmic Selection", fontsize=20)
plt.show()

In [None]:
# Clear-cosmic cut plus beam window: 0 <= fmatch_time <= 2.1.
slc_reco_df_clear_beam = slc_reco_df.query("(is_clear_cosmic == 0.0) and (0 <= fmatch_time <= 2.1)")

# Recount per event; this overwrites the "0" ... "7" columns of event_df.
for num in topology_labels:
    map_top_to_events(event_df, slc_reco_df_clear_beam, num, str(num))

event_df.head(2)

In [None]:

# Bin geometry derived from the current multiplicity binning B.
bin_centers = 0.5 * (B[1:] + B[:-1])
bin_width = B[1] - B[0]

# Raw (un-normalised) histogram contents of topologies 0-5, i.e. everything
# except dirt (6) and cosmic (7).
count_arr = []

for num, label in topology_labels.items():
    d = event_df[str(num)].values
    w = np.ones_like(d) * (1.0 / N_EVENTS)
    c, e, _ = plt.hist(d, bins=B, weights=w, histtype="step", linewidth=2, label=label)
    if num not in (6, 7):
        # Undo the 1/N_EVENTS weighting to get back event counts.
        count_arr.append(c * N_EVENTS)

# Bin-by-bin sum of the topology 0-5 histograms.
all_nus = sum(count_arr, np.zeros_like(count_arr[0]))

# Summed points with sqrt(N) uncertainties, scaled by the same 1/N_EVENTS.
plt.errorbar(bin_centers, all_nus / N_EVENTS,
             xerr=np.full_like(all_nus, 0.5), yerr=np.sqrt(all_nus) / N_EVENTS,
             fmt="o", c="black", label=r"$\nu$")

plt.legend()

plt.xlabel("Slice Multiplicity [Counts]", fontsize=14)
plt.ylabel("Counts/Event/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.xlim([0, 10])
plt.yscale("log")
plt.title("Clear Cosmic Selection + Beam Window", fontsize=20)
plt.show()

In [None]:
# Beam window only (no clear-cosmic cut): 0 <= fmatch_time <= 2.1.
slc_reco_df_beam = slc_reco_df.query("0 <= fmatch_time <= 2.1")

# Recount per event; this overwrites the "0" ... "7" columns of event_df.
for num in topology_labels:
    map_top_to_events(event_df, slc_reco_df_beam, num, str(num))

event_df.head(2)

In [None]:
# Uses B and bin_centers as defined by the earlier multiplicity cells.
# count_arr: raw histogram contents of topologies 0-5 (no dirt, no cosmic).
# all_counts: raw histogram contents of every topology, in code order.
count_arr = []
all_counts = []

for num, label in topology_labels.items():
    d = event_df[str(num)].values
    w = np.ones_like(d) * (1.0 / N_EVENTS)
    c, e, _ = plt.hist(d, bins=B, weights=w, histtype="step", linewidth=2, label=label)
    # Undo the 1/N_EVENTS weighting to get back event counts.
    raw_counts = c * N_EVENTS
    all_counts.append(raw_counts)
    if num not in (6, 7):
        count_arr.append(c * N_EVENTS)

# Bin-by-bin sum of the topology 0-5 histograms.
all_nus = sum(count_arr, np.zeros_like(count_arr[0]))

# Summed points with sqrt(N) uncertainties, scaled by the same 1/N_EVENTS.
plt.errorbar(bin_centers, all_nus / N_EVENTS,
             xerr=np.full_like(all_nus, 0.5), yerr=np.sqrt(all_nus) / N_EVENTS,
             fmt="o", c="black", label=r"$\nu$")

plt.legend()

plt.xlabel("Slice Multiplicity [Counts]", fontsize=14)
plt.ylabel("Counts/Event/bin/"+str(TOT_POT)+" POT", fontsize=14)
plt.xlim([0, 10])
plt.yscale("log")
plt.title("Beam Window Selection Only", fontsize=20)
plt.show()

In [None]:
# Histogram totals divided by N_EVENTS. all_counts is in topology-code order,
# so the last entry is code 7 ("Cosmic") and the one before is code 6 (dirt).
cosmic_counts, dirt_counts = all_counts[-1], all_counts[-2]
print(sum(cosmic_counts) / N_EVENTS)
print(sum(dirt_counts) / N_EVENTS)

# Same quantity for the topology 0-5 histograms combined.
nu_sum = sum(sum(counts) for counts in count_arr)

print(nu_sum / N_EVENTS)

In [None]:
def get_event_nu_multiplicity(row):
    """Count the slices of one event whose TOP is neither 6 (dirt) nor 7 (cosmic).

    Reads the module-level ``slc_df``; every call scans all of its rows.

    Parameters
    ----------
    row : mapping-like (e.g. a row of event_df)
        Must provide "run", "subrun" and "evt".

    Returns
    -------
    Number of slc_df rows with the same run/subrun/evt whose TOP value differs
    from both 6 and 7. A missing (NaN) TOP also compares unequal to both and
    is therefore counted.
    """
    r, sr, e = row["run"], row["subrun"], row["evt"]

    # Rows of slc_df that belong to this event.
    same_event = (
        (slc_df["run"].values == r)
        & (slc_df["subrun"].values == sr)
        & (slc_df["evt"].values == e)
    )
    event_tops = np.array(slc_df["TOP"].values)[same_event]

    # Keep what is neither cosmic nor dirt.
    keep = (event_tops != 7) & (event_tops != 6)
    return sum(keep)

print("made an event selection function")

# Per-event count of slices whose TOP is neither 6 (dirt) nor 7 (cosmic),
# computed in one grouped pass over slc_df. This gives the same numbers as
# applying get_event_nu_multiplicity row by row, which rescans all of slc_df
# for every event (quadratic in practice). As there, a missing (NaN) TOP is
# counted, because it is not in [6, 7].
nu_per_event = (
    slc_df.assign(_is_av_nu=~slc_df["TOP"].isin([6, 7]))
    .groupby(["run", "subrun", "evt"])["_is_av_nu"]
    .sum()
)

# Same lookup pattern as map_top_to_events: the assignment is positional.
event_df["N_AV_NU"] = (
    event_df.set_index(["run", "subrun", "evt"]).index
    .map(nu_per_event)
    .fillna(0)
    .astype(int)
)
event_df[:2]

In [None]:
# Fraction of events with at least one non-dirt, non-cosmic slice.
events_with_nu = event_df.query("N_AV_NU > 0")
N_nu = float(len(events_with_nu))

print(N_nu / event_df.shape[0])

# Let's Try an Event Loop

In [None]:
def get_event_tops(row):
    """Return the TOP values of every slc_df slice belonging to one event.

    This helper was called below but defined nowhere in the notebook, so the
    cell raised NameError on a fresh kernel. It uses the same run/subrun/evt
    match as get_event_nu_multiplicity.

    Parameters
    ----------
    row : mapping-like (e.g. a row of event_df)
        Must provide "run", "subrun" and "evt".

    Returns
    -------
    numpy.ndarray
        TOP values of the matching slc_df rows (empty if none match).
    """
    same_event = (
        (slc_df["run"].values == row["run"])
        & (slc_df["subrun"].values == row["subrun"])
        & (slc_df["evt"].values == row["evt"])
    )
    return slc_df["TOP"].values[same_event]


# loop over each unique event
N_nu = 0
for num in range(event_df.shape[0]):
    tops = get_event_tops(event_df.iloc[num])
    m1 = (tops != 7) # not cosmic
    m2 = (tops != 6) # not dirt
    m = m1 & m2
    n = sum(m)
    # Count the event once if it holds at least one such slice.
    if n > 0:
        N_nu += 1

print((1.0*N_nu)/event_df.shape[0])
