### Combine new sim files into a single summary dataframe
Include muon gun events as well

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Preliminary cut thresholds consumed by the selection masks later on.
LLH_DIFF = -0.1                   # events must have llh_diff above this
Qst1, Qst2, Qst3 = 2000, 10, 10   # lower bounds applied to qst0, qst1, qst2
RLOGL = 10                        # events must have spe_rlogl below this

In [3]:
# Nominal simulation set.
# Directory prepended to every GENIE file name when it is loaded.
prefix = '/data/tmg5746/Scores/'

# Every GENIE entry is a tuple (filename, n_evts_cut, n_files):
#   n_evts_cut -- only rows whose weight_dict NEvents equals this are kept
#   n_files    -- value written into the "n_files" column for those rows
_N_EVTS_CUT = 10000
_FLAVOUR_N_FILES = (('NuTau', 14834), ('NuMu', 17239), ('NuE', 11991))

# CC (file suffix _1)
CC_files = [(f'genie_{flavour}_1.npy', _N_EVTS_CUT, n_files)
            for flavour, n_files in _FLAVOUR_N_FILES]

# GR (file suffix _3, NuE only)
GR_files = [('genie_NuE_3.npy', _N_EVTS_CUT, 11991)]

# NC (file suffix _2)
NC_files = [(f'genie_{flavour}_2.npy', _N_EVTS_CUT, n_files)
            for flavour, n_files in _FLAVOUR_N_FILES]

# atmos (no entries listed)

# Muon gun is handled separately from the GENIE files: it is given as an
# absolute path (no prefix) and only the path element of the tuple is read.
# NOTE(review): this tuple carries four fields, unlike the three-field GENIE
# entries above -- confirm whether the extra None is intentional.
mg_file = [('/fastio2/dasha/double_pulse/data_MuonGun_all.npy', None, None, None)]

# Base name of the output file.
outname = 'nominal_new_sim'

In [4]:
# All GENIE inputs first, then the muon-gun entry as the final element.
neutrino_files = CC_files + GR_files + NC_files
files_to_load = neutrino_files + mg_file
files_to_load

[('genie_NuTau_1.npy', 10000, 14834),
 ('genie_NuMu_1.npy', 10000, 17239),
 ('genie_NuE_1.npy', 10000, 11991),
 ('genie_NuE_3.npy', 10000, 11991),
 ('genie_NuTau_2.npy', 10000, 14834),
 ('genie_NuMu_2.npy', 10000, 17239),
 ('genie_NuE_2.npy', 10000, 11991),
 ('/fastio2/dasha/double_pulse/data_MuonGun_all.npy', None, None, None)]

### Combine all the event types into a single dataframe

In [5]:
# Map each output column name to a function that extracts that column (one
# value per event) from a loaded structured array.
field_getters = {
    "qtot": lambda a: a["qtot"],
    # difference of the two rlogl values stored under "logan_veto"
    "llh_diff": lambda a: a["logan_veto"]["SPE_rlogl"] - a["logan_veto"]["Cascade_rlogl"],
    "spe_rlogl": lambda a: a["logan_veto"]["SPE_rlogl"],
    # columns 0..2 of the per-event "qst" charge / number arrays
    "qst0": lambda a: a["qst"]["q"][:, 0],
    "qst1": lambda a: a["qst"]["q"][:, 1],
    "qst2": lambda a: a["qst"]["q"][:, 2],
    "stnum0": lambda a: a["qst"]["num"][:, 0],
    "stnum1": lambda a: a["qst"]["num"][:, 1],
    "stnum2": lambda a: a["qst"]["num"][:, 2],
    "n1": lambda a: a["preds"]["n1"],
    "n2": lambda a: a["preds"]["n2"],
    "n3": lambda a: a["preds"]["n3"],
    "pid": lambda a: a["primary"]["pdg"],
    "it": lambda a: a["weight_dict"]["InteractionType"],
    # cosine of column 0 of the primary direction
    # (presumably the zenith angle -- TODO confirm)
    "coszen": lambda a: np.cos(a["primary"]["direction"][:, 0]),
    "energy": lambda a: a["primary"]["energy"],
    "n_events": lambda a: a["weight_dict"]["NEvents"],
    # placeholder of zeros; GENIE rows get the per-file value from
    # files_to_load instead, muon-gun rows keep the zeros
    "n_files": lambda a: np.zeros(len(a)),
    "typeweight": lambda a: a["weight_dict"]["TypeWeight"],
    "oneweight": lambda a: a["weight_dict"]["OneWeight"],
}

# Gather one chunk per (column, input file) and join them once at the end
# rather than re-copying every column for every file.  Each list starts with
# an empty float64 array so the joined columns come out as float64.
column_chunks = {key: [np.array([])] for key in field_getters}

# Every entry except the last is a GENIE file: (filename, n_evts_cut, n_files).
for fname, n_events, n_files in files_to_load[:-1]:
    print(f"loading {fname}")
    array = np.load(f"{prefix}{fname}")
    # select the desired dataset
    array = array[array["weight_dict"]["NEvents"] == n_events]

    for key, getter in field_getters.items():
        if key == "n_files":
            # per-file constant taken from the files_to_load entry
            chunk = np.full(len(array), n_files, dtype=float)
        else:
            chunk = getter(array)
        column_chunks[key].append(chunk)

# load muon gun separately
# The last entry of files_to_load is the muon-gun file; only its path is used.
# A distinct name is used here so the `mg_file` list defined earlier is not
# overwritten with a string (which would break rebuilding files_to_load).
mg_path = files_to_load[-1][0]
print(f"Loading {mg_path}")
mg = np.load(mg_path)
for key, getter in field_getters.items():
    if key == "n2":
        # the muon-gun array stores this prediction under "n2_1"
        new_vals = mg["preds"]["n2_1"]
    elif key in ("it", "n_events", "typeweight"):
        # these columns are filled with zeros for muon-gun events
        new_vals = np.full(len(mg), 0)
    elif key == "oneweight":
        # the muon-gun weight is read from "weight_val"
        new_vals = mg["weight_val"]
    else:
        new_vals = getter(mg)
    column_chunks[key].append(new_vals)

df_dict = {key: np.hstack(chunks) for key, chunks in column_chunks.items()}
df = pd.DataFrame(df_dict)

loading genie_NuTau_1.npy
loading genie_NuMu_1.npy
loading genie_NuE_1.npy
loading genie_NuE_3.npy
loading genie_NuTau_2.npy
loading genie_NuMu_2.npy
loading genie_NuE_2.npy
Loading /fastio2/dasha/double_pulse/data_MuonGun_all.npy


### Apply preliminary cuts

In [6]:
# Boolean row masks, one per preliminary cut.
q_mask = df["qst0"].gt(Qst1) & df["qst1"].gt(Qst2) & df["qst2"].gt(Qst3)
rlogl_mask = df["spe_rlogl"].lt(RLOGL)
llh_diff_mask = df["llh_diff"].gt(LLH_DIFF)

# Keep only rows passing every cut.  The selection is wrapped in a new
# DataFrame object (presumably so later column assignments do not trigger
# pandas chained-assignment warnings -- confirm).
passes_all_cuts = q_mask & rlogl_mask & llh_diff_mask
df = pd.DataFrame(df[passes_all_cuts])

In [7]:
# String charge asymmetry: one minus the share of qst0 in the summed
# qst0 + qst1 + qst2 charge.
qst_sum = df["qst0"] + df["qst1"] + df["qst2"]
df["q_asym"] = 1 - df["qst0"] / qst_sum

### Save the dataframe

In [8]:
import datetime

# Stamp the output name with the current local date as MM_DD_YYYY and write
# the dataframe to a new HDF5 file (mode "w" replaces any existing file).
now = datetime.datetime.now()
date_str = f"{now:%m_%d_%Y}"
out_path = f"{outname}_{date_str}.h5"
df.to_hdf(out_path, key="df", mode="w")