# ROOT Data File Investigation
Common snippets for inspecting the contents of a ROOT file.

## Loading

In [None]:
# INTRO
import os
from frame.file_structure import PROJECT_ROOT

# Run the notebook from the project root so the relative paths used below resolve.
os.chdir(PROJECT_ROOT)

# IPython magic: display the current working directory to confirm the switch.
%pwd

In [None]:
# Switch for file loading: True selects the local file, False the remote
# XRootD/HTTP file (the two loading cells are guarded by this flag).
is_file_local: bool = True

In [None]:
# FOR LOCAL FILE
# Relative path of the ROOT file on disk; only set when the local source is selected.
if is_file_local:
    file_name = "data/cms-open-data/0922B4A4-59D8-2C4E-837E-D5832DEBC7D5.root"

In [None]:
from os import popen


# REMOTE XROOTD/HTTP FILE
if not is_file_local:
    # CERN Open Data record ID, and the 0-based position of the wanted file
    # within the listing returned for that record.
    recid = 69543
    index = 1

    # Ask cernopendata-client for the record's file locations. The listing is
    # read line by line; surrounding whitespace (including the trailing
    # newline) is stripped and blank lines are dropped.
    with popen(f"cernopendata-client get-file-locations --recid {recid}") as process:
        data_files = [line.strip() for line in process if line.strip()]

    if not data_files:
        raise RuntimeError(
            f"cernopendata-client returned no file locations for recid {recid}"
        )
    if not 0 <= index < len(data_files):
        raise IndexError(
            f"index {index} is out of range: recid {recid} lists "
            f"{len(data_files)} file(s)"
        )

    # Select the single file at `index`, mirroring the local branch where
    # `file_name` is one path string rather than a list of raw lines.
    file_name = data_files[index]

In [None]:
# Loaded DataSet props equivalent to json file input
# Per-object properties to load; every collection also contributes its
# multiplicity branch ("n<Collection>") ahead of its properties.
_properties_per_collection = (
    ("Tau", ("eta", "pt", "phi", "charge")),
    ("Muon", ("pt", "eta", "phi", "charge")),
    ("Electron", ("pt", "eta", "phi", "charge")),
    ("Jet", ("eta", "pt", "phi", "mass")),
)

dataset_loaded__observables_to_load = []
for _collection, _properties in _properties_per_collection:
    dataset_loaded__observables_to_load.append(f"n{_collection}")
    dataset_loaded__observables_to_load.extend(
        f"{_collection}_{_property}" for _property in _properties
    )

# Event selection expression, optional branch aliases and the event load limit.
dataset_loaded__cut = " & ".join(
    ("(nTau == 1)", "(nMuon == 0)", "(nElectron == 1)")
)
dataset_loaded__aliases = None
dataset_loaded__event_amount_load_limit = 100_000

In [None]:
# Detection props
# Binning settings first, then the observables handed to the detector config.
detector__binning_number_of_bins = 100
detector__binning_maxima = 600
detector__detect_observable_names = [
    f"{_lepton}_pt_0" for _lepton in ("Electron", "Muon")
]

In [None]:
from frame.file_system.root_reader import load_root_events

# Collect the dataset settings defined above and forward them to the loader
# as keyword arguments.
_load_options = {
    "branch_names": dataset_loaded__observables_to_load,
    "cut": dataset_loaded__cut,
    "aliases": dataset_loaded__aliases,
    "stop": dataset_loaded__event_amount_load_limit,
}
events = load_root_events(file_name, **_load_options)

In [None]:
# Display a summary of the loaded data. This reads the private `_data`
# attribute directly (presumably a pandas DataFrame — TODO confirm).
events._data.describe()

In [None]:
import awkward as ak
import numpy as np
import vector
from particle import Particle
from particle.pdgid.literals import tau_minus, mu_minus, e_minus

vector.register_awkward()


def _leading_particles(events, collection, event_mask, pdgid):
    """Zip the leading ``collection`` object of every event into a Momentum4D array.

    Args:
        events: Loaded events exposing ``slice_along_observable_names``.
        collection: Branch prefix, e.g. ``"Tau"``; the ``<collection>_pt_0``,
            ``<collection>_eta_0`` and ``<collection>_phi_0`` observables are read.
        event_mask: Per-event boolean array; only used as the template for the
            constant mass column.
        pdgid: PDG ID whose mass (as reported by ``particle``) is assigned to
            every entry.

    Returns:
        An awkward array named ``"Momentum4D"`` carrying the vector behavior.
    """
    # NOTE(review): `particle` reports masses in MeV, while CMS NanoAOD
    # kinematic branches are usually in GeV — confirm the loader's units,
    # otherwise the invariant masses below mix units.
    return ak.zip(
        {
            "pt": events.slice_along_observable_names([f"{collection}_pt_0"]),
            "eta": events.slice_along_observable_names([f"{collection}_eta_0"]),
            "phi": events.slice_along_observable_names([f"{collection}_phi_0"]),
            "mass": np.ones_like(event_mask) * Particle.from_pdgid(pdgid).mass,
        },
        with_name="Momentum4D",
        behavior=vector.backends.awkward.behavior,
    )


def _pairwise_angle_and_mass(first, second):
    """Return (opening angles, invariant masses) for element-wise pairs.

    Kept element-by-element because the shape of the sliced observables is not
    visible here; vector's array-level ``deltaangle`` / ``+`` may be a faster
    drop-in — TODO confirm before vectorizing.
    """
    angles = np.array([a.deltaangle(b) for a, b in zip(first, second)])
    masses = np.array([(a + b).mass for a, b in zip(first, second)])
    return angles, masses


# Post query event cuts
is_tau_event = events.slice_along_observable_names(["nTau"]).flatten() > 0
is_muon_event = events.slice_along_observable_names(["nMuon"]).flatten() > 0
is_electron_event = events.slice_along_observable_names(["nElectron"]).flatten() > 0

# Enabling particle definitions if cuts sufficient to not create nan particles
if np.all(is_tau_event):
    leading_tau_particles = _leading_particles(
        events, "Tau", is_tau_event, tau_minus
    )

    if np.all(is_muon_event):
        leading_muon_particles = _leading_particles(
            events, "Muon", is_muon_event, mu_minus
        )
        Tau_Muon_angle, Tau_Muon_invariant_mass = _pairwise_angle_and_mass(
            leading_tau_particles, leading_muon_particles
        )

    if np.all(is_electron_event):
        leading_electron_particles = _leading_particles(
            events, "Electron", is_electron_event, e_minus
        )
        Tau_Electron_angle, Tau_Electron_invariant_mass = _pairwise_angle_and_mass(
            leading_tau_particles, leading_electron_particles
        )

In [None]:
from data_tools.detector.detector_config import DetectorConfig


# Bundle the detector settings defined above and pass them on by keyword.
_detector_settings = {
    "detector__detect_observable_names": detector__detect_observable_names,
    "detector__binning_maxima": detector__binning_maxima,
    "detector__binning_number_of_bins": detector__binning_number_of_bins,
}
detector_config = DetectorConfig(**_detector_settings)

## Plotting

In [None]:
# Definitions
# The first two detected observables are compared against each other.
x_properties = detector__detect_observable_names
property_1_name = x_properties[0]
property_2_name = x_properties[1]

# Take the values from the loaded events: the detector config is constructed
# from observable names and binning settings only, so it holds no event data.
# NOTE(review): confirm DetectorConfig has no `slice_along_observable_names`
# of its own that was intended here.
property_1 = events.slice_along_observable_names([property_1_name])
property_2 = events.slice_along_observable_names([property_2_name])

# NOTE(review): CMS NanoAOD momenta are usually in GeV — confirm this label.
x_units = "MeV"

In [None]:
from matplotlib.pyplot import show, figure

from plot.plot_utils import utils__plot_datset_lfv_comparison

# Fresh figure plus the binning of the first detected observable.
fig = figure()
bin_edges, bin_centers = detector_config.observable_bins(x_properties[0])

# Positional arguments for the comparison plot, in the order the helper expects.
_comparison_args = (
    fig,
    property_1,
    property_1_name,
    property_2,
    property_2_name,
    bin_edges,
    bin_centers,
    f"Histogram of densities with cuts {dataset_loaded__cut}",
    x_units,
    "counts",
)
utils__plot_datset_lfv_comparison(*_comparison_args)

show()