# ROOT Data File Investigation
Common snippets for inspecting the contents of a ROOT file.

## Loading

In [1]:
# INTRO
# Switch the working directory to PROJECT_ROOT so that relative paths used in
# this notebook (e.g. "data/...") are resolved from there.
import os
from frame.file_structure import PROJECT_ROOT

os.chdir(PROJECT_ROOT)

# IPython magic: show the current working directory to confirm the chdir.
%pwd

In [None]:
# Switch selecting the file source: when True the local-file cell sets
# `file_name`, when False the remote (cernopendata-client) cell does.
is_file_local: bool = True

In [None]:
# FOR LOCAL FILE
# Relative path; resolved against the current working directory.
if is_file_local:
    file_name = "data/cms-open-data/0922B4A4-59D8-2C4E-837E-D5832DEBC7D5.root"

In [None]:
from os import popen


# REMOTE XROOTD/HTTP FILE
if not is_file_local:

    # Specify the record ID for the CERN Open Data file and index within it
    recid = 69543
    index = 0

    # Get urls for file from CERN opendata record id.
    # The command prints one location per line; strip the trailing newline
    # (otherwise it ends up inside `file_name`) and drop blank lines.
    with popen(f"cernopendata-client get-file-locations --recid {recid}") as process:
        data_files = [line.strip() for line in process if line.strip()]

    # An empty listing usually means the command failed or is not installed;
    # say so explicitly instead of failing with a bare IndexError below.
    if not data_files:
        raise RuntimeError(
            f"cernopendata-client returned no file locations for recid {recid}"
        )

    file_name = data_files[index]

In [2]:
# Configuration values; these mirror what a json input file would provide.

# Branch names requested from the file, grouped per particle collection.
dataset_loaded__observable_names = [
    # taus
    "nTau", "Tau_eta", "Tau_pt", "Tau_phi", "Tau_mass",
    # muons
    "nMuon", "Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass",
    # electrons
    "nElectron", "Electron_pt", "Electron_eta", "Electron_phi",
    "Electron_mass",
]

# Selection expression, alias table and event limit for the load step.
dataset_loaded__cut = "(nTau > 0) & (nMuon > 0)"
dataset_loaded__aliases = None
dataset_loaded__event_amount_load_limit = 1_000

# NOTE(review): meaning/ordering of these maxima is not visible here — confirm.
dataset__detector_binning_maxima = [4, 0.5, 4, 0.1056583745, 4, 0.000511]

In [None]:
from frame.file_system.root_reader import load_root_events

# Load events from `file_name`, forwarding the configured branch names, cut,
# aliases and event limit (as `stop`) to the project's ROOT reader.
events = load_root_events(
    file_name,
    stop=dataset_loaded__event_amount_load_limit,
    aliases=dataset_loaded__aliases,
    cut=dataset_loaded__cut,
    branch_names=dataset_loaded__observable_names,
)

In [11]:
import awkward as ak
import numpy as np
from numpy import nan
import vector

vector.register_awkward()


def _leading_particles(source, prefix):
    """Zip the leading (index 0) kinematics of one collection into 4-vectors.

    Reads the "<prefix>_pt_0", "<prefix>_eta_0", "<prefix>_phi_0" and
    "<prefix>_mass_0" observables from *source* and zips them into a record
    array named "Momentum4D". That is the name the tau records already used;
    the previous "muon_vector"/"electron_vector" names are not ones `vector`
    registers, so those records carried no vector methods such as
    `deltaangle`.
    """
    return ak.zip(
        {
            component: source.slice_along_observable_names(
                [f"{prefix}_{component}_0"]
            )
            for component in ("pt", "eta", "phi", "mass")
        },
        with_name="Momentum4D",
        behavior=vector.backends.awkward.behavior,
    )


def _has_nan_pt(particle):
    """Return True when the pt of *particle* is (or contains) NaN.

    An identity test against `numpy.nan` does not detect computed NaN values,
    so `isnan` is used; `np.any` covers both a scalar and an array-valued pt.
    """
    return bool(np.any(np.isnan(particle.pt)))


def _pairwise_angles(first, second):
    """Return deltaangle for each aligned pair, skipping pairs with NaN pt."""
    return [
        a.deltaangle(b)
        for a, b in zip(first, second)
        if not (_has_nan_pt(a) or _has_nan_pt(b))
    ]


# Post query event cuts
is_tau_event = events.slice_along_observable_names(["nTau"]).flatten() > 0
is_muon_event = events.slice_along_observable_names(["nMuon"]).flatten() > 0
is_electron_event = events.slice_along_observable_names(["nElectron"]).flatten() > 0

# Evaluate each "every event has this particle" guard once and reuse it below.
all_events_have_tau = bool(np.all(is_tau_event))
all_events_have_muon = bool(np.all(is_muon_event))
all_events_have_electron = bool(np.all(is_electron_event))

# Enabling particle definitions if cuts sufficient to not create nan particles
if all_events_have_tau:
    tau_events = events[is_tau_event]
    leading_tau_particles = _leading_particles(events, "Tau")

if all_events_have_muon:
    muon_events = events[is_muon_event]
    leading_muon_particles = _leading_particles(events, "Muon")

if all_events_have_electron:
    electron_events = events[is_electron_event]
    leading_electron_particles = _leading_particles(events, "Electron")

# Angles between the leading tau and the leading muon / electron of each event.
if all_events_have_tau:
    if all_events_have_muon:
        Tau_Muon_angle = _pairwise_angles(
            leading_tau_particles, leading_muon_particles
        )
    if all_events_have_electron:
        Tau_Electron_angle = _pairwise_angles(
            leading_tau_particles, leading_electron_particles
        )

In [None]:
# Display the events whose nTau value is greater than zero.
events[events.slice_along_observable_names(["nTau"]).flatten() > 0]

## Plotting

In [None]:
# Plot configuration
# d is the histogram dimensionality: 1 selects the 1d draw, any other value
# selects the 2d draw in the plotting cell.
d = 1
x_bins = 100
x_property = "Muon_pt_0"

# The second axis is only configured when the 1d draw is not selected.
if not d == 1:
    y_bins = x_bins
    y_property = "Muon_pt_1"

In [None]:
from matplotlib.pyplot import hist, hist2d, show, title, xlabel, ylabel
from numpy import isnan


# Common preparations: drop NaN entries of the x observable.
x = events.slice_along_observable_names([x_property])
clean_x = x[~isnan(x)]

if d == 1:
    # 1d draw (log-scaled counts); assigning the result keeps the cell output quiet.
    h = hist(
        clean_x,
        bins=x_bins,
        log=True,
    )
    ylabel("Counts")
    title(f"Histogram of {x_property}")

else:
    # d > 1 preparations: keep only entries where both x and y are not NaN,
    # so the two arrays stay aligned element by element.
    y = events.slice_along_observable_names([y_property])
    clean_y = y[~isnan(x)]

    has_y = ~isnan(clean_y)
    clean_x = clean_x[has_y]
    clean_y = clean_y[has_y]

    # 2d draw: plot the cleaned arrays (previously the raw, possibly NaN
    # containing, x/y were plotted and the cleaned ones were left unused).
    # The x >= 1 selection and the plotted range are unchanged.
    in_selection = clean_x >= 1
    hist2d(
        clean_x[in_selection],
        clean_y[in_selection],
        bins=[x_bins, y_bins],
        range=[[0, 800], [0, 800]],
        norm="symlog",
    )
    ylabel(y_property)
    title(f"Histogram of {x_property} vs {y_property}")

xlabel(x_property)

show()