In [None]:
import os
import gzip
import json

import uproot
import awkward as ak
import dask
import dask_awkward as dak
import egamma_tnp
from egamma_tnp.triggers import TagNProbeFromNTuples
from egamma_tnp.plot import plot_efficiency
from egamma_tnp.utils.histogramming import save_hists
from matplotlib import pyplot as plt
from coffea.dataset_tools import preprocess
from distributed import Client

In [None]:
# Create the Dask distributed client used by the compute cell further down.
# No scheduler address is passed here; the diagnostics dashboard is requested
# on a fixed port so its URL stays the same between sessions.
dashboard_address = ":12345"
client = Client(dashboard_address=dashboard_address)

In [None]:
# Build the fileset from the ROOT ntuples sitting directly under ``tnptuples/``.
#
# The listing is done with ``os`` rather than ``!ls`` so the cell is plain
# Python (runs outside IPython, on any platform) and has a deterministic order.
# Only visible ``*.root`` entries are kept: the dataset name is the file name
# with its extension stripped, which is only meaningful for those files.
ntuple_dir = "tnptuples"
files = sorted(
    f"{ntuple_dir}/{entry}"
    for entry in os.listdir(ntuple_dir)
    if entry.endswith(".root") and not entry.startswith(".")
)
fileset = {}

for path in files:
    # e.g. "tnptuples/DY_LO.root" -> "DY_LO"
    dataset_name = os.path.splitext(os.path.basename(path))[0]
    # Each dataset maps its single file to the tree that should be read from it.
    fileset[dataset_name] = {"files": {path: "tnpEleTrig/fitter_tree"}}

# ``preprocess`` returns two filesets; only ``fileset_available`` is used by
# the later cells. Unreadable files are skipped instead of aborting the run.
fileset_available, fileset_updated = preprocess(
    fileset, step_size=500_000, skip_bad_files=True
)

In [None]:
# One row per trigger leg: (short name, pass-flag branch name, plateau cut).
# Keeping both pieces of information in a single table guarantees that
# ``hlt_paths`` and ``plateau_cuts`` always share exactly the same keys.
_trigger_table = [
    ("Ele30", "passHltEle30WPTightGsf", 35),
    ("Ele32", "passHltEle32WPTightGsf", 35),
    ("Ele115", "passHltEle115CaloIdVTGsfTrkIdTGsf", 120),
    ("Ele135", "passHltEle135CaloIdVTGsfTrkIdTGsf", 140),
    ("Ele23Ele12Leg1", "passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg1L1match", 25),
    ("Ele23Ele12Leg2", "passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg2", 15),
    ("DoubleEle33SeededLeg", "passHltDoubleEle33CaloIdLMWSeedLegL1match", 35),
    ("DoubleEle33UnseededLeg", "passHltDoubleEle33CaloIdLMWUnsLeg", 35),
]

# Short trigger name -> name of the pass flag handed to TagNProbeFromNTuples.
hlt_paths = {short_name: flag for short_name, flag, _ in _trigger_table}

# Short trigger name -> value later passed as ``plateau_cut``.
plateau_cuts = {short_name: cut for short_name, _, cut in _trigger_table}

# One tag-and-probe object per trigger, all built on the same preprocessed fileset.
triggers = {
    short_name: TagNProbeFromNTuples(fileset_available, flag)
    for short_name, flag in hlt_paths.items()
}

In [None]:
%%time

# Triggers that get the finer binning between 100 and 150.
high_threshold_triggers = ("Ele115", "Ele135")

# "ptbins" edges used for the high-threshold triggers.
high_threshold_ptbins = (
    5, 10, 15, 20, 22, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80,
    100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150,
    200, 250, 300, 350, 400,
)

# "ptbins" edges used for every other trigger.
default_ptbins = (
    5, 10, 12, 14, 16, 18, 20, 23, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50,
    60, 80, 100, 150, 250, 400,
)

# Region label -> [low, high] pair passed as ``eta_regions_pt``.
eta_region_bounds = {
    "barrel": (0.0, 1.4442),
    "endcap_loweta": (1.566, 2.0),
    "endcap_higheta": (2.0, 2.5),
}

to_compute = {}

for trigger_name, tnp in triggers.items():
    # The binning is a global egamma_tnp setting, so it is (re)set right
    # before each trigger's histograms are requested.
    if trigger_name in high_threshold_triggers:
        chosen_bins = high_threshold_ptbins
    else:
        chosen_bins = default_ptbins
    egamma_tnp.config.set("ptbins", list(chosen_bins))

    # Fresh list/dict objects are handed over on every iteration.
    to_compute[trigger_name] = tnp.get_tnp_histograms(
        uproot_options={"allow_read_errors_with_report": True},
        eta_regions_pt={
            region: list(bounds) for region, bounds in eta_region_bounds.items()
        },
        plateau_cut=plateau_cuts[trigger_name],
    )


# Displayed as the cell output: the columns dask-awkward reports as needed.
dak.necessary_columns(to_compute)

In [None]:
%%time

# Evaluate everything collected in ``to_compute`` in a single dask.compute
# call. The call returns a tuple with one entry per argument, so the
# one-element tuple is unpacked directly into ``out``.
(out,) = dask.compute(to_compute)

In [None]:
# Write, for every trigger and dataset, the read report as JSON and the
# histograms as a ROOT file under ``Ravindra_mc/<dataset>/``.
#
# Directories are created with ``os.makedirs(..., exist_ok=True)`` so that the
# cell can be re-run and does not require ``Ravindra_mc`` to exist beforehand.
# They are created for every dataset that is actually written, rather than
# only for the datasets present in the "Ele30" result.
output_dir = "Ravindra_mc"

for name, res in out.items():
    # Each result is a (histograms, report) pair, both keyed by dataset.
    hists, report = res

    for dataset, report_arr in report.items():
        os.makedirs(f"{output_dir}/{dataset}", exist_ok=True)
        ak.to_json(
            report_arr,
            f"{output_dir}/{dataset}/{name}_report.json",
            num_readability_spaces=1,
            num_indent_spaces=4,
        )

    for dataset, hs in hists.items():
        os.makedirs(f"{output_dir}/{dataset}", exist_ok=True)
        save_hists(f"{output_dir}/{dataset}/{name}_hists.root", hs)

In [None]:
# Overlay, for every dataset, the Ele30 efficiency taken from the
# "eta"/"entire" histograms saved in that dataset's ROOT file.
plt.figure(figsize=(10, 8))

for folder in fileset_available.keys():
    with uproot.open(f"Ravindra_mc/{folder}/Ele30_hists.root") as file:
        eta_hists = file["eta"]["entire"]
        # Read and convert the histograms while the file is still open:
        # fetching objects from a directory of an already-closed file is not
        # guaranteed to work for every uproot source backend.
        passing_hist = eta_hists["passing"].to_hist()
        all_hist = eta_hists["all"].to_hist()
        plot_efficiency(passing_hist, all_hist, label=folder)

# plt.xscale("log")
plt.ylim(0, 1.1)
plt.legend()
plt.show()