In [1]:
import os

import awkward as ak
import dask
import dask_awkward as dak
import mplhep as hep
import uproot
from coffea.dataset_tools import preprocess
from distributed import Client
from matplotlib import pyplot as plt

import egamma_tnp
from egamma_tnp import TagNProbeFromNTuples
from egamma_tnp.plot import plot_ratio
from egamma_tnp.utils.histogramming import save_hists

In [2]:
# Input tag-and-probe ntuples: every dataset maps its ROOT file to the tree
# path that is read from that file.
fileset = {
    dataset: {"files": {root_file: "tnpEleTrig/fitter_tree"}}
    for dataset, root_file in (
        ("data_EGamma_Run2023D", "tnptuples/data_EGamma_Run2023D.root"),
        ("data_EGamma01_Run2024C", "tnptuples/data_EGamma01_Run2024C.root"),
    )
}

# `fileset_available` is what the rest of the notebook feeds to the analysis.
# NOTE(review): skip_bad_files=True presumably drops files that cannot be
# opened instead of raising -- confirm against the coffea documentation.
fileset_available, fileset_updated = preprocess(
    fileset,
    skip_bad_files=True,
    step_size=500_000,
)

In [3]:
# Short trigger label -> name of the ntuple flag that is handed to
# TagNProbeFromNTuples as the filter for that HLT path (or leg of a path).
# The short labels are reused below as dictionary keys and in output file names.
hlt_paths = {
    "Ele30": "passHltEle30WPTightGsf",
    "Ele32": "passHltEle32WPTightGsf",
    "Ele115": "passHltEle115CaloIdVTGsfTrkIdTGsf",
    "Ele135": "passHltEle135CaloIdVTGsfTrkIdTGsf",
    "Ele23Ele12Leg1": "passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg1L1match",
    "Ele23Ele12Leg2": "passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg2",
    "DoubleEle33SeededLeg": "passHltDoubleEle33CaloIdLMWSeedLegL1match",
    "DoubleEle33UnseededLeg": "passHltDoubleEle33CaloIdLMWUnsLeg",
}

# Per-trigger value passed as `plateau_cut` to the histogramming call below.
# The eta/phi legends later quote it as the probe-electron pT threshold in GeV.
# Keys must match those of `hlt_paths`.
plateau_cuts = {
    "Ele30": 35,
    "Ele32": 35,
    "Ele115": 120,
    "Ele135": 140,
    "Ele23Ele12Leg1": 25,
    "Ele23Ele12Leg2": 15,
    "DoubleEle33SeededLeg": 35,
    "DoubleEle33UnseededLeg": 35,
}

# One tag-and-probe instance per HLT path; they all share the preprocessed
# fileset and the same cut-based ID flag.
triggers = {
    name: TagNProbeFromNTuples(fileset_available, path, cutbased_id="passingCutBasedTight122XV1")
    for name, path in hlt_paths.items()
}

triggers

{'Ele30': TagNProbeFromNTuples(passHltEle30WPTightGsf, Number of files: 2, Golden JSON: None),
 'Ele32': TagNProbeFromNTuples(passHltEle32WPTightGsf, Number of files: 2, Golden JSON: None),
 'Ele115': TagNProbeFromNTuples(passHltEle115CaloIdVTGsfTrkIdTGsf, Number of files: 2, Golden JSON: None),
 'Ele135': TagNProbeFromNTuples(passHltEle135CaloIdVTGsfTrkIdTGsf, Number of files: 2, Golden JSON: None),
 'Ele23Ele12Leg1': TagNProbeFromNTuples(passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg1L1match, Number of files: 2, Golden JSON: None),
 'Ele23Ele12Leg2': TagNProbeFromNTuples(passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg2, Number of files: 2, Golden JSON: None),
 'DoubleEle33SeededLeg': TagNProbeFromNTuples(passHltDoubleEle33CaloIdLMWSeedLegL1match, Number of files: 2, Golden JSON: None),
 'DoubleEle33UnseededLeg': TagNProbeFromNTuples(passHltDoubleEle33CaloIdLMWUnsLeg, Number of files: 2, Golden JSON: None)}

In [4]:
%%time

# pT bin edges used for the Ele115 / Ele135 paths (5 GeV steps between 100 and 150).
high_threshold_pt_bins = [
    5, 10, 15, 20, 22, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80,
    100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150,
    200, 250, 300, 350, 400,
]

# pT bin edges used for every other path (2-3 GeV steps between 10 and 40).
low_threshold_pt_bins = [
    5, 10, 12, 14, 16, 18, 20, 23, 26, 28, 30, 32, 34, 36, 38, 40,
    45, 50, 60, 80, 100, 150, 250, 400,
]

to_compute = {}

for name, trigger in triggers.items():
    pt_bins = high_threshold_pt_bins if name in ("Ele115", "Ele135") else low_threshold_pt_bins
    # The binning is a package-wide setting, so it is (re)set right before the
    # histograms of each trigger are requested. A fresh list is handed over
    # every time so the templates above are never shared with the config.
    egamma_tnp.config.set("pt_bins", list(pt_bins))

    to_compute[name] = trigger.get_1d_pt_eta_phi_tnp_histograms(
        uproot_options={"allow_read_errors_with_report": True},
        eta_regions_pt={
            "barrel": [0.0, 1.4442],
            "endcap_loweta": [1.566, 2.0],
            "endcap_higheta": [2.0, 2.5],
        },
        plateau_cut=plateau_cuts[name],
    )


# Show which branches the task graph needs to read.
dak.necessary_columns(to_compute)

CPU times: user 10.4 s, sys: 43.9 ms, total: 10.5 s
Wall time: 10.5 s


{'from-uproot-f501bf161717402237baf550a491c18e': frozenset({'el_eta',
            'el_phi',
            'el_pt',
            'el_q',
            'pair_mass',
            'passHltDoubleEle33CaloIdLMWSeedLegL1match',
            'passHltDoubleEle33CaloIdLMWUnsLeg',
            'passHltEle115CaloIdVTGsfTrkIdTGsf',
            'passHltEle135CaloIdVTGsfTrkIdTGsf',
            'passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg1L1match',
            'passHltEle23Ele12CaloIdLTrackIdLIsoVLLeg2',
            'passHltEle30WPTightGsf',
            'passHltEle32WPTightGsf',
            'passingCutBasedTight122XV1',
            'tag_Ele_eta',
            'tag_Ele_pt',
            'tag_Ele_q'}),
 'from-uproot-2135da1a1c23160cada49e2ed9665d29': frozenset({'el_eta',
            'el_phi',
            'el_pt',
            'el_q',
            'pair_mass',
            'passHltDoubleEle33CaloIdLMWSeedLegL1match',
            'passHltDoubleEle33CaloIdLMWUnsLeg',
            'passHltEle115CaloIdVTGsfTrkIdTGsf',
      

In [5]:
# Create a Dask distributed client with default arguments. With no scheduler
# address given, `distributed` starts a local cluster and registers this client
# as the default scheduler for the `dask.compute` call in the next cell.
client = Client()

In [6]:
%%time

# dask.compute returns one result per positional argument; a single collection
# is passed in, so unpack the one-element tuple.
(out,) = dask.compute(to_compute)

CPU times: user 6.53 s, sys: 177 ms, total: 6.71 s
Wall time: 8.84 s


In [7]:
# Persist the computed results: for every trigger and dataset, write the read
# report as JSON and the histograms as a ROOT file under
# steam_may_2024/<dataset>/.
for name, res in out.items():
    hists, report = res

    for dataset, report_arr in report.items():
        # makedirs with exist_ok=True creates the missing parent directory and
        # lets the cell be re-run without raising FileExistsError.
        os.makedirs(f"steam_may_2024/{dataset}", exist_ok=True)
        ak.to_json(
            report_arr,
            f"steam_may_2024/{dataset}/{name}_report.json",
            num_readability_spaces=1,
            num_indent_spaces=4,
        )

    for dataset, hs in hists.items():
        # Create the directory here as well, so it does not depend on the
        # report having the same dataset keys as the histograms.
        os.makedirs(f"steam_may_2024/{dataset}", exist_ok=True)
        save_hists(f"steam_may_2024/{dataset}/{name}_hists.root", hs)

In [8]:
# Select the CMS style sheet first, then pass the notebook-specific rcParams
# (smaller figure and fonts, tight bounding box when saving) in a second call.
hep.style.use("CMS")

style_overrides = {
    "figure.figsize": (6.4, 4.8),
    "font.size": 14,
    "legend.title_fontsize": 14,
    "savefig.bbox": "tight",
}
hep.style.use(style_overrides)


def get_histograms(path):
    """Load the passing/failing histograms stored in one ROOT file.

    Parameters
    ----------
    path
        Location of the ROOT file, handed to ``uproot.open`` unchanged.

    Returns
    -------
    tuple
        Twelve histograms, always as (passing, failing) pairs, in this order:
        pt barrel, pt endcap low-eta, pt endcap high-eta, pt combined (the sum
        of the three pt regions), eta, phi.
    """
    pt_regions = ("barrel", "endcap_loweta", "endcap_higheta")

    with uproot.open(path) as file:

        def load_pair(prefix):
            # Convert while the file is still open; passing is read first.
            return (
                file[f"{prefix}/passing"].to_hist(),
                file[f"{prefix}/failing"].to_hist(),
            )

        pt = {region: load_pair(f"pt/{region}") for region in pt_regions}

        barrel, loweta, higheta = (pt[region] for region in pt_regions)
        combined_pass = barrel[0] + loweta[0] + higheta[0]
        combined_fail = barrel[1] + loweta[1] + higheta[1]

        eta = load_pair("eta/entire")
        phi = load_pair("phi/entire")

    return (
        *barrel,
        *loweta,
        *higheta,
        combined_pass,
        combined_fail,
        *eta,
        *phi,
    )


def pt_low_threshold_plot_setup(**legend_kwargs):
    """Format the current pyplot axes for a low-threshold pT efficiency curve.

    Sets the 10-400 x range with a logarithmic scale, the 0-1.2 y range, the
    axis labels and a hand-picked set of tick labels, then draws the legend.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    plt.xlim(10, 400)
    plt.ylim(0, 1.2)
    plt.xlabel(r"Offline electron $P_T$ [GeV]")
    plt.ylabel("Efficiency")
    plt.xscale("log")

    # Major ticks sit on the decades and are labelled with plain integers.
    plt.xticks([10, 100], [10, 100])

    # Minor ticks: 60-90 get a tick mark but no label.
    minor_ticks = [20, 30, 40, 50, 60, 70, 80, 90, 200, 300, 400]
    unlabelled = (60, 70, 80, 90)
    minor_labels = [None if tick in unlabelled else tick for tick in minor_ticks]
    plt.xticks(minor_ticks, minor_labels, minor=True)

    # With no keyword arguments this is the same call as plt.legend().
    plt.legend(**legend_kwargs)


def pt_high_threshold_plot_setup(**legend_kwargs):
    """Format the current pyplot axes for a high-threshold pT efficiency curve.

    Same ranges and labels as the low-threshold variant, but the x axis keeps
    its default scale and ticks.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    pt_range = (10, 400)
    efficiency_range = (0, 1.2)

    plt.xlim(*pt_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $P_T$ [GeV]")
    plt.ylabel("Efficiency")
    # With no keyword arguments this is the same call as plt.legend().
    plt.legend(**legend_kwargs)


def eta_plot_setup(**legend_kwargs):
    """Format the current pyplot axes for an efficiency-versus-eta plot.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    eta_range = (-2.5, 2.5)
    efficiency_range = (0, 1.2)

    plt.xlim(*eta_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $\eta$")
    plt.ylabel("Efficiency")
    # With no keyword arguments this is the same call as plt.legend().
    plt.legend(**legend_kwargs)


def phi_plot_setup(**legend_kwargs):
    """Format the current pyplot axes for an efficiency-versus-phi plot.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    phi_range = (-3.32, 3.32)
    efficiency_range = (0, 1.2)

    plt.xlim(*phi_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $\phi$")
    plt.ylabel("Efficiency")
    # With no keyword arguments this is the same call as plt.legend().
    plt.legend(**legend_kwargs)


# Luminosity per run era, keyed by "<year><era letter>". The plotting cell
# prints these values with an fb^-1 unit in the top-right label and adds up the
# eras of a year when only the bare year ("2022" / "2023") is requested.
# Eras missing from this table are shown as "X" there.
lumis = {
    "2022C": 5.0707,
    "2022D": 3.0063,
    "2022E": 5.8783,
    "2022F": 18.0070,
    "2022G": 3.1219,
    "2023B": 0.622,
    "2023C": 17.060,
    "2023D": 9.525,
}

In [9]:
# Remainder of each HLT path name shown in the legend title, keyed by the short
# trigger label used throughout the notebook.
trigger_suffixes = {
    "Ele30": "WPTight_Gsf",
    "Ele32": "WPTight_Gsf",
    "Ele115": "CaloIdVT_GsfTrkIdT",
    "Ele135": "CaloIdVT_GsfTrkIdT",
    "DoubleEle33SeededLeg": "CaloIdL_MW Seeded leg",
    "DoubleEle33UnseededLeg": "CaloIdL_MW Unseeded leg",
    "Ele23Ele12Leg1": "CaloIdL_TrackIdL_IsoVL Leg1",
    "Ele23Ele12Leg2": "CaloIdL_TrackIdL_IsoVL Leg2",
}


def _run_from_dataset(dataset):
    """Extract the run label from a dataset key.

    Keys containing "data" (e.g. ``data_EGamma01_Run2024C``) yield the third
    underscore-separated field without its leading "Run" (``2024C``); any other
    key yields everything after its first underscore.
    """
    if "data" in dataset:
        return dataset.split("_")[2][3:]
    return dataset.split("_", 1)[1]


def _lumi_for_run(run):
    """Return the luminosity of ``run`` rounded to one decimal, or "X" if unknown.

    A bare year ("2022" or "2023") stands for the sum of all eras of that year.
    """
    if run in lumis:
        lumi = lumis[run]
    elif run == "2022":
        lumi = lumis["2022C"] + lumis["2022D"] + lumis["2022E"] + lumis["2022F"] + lumis["2022G"]
    elif run == "2023":
        lumi = lumis["2023B"] + lumis["2023C"] + lumis["2023D"]
    else:
        # Placeholder printed in the label when no luminosity is available.
        return "X"
    return round(lumi, 1)


def _year_for_run(run):
    """Return the data-taking year of ``run``; anything not 2022/2023 is 2024."""
    for known_year in ("2022", "2023"):
        if known_year in run:
            return known_year
    return "2024"


for trigger_name in hlt_paths:
    if trigger_name not in trigger_suffixes:
        raise ValueError("Couldn't find proper trigger name")
    suffix = trigger_suffixes[trigger_name]

    # Same per-trigger cut that was applied when the histograms were filled.
    plateau_cut = plateau_cuts[trigger_name]

    # File names keep the leg information; the legend title uses the bare path
    # name because the leg is already part of the suffix.
    threshold = trigger_name.replace("Leg1", "").replace("Leg2", "").replace("SeededLeg", "").replace("UnseededLeg", "")

    plottype = "pt_high_threshold" if threshold in ("Ele115", "Ele135") else "pt_low_threshold"
    title = f"HLT_{threshold}_{suffix}"
    plateau_title = f"{title}\nProbe electron $P_T> {plateau_cut}$ GeV"

    # One row per figure, in the order get_histograms returns its
    # (passing, failing) pairs:
    # (eta-range text for the legend, plot type, file-name tag, legend title).
    # Raw strings keep the LaTeX backslash without an invalid escape sequence.
    plot_specs = [
        (r"$0.00 < |\eta| < 1.44$", plottype, "barrel_pt", title),
        (r"$1.57 < |\eta| < 2.00$", plottype, "endcap_loweta_pt", title),
        (r"$2.00 < |\eta| < 2.50$", plottype, "endcap_higheta_pt", title),
        (r"$0.00 < |\eta| < 1.44$ or $1.57 < |\eta| < 2.50$", plottype, "combined_pt", title),
        (r"$0.00 < |\eta| < 2.50$", "eta", "eta", plateau_title),
        (r"$0.00 < |\eta| < 2.50$", "phi", "phi", plateau_title),
    ]

    for data_period in ["data_EGamma01_Run2024C"]:
        for reference_dataset in ["data_EGamma_Run2023D"]:
            tocompare = [data_period, reference_dataset]
            run = [_run_from_dataset(dataset) for dataset in tocompare]
            lumi = [_lumi_for_run(r) for r in run]
            year = [_year_for_run(r) for r in run]

            rlabel = f"{lumi[0]} $fb^{{-1}}$, {year[0]} (13.6 TeV) - {lumi[1]} $fb^{{-1}}$, {year[1]} (13.6 TeV)"

            hists1 = get_histograms(f"steam_may_2024/{tocompare[0]}/{trigger_name}_hists.root")
            hists2 = get_histograms(f"steam_may_2024/{tocompare[1]}/{trigger_name}_hists.root")

            for index, (eta_range, spec_plottype, file_tag, legend_title) in enumerate(plot_specs):
                passing, failing = 2 * index, 2 * index + 1
                plot_ratio(
                    hists1[passing],
                    hists1[failing],
                    hists2[passing],
                    hists2[failing],
                    label1=f"{run[0]} {eta_range}",
                    label2=f"{run[1]} {eta_range}",
                    plottype=spec_plottype,
                    figure_path=f"steam_may_2024/{trigger_name}_{run[0]}_vs_{run[1]}_HLT_eff_{file_tag}.pdf",
                    legend_kwargs={"title": legend_title},
                    cms_kwargs={"loc": 1, "rlabel": rlabel},
                    efficiency_label="L1T + HLT Efficiency",
                )