In [1]:
import os
import shutil

import awkward as ak
import dask
import dask_awkward as dak
import mplhep as hep
import uproot
from coffea.dataset_tools import preprocess
from distributed import Client
from matplotlib import pyplot as plt

import egamma_tnp
from egamma_tnp import ElectronTagNProbeFromNTuples
from egamma_tnp.plot import plot_ratio
from egamma_tnp.utils.histogramming import save_hists

Issue: coffea.lookup_tools.json_lookup will be removed by August 2024. Please use lumi_tools or correctionlib instead!.
  from coffea.lookup_tools.json_lookup import json_lookup


In [2]:
# Tree read from every tag-and-probe ntuple.
tnp_tree = "tnpEleTrig/fitter_tree"


def _egamma_files(*eras):
    """Return the ``{file path: tree}`` mapping for the EGamma0 and EGamma1 ntuples of each era."""
    return {f"tnptuples/data_EGamma{stream}_Run{era}.root": tnp_tree for era in eras for stream in (0, 1)}


# Dataset name -> eras whose ntuples feed it. The "_before_<run>" / "_after_<run>"
# datasets reuse the files of the plain eras; the trailing run number is parsed
# from the dataset name by `runfilter`.
dataset_eras = {
    "data_EGamma_Run2024C": ("2024C",),
    "data_EGamma_Run2024D": ("2024D",),
    "data_EGamma_Run2024Ev1": ("2024Ev1",),
    "data_EGamma_Run2024Ev2": ("2024Ev2",),
    "data_EGamma_Run2024F": ("2024F",),
    "data_EGamma_Run2024C_before_380126": ("2024C",),
    "data_EGamma_Run2024C_after_380126": ("2024C",),
    "data_EGamma_Run2024Ev2+2024F_before_382231": ("2024Ev2", "2024F"),
    "data_EGamma_Run2024F_after_382231": ("2024F",),
}

fileset = {dataset: {"files": _egamma_files(*eras)} for dataset, eras in dataset_eras.items()}

fileset_available, fileset_updated = preprocess(fileset, step_size=500_000, skip_bad_files=True)

In [3]:
# One row per trigger leg: (short name, ntuple filter branch, plateau cut).
# Both lookup dicts below are derived from this table so their keys always match.
trigger_table = [
    ("Ele30", "passHLTEle30WPTightGsfTrackIsoFilter", 35),
    ("Ele115", "passHLTEle115CaloIdVTGsfTrkIdTGsfDphiFilter", 120),
    ("Ele23Ele12Leg1", "passHLTEle23Ele12CaloIdLTrackIdLIsoVLTrackIsoLeg1Filter", 25),
    ("Ele23Ele12Leg2", "passHLTEle23Ele12CaloIdLTrackIdLIsoVLTrackIsoLeg2Filter", 15),
    ("DoubleEle33SeededLeg", "passHLTEle33CaloIdLMWPMS2Filter", 35),
    ("DoubleEle33UnseededLeg", "passHLTDiEle33CaloIdLMWPMS2UnseededFilter", 35),
]

# Short trigger name -> filter branch name.
hlt_paths = {short_name: branch for short_name, branch, _ in trigger_table}

# Short trigger name -> plateau cut passed to the histogramming call.
plateau_cuts = {short_name: cut for short_name, _, cut in trigger_table}


def runfilter(events):
    """Select events on one side of the run number encoded in the dataset name.

    The dataset name is read from ``events.metadata["dataset"]``. When it
    contains "before" or "after", the text after the last underscore is parsed
    as an integer run number.

    Parameters
    ----------
    events
        Object exposing ``metadata["dataset"]``, a ``run`` attribute and
        mask-based indexing.

    Returns
    -------
    ``events[events.run < run]`` for a "before" dataset,
    ``events[events.run >= run]`` for an "after" dataset, and ``events``
    unchanged otherwise.
    """
    dataset_name = events.metadata["dataset"]
    wants_before = "before" in dataset_name
    wants_after = "after" in dataset_name

    # Plain datasets carry no run boundary and pass through untouched.
    if not (wants_before or wants_after):
        return events

    boundary = int(dataset_name.rsplit("_", 1)[-1])
    # "before" takes precedence when both words appear in the name.
    if wants_before:
        return events[events.run < boundary]
    return events[events.run >= boundary]


# Tag-and-probe driver over the preprocessed fileset, configured with every
# trigger filter branch, the cut-based ID branch, the golden JSON and the
# per-dataset run-range filter defined above.
tnp = ElectronTagNProbeFromNTuples(
    fileset_available,
    [*hlt_paths.values()],
    cutbased_id="passingCutBasedTight122XV1",
    goldenjson="json/steam_july_2024.json",
    extra_filter=runfilter,
)

In [4]:
%%time

# pT bin edges used for the high-threshold single-electron triggers.
high_threshold_pt_bins = [
    5, 10, 15, 20, 22, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80,
    100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150,
    200, 250, 300, 350, 400,
]  # fmt: skip

# pT bin edges used for every other trigger.
low_threshold_pt_bins = [
    5, 10, 12, 14, 16, 18, 20, 23, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80,
    100, 150, 250, 400,
]  # fmt: skip

high_threshold_triggers = ("Ele115", "Ele135")

to_compute = {}

for name, trigger in hlt_paths.items():
    # The binning is set before each histogram request because it differs per trigger.
    # NOTE(review): presumably the library reads the binning when the histograms are booked; confirm.
    pt_bins = high_threshold_pt_bins if name in high_threshold_triggers else low_threshold_pt_bins
    # Pass a fresh list so every call receives its own object.
    egamma_tnp.binning.set("pt_bins", list(pt_bins))

    to_compute[name] = tnp.get_1d_pt_eta_phi_tnp_histograms(
        trigger,
        uproot_options={"allow_read_errors_with_report": True},
        eta_regions_pt={
            "barrel": [0.0, 1.4442],
            "endcap_loweta": [1.566, 2.0],
            "endcap_higheta": [2.0, 2.5],
        },
        plateau_cut=plateau_cuts[name],
    )


# Last expression of the cell: shows which branches the task graph needs.
dak.necessary_columns(to_compute)

CPU times: user 38.8 s, sys: 92.7 ms, total: 38.9 s
Wall time: 39.6 s


{'from-uproot-903210c0b7d8afee147c94400e064394': frozenset({'el_eta',
            'el_phi',
            'el_pt',
            'el_q',
            'lumi',
            'pair_mass',
            'passHLTDiEle33CaloIdLMWPMS2UnseededFilter',
            'passHLTEle115CaloIdVTGsfTrkIdTGsfDphiFilter',
            'passHLTEle23Ele12CaloIdLTrackIdLIsoVLTrackIsoLeg1Filter',
            'passHLTEle23Ele12CaloIdLTrackIdLIsoVLTrackIsoLeg2Filter',
            'passHLTEle30WPTightGsfTrackIsoFilter',
            'passHLTEle33CaloIdLMWPMS2Filter',
            'passingCutBasedTight122XV1',
            'run',
            'tag_Ele_eta',
            'tag_Ele_pt',
            'tag_Ele_q'}),
 'from-uproot-098f4d58d379314cae88469b850397e2': frozenset({'el_eta',
            'el_phi',
            'el_pt',
            'el_q',
            'lumi',
            'pair_mass',
            'passHLTDiEle33CaloIdLMWPMS2UnseededFilter',
            'passHLTEle115CaloIdVTGsfTrkIdTGsfDphiFilter',
            'passHLTEle23Ele12

In [5]:
# Create a distributed Client with default arguments before triggering the computation.
# NOTE(review): presumably this starts a local cluster that later dask.compute calls use; confirm.
client = Client()

In [6]:
%%time

# Compute every booked result in one call; the single-element result is unpacked
# into `out`, which is indexed by the same trigger names as `to_compute`.
(out,) = dask.compute(to_compute)

CPU times: user 29.7 s, sys: 1 s, total: 30.7 s
Wall time: 35.5 s


In [7]:
# Root folder for everything written below.
output_root = "steam_july_2024"

# Gather dataset names from every trigger's histograms and reports instead of
# relying on one hard-coded trigger key; dict.fromkeys de-duplicates while
# keeping first-seen order.
datasets = dict.fromkeys(dataset for hists, report in out.values() for dataset in (*hists, *report))

for dataset in datasets:
    path = f"{output_root}/{dataset}"
    # Start from an empty folder so stale files from a previous run never linger.
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs also creates `output_root` itself when it does not exist yet;
    # plain os.mkdir would raise FileNotFoundError on a fresh checkout.
    os.makedirs(path)

for name, res in out.items():
    hists, report = res
    # One JSON read report per dataset and trigger.
    for dataset, report_arr in report.items():
        ak.to_json(
            report_arr,
            f"{output_root}/{dataset}/{name}_report.json",
            num_readability_spaces=1,
            num_indent_spaces=4,
        )
    # One ROOT file of histograms per dataset and trigger.
    for dataset, hs in hists.items():
        save_hists(f"{output_root}/{dataset}/{name}_hists.root", hs)

In [None]:
# Overrides applied after the CMS style: figure size, font sizes and tight bounding boxes on save.
style_overrides = {
    "figure.figsize": (6.4, 4.8),
    "font.size": 14,
    "legend.title_fontsize": 14,
    "savefig.bbox": "tight",
}

hep.style.use("CMS")
hep.style.use(style_overrides)


def get_histograms(path):
    """Load the passing/failing histograms stored in one ROOT file.

    Parameters
    ----------
    path
        Location of the ROOT file, handed to ``uproot.open``.

    Returns
    -------
    tuple
        Twelve histograms as consecutive (passing, failing) pairs, in this
        order: pt barrel, pt endcap_loweta, pt endcap_higheta, pt combined
        (sum of the three pt regions), eta entire, phi entire.
    """
    pt_regions = ("barrel", "endcap_loweta", "endcap_higheta")

    with uproot.open(path) as root_file:

        def load_pair(prefix):
            # Read the passing histogram first, then the failing one.
            return (
                root_file[f"{prefix}/passing"].to_hist(),
                root_file[f"{prefix}/failing"].to_hist(),
            )

        barrel, loweta, higheta = (load_pair(f"pt/{region}") for region in pt_regions)

        # The combined pt histograms are the sum over the three eta regions.
        combined_pass = barrel[0] + loweta[0] + higheta[0]
        combined_fail = barrel[1] + loweta[1] + higheta[1]

        eta_entire = load_pair("eta/entire")
        phi_entire = load_pair("phi/entire")

    return (
        *barrel,
        *loweta,
        *higheta,
        combined_pass,
        combined_fail,
        *eta_entire,
        *phi_entire,
    )


def pt_low_threshold_plot_setup(**legend_kwargs):
    """Configure the current pyplot axes for a log-x pT efficiency plot.

    Sets the x range to 10-400 and the y range to 0-1.2, labels both axes,
    switches x to a log scale with explicit major and minor tick labels, and
    draws the legend.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    major_ticks = [10, 100]
    minor_ticks = [20, 30, 40, 50, 60, 70, 80, 90, 200, 300, 400]
    # 60-90 keep their tick marks but get no label.
    minor_labels = [20, 30, 40, 50, None, None, None, None, 200, 300, 400]

    plt.xlim(10, 400)
    plt.ylim(0, 1.2)
    plt.xlabel(r"Offline electron $P_T$ [GeV]")
    plt.ylabel(r"Efficiency")
    plt.xscale("log")
    plt.xticks(major_ticks, list(major_ticks))
    plt.xticks(minor_ticks, minor_labels, minor=True)
    # An empty kwargs dict is equivalent to calling plt.legend() bare.
    plt.legend(**legend_kwargs)


def pt_high_threshold_plot_setup(**legend_kwargs):
    """Configure the current pyplot axes for a linear-x pT efficiency plot.

    Sets the x range to 10-400 and the y range to 0-1.2, labels both axes and
    draws the legend.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    pt_range = (10, 400)
    efficiency_range = (0, 1.2)

    plt.xlim(*pt_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $P_T$ [GeV]")
    plt.ylabel(r"Efficiency")
    # An empty kwargs dict is equivalent to calling plt.legend() bare.
    plt.legend(**legend_kwargs)


def eta_plot_setup(**legend_kwargs):
    """Configure the current pyplot axes for an efficiency-versus-eta plot.

    Sets the x range to -2.5 to 2.5 and the y range to 0-1.2, labels both axes
    and draws the legend.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    eta_range = (-2.5, 2.5)
    efficiency_range = (0, 1.2)

    plt.xlim(*eta_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $\eta$")
    plt.ylabel(r"Efficiency")
    # An empty kwargs dict is equivalent to calling plt.legend() bare.
    plt.legend(**legend_kwargs)


def phi_plot_setup(**legend_kwargs):
    """Configure the current pyplot axes for an efficiency-versus-phi plot.

    Sets the x range to -3.32 to 3.32 and the y range to 0-1.2, labels both
    axes and draws the legend.

    Parameters
    ----------
    **legend_kwargs
        Forwarded unchanged to ``plt.legend``.
    """
    phi_range = (-3.32, 3.32)
    efficiency_range = (0, 1.2)

    plt.xlim(*phi_range)
    plt.ylim(*efficiency_range)
    plt.xlabel(r"Offline electron $\phi$")
    plt.ylabel(r"Efficiency")
    # An empty kwargs dict is equivalent to calling plt.legend() bare.
    plt.legend(**legend_kwargs)


# Per-era values keyed by era label (e.g. "2024C"). The plotting cell looks them
# up by era, rounds them to one decimal and prints them in front of "fb^-1" in
# the plot's right-hand label.
# NOTE(review): presumably integrated luminosity in fb^-1; confirm the source of these numbers.
lumis = {
    "2023D": 9.525,
    "2024C": 7.416594091,
    "2024D": 7.889161918,
    "2024Ev1": 6.279524894,
    "2024Ev2": 4.454176120,
    "2024F": 3.249118223,
}

In [None]:
# Legend-title suffix for each trigger key (keys not in `hlt_paths` are kept so
# the table stays usable when triggers are swapped).
trigger_suffixes = {
    "Ele30": "WPTight_Gsf",
    "Ele32": "WPTight_Gsf",
    "Ele115": "CaloIdVT_GsfTrkIdT",
    "Ele135": "CaloIdVT_GsfTrkIdT",
    "DoubleEle33SeededLeg": "CaloIdL_MW\nSeeded leg",
    "DoubleEle33UnseededLeg": "CaloIdL_MW\nUnseeded leg",
    "Ele23Ele12Leg1": "CaloIdL_TrackIdL_IsoVL Leg1",
    "Ele23Ele12Leg2": "CaloIdL_TrackIdL_IsoVL Leg2",
}

# Probe pT cut quoted in the eta/phi legends, per trigger key.
plateau_cut_dict = {
    "Ele30": 35,
    "Ele32": 35,
    "Ele115": 120,
    "Ele135": 140,
    "Ele23Ele12Leg1": 25,
    "Ele23Ele12Leg2": 15,
    "DoubleEle33SeededLeg": 35,
    "DoubleEle33UnseededLeg": 35,
}

# Eras summed when a whole year is requested instead of a single era.
year_eras = {
    "2022": ("2022C", "2022D", "2022E", "2022F", "2022G"),
    "2023": ("2023B", "2023C", "2023D"),
}

# Colour of the second efficiency curve for specific MC samples; anything else is red.
mc_colors = {
    "mc_DY_NLO_2023preBPIX": "#5790fc",
    "mc_DY_NLO_2023postBPIX": "#7a21dd",
}
default_color = "#e42536"

output_root = "steam_july_2024"


def _run_label(folder):
    """Return the short label for a dataset folder name.

    Folders containing "data" drop their first two underscore-separated fields
    and the next three characters; other folders drop only their first field.
    """
    if "data" in folder:
        return folder.split("_", 2)[2][3:]
    return folder.split("_", 1)[1]


def _lumi_label(run_name):
    """Return the value from `lumis` rounded to one decimal, or "X" when unknown.

    A whole-year label is the sum of that year's eras, but only when every era
    is present in `lumis`; a partial sum would be misleading, and indexing a
    missing era would raise KeyError.
    """
    if run_name in lumis:
        return round(lumis[run_name], 1)
    eras = year_eras.get(run_name, ())
    if eras and all(era in lumis for era in eras):
        return round(sum(lumis[era] for era in eras), 1)
    return "X"


def _year_label(run_name):
    """Return "2022" or "2023" when the label contains it, otherwise "2024"."""
    for candidate in ("2022", "2023"):
        if candidate in run_name:
            return candidate
    return "2024"


for trigger_name in list(hlt_paths.keys()):
    for data_period in ["data_EGamma_Run2024Ev1"]:
        for mc_dataset in ["data_EGamma_Run2024D"]:
            tocompare = [data_period, mc_dataset]
            run = [_run_label(folder) for folder in tocompare]

            threshold = trigger_name
            if threshold not in trigger_suffixes:
                raise ValueError("Couldn't find proper trigger name")
            suffix = trigger_suffixes[threshold]
            plateau_cut = plateau_cut_dict[threshold]

            # Files are named after the full trigger key; the title drops the leg part.
            filename = threshold
            threshold = threshold.replace("Leg1", "").replace("Leg2", "").replace("SeededLeg", "").replace("UnseededLeg", "")

            plottype = "pt_high_threshold" if threshold in ("Ele115", "Ele135") else "pt_low_threshold"
            title = f"HLT_{threshold}_{suffix}"

            lumi = [_lumi_label(r) for r in run]
            year = [_year_label(r) for r in run]

            if mc_dataset.startswith("mc_"):
                rlabel = f"{lumi[0]} $fb^{{-1}}$, {year[0]} (13.6 TeV)"
            else:
                rlabel = f"{lumi[0]} $fb^{{-1}}$, {year[0]} (13.6 TeV) - {lumi[1]} $fb^{{-1}}$, {year[1]} (13.6 TeV)"

            eff2_kwargs = {"color": mc_colors.get(mc_dataset, default_color)}

            # Each tuple holds consecutive (passing, failing) pairs in the order
            # returned by get_histograms: pt barrel, pt endcap_loweta,
            # pt endcap_higheta, pt combined, eta, phi.
            hists1 = get_histograms(f"{output_root}/{tocompare[0]}/{filename}_hists.root")
            hists2 = get_histograms(f"{output_root}/{tocompare[1]}/{filename}_hists.root")

            # Backslashes are doubled so "\eta" reaches mathtext without relying
            # on an invalid escape sequence in a non-raw string.
            plateau_note = f"$0.00 < |\\eta| < 2.50$\nProbe electron $P_T> {plateau_cut}$ GeV"

            # (figure name tag, plot type, selection text), in get_histograms order.
            plots = [
                ("barrel_pt", plottype, "$0.00 < |\\eta| < 1.44$"),
                ("endcap_loweta_pt", plottype, "$1.57 < |\\eta| < 2.00$"),
                ("endcap_higheta_pt", plottype, "$2.00 < |\\eta| < 2.50$"),
                ("combined_pt", plottype, "$0.00 < |\\eta| < 1.44$ or $1.57 < |\\eta| < 2.50$"),
                ("eta", "eta", plateau_note),
                ("phi", "phi", plateau_note),
            ]

            for index, (tag, kind, selection) in enumerate(plots):
                plot_ratio(
                    hists1[2 * index],
                    hists1[2 * index + 1],
                    hists2[2 * index],
                    hists2[2 * index + 1],
                    label1=f"{run[0]}",
                    label2=f"{run[1]}",
                    # get_histograms returns failing (not total) histograms second.
                    denominator_type="failing",
                    plottype=kind,
                    figure_path=f"{output_root}/{filename}_{run[0]}_vs_{run[1]}_HLT_eff_{tag}.pdf",
                    legend_kwargs={"title": f"{title}\n {selection}"},
                    cms_kwargs={"loc": 1, "rlabel": rlabel},
                    eff2_kwargs=eff2_kwargs,
                    efficiency_label="L1T + HLT Efficiency",
                )