In [None]:
import dask
import dask_awkward as dak
from coffea.dataset_tools import preprocess
from distributed import Client
from matplotlib import pyplot as plt

import egamma_tnp
from egamma_tnp import ElectronTagNProbeFromNanoAOD, ElectronTagNProbeFromNTuples
from egamma_tnp.plot import plot_efficiency

In [None]:
# Create a `distributed.Client` with default arguments and keep the handle in
# `client`. Per the distributed documentation a bare `Client()` starts a local
# cluster; the later `dask.compute(...)` cells presumably run on it.
client = Client()

In [None]:
# HLT filter names whose tag-and-probe efficiency is measured in the ntuple
# section. The cell that builds the T&P object turns each name into a flag name
# by prefixing "pass" and replacing "hlt" with "HLT".
# NOTE(review): the order presumably follows the filter sequence of the
# Ele30 WPTight path -- confirm against the HLT menu.
filters_to_check = [
    "hltEG30L1SingleEGOrEtFilter",
    "hltEle30WPTightClusterShapeFilter",
    "hltEle30WPTightHEFilter",
    "hltEle30WPTightEcalIsoFilter",
    "hltEle30WPTightHcalIsoFilter",
    "hltEle30WPTightPixelMatchFilter",
    "hltEle30WPTightPMS2Filter",
    "hltEle30WPTightGsfOneOEMinusOneOPFilter",
    "hltEle30WPTightGsfMissingHitsFilter",
    "hltEle30WPTightGsfDetaFilter",
    "hltEle30WPTightGsfDphiFilter",
    "hltEle30WPTightGsfTrackIsoFilter",
]

In [None]:
# Input description for the ntuple-based section: dataset name -> {"files": {path: tree name}}.
# Both samples read the "tnpEleTrig/fitter_tree" tree; only the DY sample carries
# metadata (flagged as MC, with a pileup-weights JSON path).
fileset = {
    "data_EGamma_Run2023C": {"files": {"tnptuples/data_EGamma_Run2023C.root": "tnpEleTrig/fitter_tree"}},
    "DY_NLO_2023preBPIX": {
        "files": {"tnptuples/mc_DY_NLO_2023preBPIX.root": "tnpEleTrig/fitter_tree"},
        "metadata": {"isMC": True, "pileupJSON": "jsonpog-integration/POG/LUM/2023_Summer23/puWeights.json.gz"},
    },
}

# Run coffea's preprocessing over the fileset (bad files skipped, step size of
# 500k). Only `fileset_available` is used by the following cells;
# `fileset_updates` is not referenced again in this notebook.
fileset_available, fileset_updates = preprocess(fileset, skip_bad_files=True, step_size=500_000)

In [None]:
# Register the probe-pT bin edges under the "pt_bins" key of egamma_tnp's
# binning configuration (edges presumably in GeV -- confirm).
egamma_tnp.binning.set(
    "pt_bins",
    [5, 10, 12, 14, 16, 18, 20, 23, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80, 100, 150, 250, 400],
)

# HLT filter name -> flag name used with the ntuples ("pass" prefix, "hlt" -> "HLT").
# Built once so the list given to the constructor and the per-filter histogram
# requests below cannot drift apart.
flag_for_filter = {name: f"pass{name.replace('hlt', 'HLT')}" for name in filters_to_check}

# Tag-and-probe object over the preprocessed ntuple fileset, configured for
# every filter flag at once.
tnp = ElectronTagNProbeFromNTuples(
    fileset_available,
    [flag_for_filter[name] for name in filters_to_check],
    use_sc_eta=False,
    probes_pt_cut=5,
    cutbased_id="passingCutBasedTight122XV1",
)

# One histogram request per filter, keyed by the original HLT filter name.
# These are not evaluated here; a later cell passes `to_compute` to `dask.compute`.
to_compute = {
    name: tnp.get_1d_pt_eta_phi_tnp_histograms(
        flag,
        vars=["el_pt", "el_sc_eta", "el_sc_phi"],
        # |eta| ranges used to split the pT histograms into three regions.
        eta_regions_pt={
            "barrel": [0.0, 1.4442],
            "endcap_loweta": [1.566, 2.0],
            "endcap_higheta": [2.0, 2.5],
        },
        plateau_cut=5,
        uproot_options={"allow_read_errors_with_report": False},
    )
    for name, flag in flag_for_filter.items()
}

# Displayed as the cell output: the columns dask-awkward reports as needed for
# the requested histograms.
dak.necessary_columns(to_compute)

In [None]:
%%time

(out,) = dask.compute(to_compute)

In [None]:
# One entry per figure, in display order:
# (histogram variable, eta region key, suffix appended to the legend label, log-scaled x axis?)
panels = [
    ("pt", "barrel", " barrel", True),
    ("pt", "endcap_loweta", " endcap loweta", True),
    ("pt", "endcap_higheta", " endcap higheta", True),
    ("eta", "entire", "", False),
    ("phi", "entire", "", False),
]

# For every filter draw five figures (pT in three eta regions, then eta and phi),
# each overlaying the efficiency curves of all datasets.
for filter_name in filters_to_check:
    hists_by_dataset = out[filter_name]
    for variable, region, label_suffix, log_x in panels:
        plt.figure(figsize=(10, 8))
        for dataset, hists in hists_by_dataset.items():
            region_hists = hists[variable][region]
            plot_efficiency(
                region_hists["passing"],
                region_hists["failing"],
                label=f"{filter_name} {dataset}{label_suffix}",
            )
        # Leave headroom above an efficiency of 1 for the legend.
        plt.ylim(0, 1.2)
        if log_x:
            plt.xscale("log")
        plt.legend()
        plt.show()

In [None]:
# Input description for the NanoAOD-based section; this rebinds `fileset` (and,
# below, `fileset_available` / `fileset_updates`) from the ntuple section above.
# NOTE(review): the dataset key says "DY_NLO_2023preBPIX" but the files are named
# Egamma*.root and the metadata has isMC=False -- the key looks like a copy-paste
# leftover. It is used in the plot legends; confirm and rename if so.
fileset = {
    "DY_NLO_2023preBPIX": {
        "files": {
            "root_files/Egamma0.root": "Events",
            "root_files/Egamma1.root": "Events",
        },
        "metadata": {"isMC": False, "pileupJSON": "jsonpog-integration/POG/LUM/2023_Summer23/puWeights.json.gz"},
    },
}

# Run coffea's preprocessing over the fileset (bad files skipped, step size of 100k).
fileset_available, fileset_updates = preprocess(fileset, skip_bad_files=True, step_size=100_000)

In [None]:
# Rebinds `filters_to_check` for the NanoAOD section: here the entries are passed
# unchanged both to the T&P constructor and to the histogram requests.
# NOTE(review): the "_Leg1" suffix presumably selects one leg of the di-electron
# path -- confirm against the egamma_tnp documentation.
filters_to_check = ["HLT_Ele30_WPTight_Gsf", "HLT_Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_Leg1"]

In [None]:
# Register the probe-pT bin edges under the "pt_bins" key of egamma_tnp's
# binning configuration (edges presumably in GeV -- confirm).
egamma_tnp.binning.set(
    "pt_bins",
    [5, 10, 12, 14, 16, 18, 20, 23, 26, 28, 30, 32, 34, 36, 38, 40, 45, 50, 60, 80, 100, 150, 250, 400],
)

# Tag-and-probe object over the NanoAOD inputs. It is given the *preprocessed*
# fileset (`fileset_available`), matching the ntuple section; the raw `fileset`
# would leave the `preprocess` result (bad-file skipping, step size) unused.
tnp = ElectronTagNProbeFromNanoAOD(
    fileset_available,
    filters_to_check,
    use_sc_eta=True,
    probes_pt_cut=5,
    cutbased_id="cutBased >= 4",
    filterbit=[1, 4],
    require_event_to_pass_hlt_filter=True,
)

# One histogram request per trigger, keyed by its name in `filters_to_check`.
# These are not evaluated here; a later cell passes `to_compute` to `dask.compute`.
to_compute = {
    hlt_path: tnp.get_1d_pt_eta_phi_tnp_histograms(
        hlt_path,
        vars=["el_pt", "el_eta", "el_phi"],
        # |eta| ranges used to split the pT histograms into three regions.
        eta_regions_pt={
            "barrel": [0.0, 1.4442],
            "endcap_loweta": [1.566, 2.0],
            "endcap_higheta": [2.0, 2.5],
        },
        plateau_cut=5,
        uproot_options={"allow_read_errors_with_report": False},
    )
    for hlt_path in filters_to_check
}

# Displayed as the cell output: the columns dask-awkward reports as needed for
# the requested histograms.
dak.necessary_columns(to_compute)

In [None]:
%%time

(out,) = dask.compute(to_compute)

In [None]:
# One entry per figure, in display order:
# (histogram variable, eta region key, suffix appended to the legend label, log-scaled x axis?)
panels = [
    ("pt", "barrel", " barrel", True),
    ("pt", "endcap_loweta", " endcap loweta", True),
    ("pt", "endcap_higheta", " endcap higheta", True),
    ("eta", "entire", "", False),
    ("phi", "entire", "", False),
]

# For every trigger draw five figures (pT in three eta regions, then eta and phi),
# each overlaying the efficiency curves of all datasets.
for filter_name in filters_to_check:
    hists_by_dataset = out[filter_name]
    for variable, region, label_suffix, log_x in panels:
        plt.figure(figsize=(10, 8))
        for dataset, hists in hists_by_dataset.items():
            region_hists = hists[variable][region]
            plot_efficiency(
                region_hists["passing"],
                region_hists["failing"],
                label=f"{filter_name} {dataset}{label_suffix}",
            )
        # Leave headroom above an efficiency of 1 for the legend.
        plt.ylim(0, 1.2)
        if log_x:
            plt.xscale("log")
        plt.legend()
        plt.show()