In [None]:
import awkward as ak
import vector
import hist
import matplotlib.pyplot as plt
import mplhep
import uproot

from atlas_schema.schema import NtupleSchema
from coffea import dataset_tools
from coffea.nanoevents import NanoEventsFactory

# Register vector's behaviors with awkward so that records zipped with a vector
# `with_name` (used further down in this notebook) get vector methods.
vector.register_awkward()
# Apply the ATLAS1 style shipped with mplhep to the matplotlib figures below.
mplhep.style.use(mplhep.style.ATLAS1)

In [None]:
# Single-sample fileset: one dataset ("ttbar") whose "files" entry maps a
# ROOT file path to the name of the tree to read from it.
# NOTE(review): the metadata key is spelled "tets_meta" (possibly meant
# "test_meta"); it is kept verbatim because it is runtime data.
fileset = {
    "ttbar": dict(
        files={"output.root": "reco"},
        metadata={"tets_meta": 0},
    ),
}

# interactive exploration

In [None]:
# Build the events object for the "ttbar" files using the ATLAS ntuple schema.
factory = NanoEventsFactory.from_root(
    fileset["ttbar"]["files"],
    mode="virtual",
    schemaclass=NtupleSchema,
)
events = factory.events()

# Force read everything (similar to using eager mode); only use this for
# small inputs and testing.
_ = ak.materialize(events)

In [None]:
# Display the names of the systematic variations exposed by the events object;
# the cells below index `events[...]` with these names.
events.systematic_names

In [None]:
# Weighted histogram of the leading-jet pT: 12 regular bins on [0, 300) plus a
# growing string-category axis holding one entry per variation.
h = (
    hist.new.Regular(12, 0, 300, label="leading jet $p_T$")
    .StrCat([], name="variation", growth=True)
    .Weight()
)

# Fill once with the unmodified events ("nominal") and once per systematic.
for variation in ["nominal"] + events.systematic_names:
    print(variation)
    if variation == "nominal":
        event_view = events
    else:
        event_view = events[variation]
    # First jet of every event, scaled by 1/1000
    # (presumably MeV -> GeV; confirm against the ntuple units).
    leading_jet_pt = event_view.jet.pt[:, 0] / 1_000
    h.fill(leading_jet_pt, variation=variation)

# Overlay every variation as a step histogram on a shared set of axes.
fig, ax = plt.subplots()
for variation in h.axes[1]:
    h[:, variation].plot(histtype="step", label=variation, ax=ax)
ax.legend()

In [None]:
# with uproot, manually
# Read the needed branches inside a `with` block so the ROOT file is closed
# as soon as the arrays are in memory (the original left the handle open).
with uproot.open(fileset["ttbar"]["files"]) as f:
    arr = f.arrays(
        [
            "jet_pt_NOSYS",
            "jet_eta",
            "jet_phi",
            "jet_e_NOSYS",
            "jet_pt_JET_JER_EffectiveNP_1__1up",
            "jet_e_JET_JER_EffectiveNP_1__1up",
        ]
    )

# Zip the nominal (NOSYS) branches into one jet record array.
jets = ak.zip(
    {
        "pt": arr["jet_pt_NOSYS"],
        "eta": arr["jet_eta"],
        "phi": arr["jet_phi"],
        "energy": arr["jet_e_NOSYS"],
    },
    with_name="PtEtaPhiECandidate",
)

# Element-wise comparison against the schema-built jets (displayed as the cell output).
jets.pt == events.jet.pt

In [None]:
# compare systematic variation
# Same construction as the nominal jets, but pt and energy come from the
# JET_JER_EffectiveNP_1__1up branches; eta and phi use the unsuffixed branches.
jets_JER1 = ak.zip(
    {
        "pt": arr["jet_pt_JET_JER_EffectiveNP_1__1up"],
        "eta": arr["jet_eta"],
        "phi": arr["jet_phi"],
        "energy": arr["jet_e_JET_JER_EffectiveNP_1__1up"],
    },
    with_name="PtEtaPhiECandidate",
)

# Element-wise comparison against the same variation as exposed by the schema.
jets_JER1.pt == events["JET_JER_EffectiveNP_1__1up"].jet.pt

# processor approach

In [None]:
# coffea processor version
from coffea import processor

class Analysis(processor.ProcessorABC):
    """Coffea processor that histograms the leading-jet pT for every variation.

    Each call to `process` fills and returns its own histogram, so the result
    does not depend on any notebook-level variable and chunk outputs can be
    merged without sharing state.
    """

    def __init__(self):
        # Empty template histogram: 12 regular bins on [0, 300) plus a growing
        # string-category axis named "variation". It is never filled directly;
        # `process` works on a copy.
        self.h = hist.new.Regular(12, 0, 300, label="leading jet $p_T$").StrCat([], name="variation", growth=True).Weight()

    def process(self, events):
        """Fill a fresh histogram from one chunk of events.

        Parameters
        ----------
        events
            Chunk of events exposing `systematic_names`, item access by
            variation name, and a `jet.pt` field.

        Returns
        -------
        The histogram filled with the leading-jet pT of this chunk, with one
        category for "nominal" and one per systematic variation.
        """
        # Work on a per-chunk copy of the template instead of a shared object,
        # so that every chunk returns an independent histogram.
        h = self.h.copy()
        for variation in ["nominal"] + events.systematic_names:
            event_view = events if variation == "nominal" else events[variation]
            # First jet of every event, scaled by 1/1000
            # (presumably MeV -> GeV; confirm against the ntuple units).
            h.fill(event_view.jet.pt[:, 0] / 1_000, variation=variation)
        return h

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged (no post-processing needed)."""
        return accumulator


# Configure a runner that executes with the iterative executor, builds events
# with the ATLAS ntuple schema, and also returns run metrics (savemetrics=True).
run = processor.Runner(
    executor = processor.IterativeExecutor(),
    schema=NtupleSchema,
    savemetrics=True,
    chunksize=5  # tiny to simulate multiple chunks
)

# Preprocess the fileset with the runner; the result is what gets passed to `run` below.
samples = run.preprocess(fileset)

# Run the analysis; with savemetrics=True the call returns (output, metrics report).
out, report = run(samples, processor_instance=Analysis())

# NOTE(review): this bare expression is not the last statement of the cell,
# so it is not displayed by the notebook.
out, report

# Plot the nominal slice of the output histogram.
out[:, "nominal"].plot()

In [None]:
# old, for manual construction
from collections import defaultdict

def get_all_systematics(events, objects):
    """Map each object to the fields that have systematic variations.

    For every name in `objects`, look at the fields of `events[name]` that end
    in "_syst" (those presumably correspond to branches with a _NOSYS suffix)
    and collect the variation names found under each, excluding "NOSYS".

    Parameters
    ----------
    events
        Object supporting `events[obj].fields` and `events[obj][field].fields`.
    objects
        Iterable of object (collection) names to inspect, e.g. ("jet", "el").

    Returns
    -------
    defaultdict
        `{object: {field name without "_syst": sorted variation names}}`.
        Objects and fields without any variation are not inserted.
    """
    affected_map = defaultdict(dict)
    for obj in objects:
        collection = events[obj]
        for field_name in collection.fields:
            if not field_name.endswith("_syst"):
                continue
            # Everything besides the nominal "NOSYS" entry is an actual variation.
            variations = sorted(set(collection[field_name].fields) - {"NOSYS"})
            if variations:
                affected_map[obj][field_name.removesuffix("_syst")] = variations
    return affected_map

# Collect the systematic variations found on the jet and electron ("el")
# collections, then display the resulting mapping as the cell output.
affected_map = get_all_systematics(events, ("jet", "el"))
affected_map