In [None]:
import awkward as ak
import vector
import hist
import matplotlib.pyplot as plt
import mplhep
import uproot

from atlas_schema.schema import NtupleSchema
from coffea import dataset_tools
from coffea.nanoevents import NanoEventsFactory

# Hook the `vector` package into awkward arrays.
# NOTE(review): presumably this registers vector's behaviors globally — confirm in the vector docs.
vector.register_awkward()
# Apply mplhep's ATLAS1 style to the matplotlib figures drawn below.
mplhep.style.use(mplhep.style.ATLAS1)

In [None]:
# Inputs for the analysis, keyed by dataset name. Each dataset lists its input
# files and carries a free-form metadata dict.
# NOTE(review): the "files" mapping presumably reads as {file path: tree name} — confirm.
fileset = {
    "ttbar": {
        "files": {
            "output.root": "reco",
        },
        "metadata": {
            "test_meta": 0,
        },
    },
}

# interactive exploration

In [None]:
# Build the events array for the "ttbar" inputs using the ATLAS ntuple schema.
events = NanoEventsFactory.from_root(
    fileset["ttbar"]["files"],
    schemaclass=NtupleSchema,
    mode="virtual",
).events()

# Force read everything (similar to using eager mode).
# Only use this for small inputs and testing.
_ = ak.materialize(events)

In [None]:
# Leading-jet pT histogram: 12 regular bins on [0, 300) plus a growing string
# category axis holding one entry per systematic variation.
h = (
    hist.new
    .Regular(12, 0, 300, label="leading jet $p_T$")
    .StrCat([], name="variation", growth=True)
    .Weight()
)

# Fill once for the nominal events ("NOSYS") and once per systematic variation.
for variation in ["NOSYS", *events.systematic_names]:
    print(variation)
    if variation == "NOSYS":
        event_view = events
    else:
        event_view = events[variation]
    # First jet of every event, scaled by 1/1000
    # (presumably MeV -> GeV — confirm against the ntuple units).
    leading_jet_pt = event_view.jet.pt[:, 0]
    h.fill(leading_jet_pt / 1_000, variation=variation)


# Overlay all variations on a single set of axes.
fig, ax = plt.subplots()
for variation in h.axes[1]:
    h[:, variation].plot(ax=ax, label=variation, histtype="step")
ax.legend()

In [None]:
# Cross-check with plain uproot: read the jet branches by name and zip them by hand.
f = uproot.open(fileset["ttbar"]["files"])
arr = f.arrays(
    [
        "jet_pt_NOSYS",
        "jet_eta",
        "jet_phi",
        "jet_e_NOSYS",
        "jet_pt_JET_JER_EffectiveNP_1__1up",
        "jet_e_JET_JER_EffectiveNP_1__1up",
    ]
)
jets = ak.zip(
    {
        "pt": arr["jet_pt_NOSYS"],
        "eta": arr["jet_eta"],
        "phi": arr["jet_phi"],
        "energy": arr["jet_e_NOSYS"],
    },
    with_name="PtEtaPhiECandidate",
)
# Element-wise comparison with the schema-built nominal jets (shown as the cell output).
jets.pt == events.jet.pt

In [None]:
# Same cross-check for one systematic variation: pt and energy come from the
# JET_JER_EffectiveNP_1__1up branches, eta and phi from the shared branches.
jets_JER1 = ak.zip(
    {
        "pt": arr["jet_pt_JET_JER_EffectiveNP_1__1up"],
        "eta": arr["jet_eta"],
        "phi": arr["jet_phi"],
        "energy": arr["jet_e_JET_JER_EffectiveNP_1__1up"],
    },
    with_name="PtEtaPhiECandidate",
)
# Element-wise comparison with the schema view of the same variation (shown as the cell output).
jets_JER1.pt == events["JET_JER_EffectiveNP_1__1up"].jet.pt

# processor approach

In [None]:
# coffea processor version
from coffea import processor


class Analysis(processor.ProcessorABC):
    """Histogram the leading-jet pT for the nominal events and each systematic variation.

    ``process()`` builds a fresh histogram on every call instead of filling one
    stored on the instance. The per-chunk return values are merged into the final
    output, so a histogram that persisted across calls would carry the entries of
    earlier chunks into every later result and over-count them whenever a single
    processor instance handles more than one chunk.
    """

    def __init__(self):
        # Unfilled template, kept so that existing `Analysis().h` access keeps
        # working; process() never fills it.
        self.h = self._new_hist()

    @staticmethod
    def _new_hist():
        """Return an empty histogram: 12 bins on [0, 300) x growing "variation" category, weighted storage."""
        return (
            hist.new
            .Regular(12, 0, 300, label="leading jet $p_T$")
            .StrCat([], name="variation", growth=True)
            .Weight()
        )

    def process(self, events):
        """Fill and return a new histogram for one chunk of events.

        Args:
            events: events array exposing ``systematic_names``, lookup by variation
                name, and a ``jet.pt`` field.

        Returns:
            A histogram containing only this chunk's entries, with one
            "variation" category for "nominal" and one per systematic.
        """
        h = self._new_hist()
        for variation in ["nominal"] + events.systematic_names:
            event_view = events if variation == "nominal" else events[variation]
            # First jet of every event, scaled by 1/1000
            # (presumably MeV -> GeV — confirm against the ntuple units).
            h.fill(event_view.jet.pt[:, 0] / 1_000, variation=variation)
        return h

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged (no post-processing is needed)."""
        return accumulator


# Configure the runner: iterative executor, ATLAS ntuple schema, metrics collection on.
run = processor.Runner(
    executor=processor.IterativeExecutor(),
    schema=NtupleSchema,
    chunksize=5,  # tiny to simulate multiple chunks
    savemetrics=True,
)

# Turn the fileset into the work items consumed by the runner.
samples = run.preprocess(fileset)

# With savemetrics=True the call yields both the processor output and a report.
out, report = run(samples, processor_instance=Analysis())

out, report

# Plot the nominal slice of the processor output.
out[:, "nominal"].plot()

In [None]:
import base64
import dataclasses
import json


def write_samples(samples, fname):
    """Serialize an iterable of dataclass work items to a JSON file.

    Each item is converted to a dict with ``dataclasses.asdict``; its
    ``fileuuid`` bytes are base64-encoded into an ASCII string so the record
    becomes JSON-serializable.

    Args:
        samples: iterable of dataclass instances with a bytes ``fileuuid`` field.
        fname: path of the JSON file to write (overwritten if it exists).
    """

    def _to_record(item):
        # JSON cannot hold raw bytes, hence the base64 text encoding.
        record = dataclasses.asdict(item)
        record["fileuuid"] = base64.b64encode(record["fileuuid"]).decode("ascii")
        return record

    # Encode everything before touching the file, so a failure leaves it untouched.
    records = [_to_record(item) for item in samples]

    with open(fname, "w") as out_file:
        json.dump(records, out_file)


def read_samples(fname):
    """Load work items from a JSON file of base64-encoded records.

    Each record's ``fileuuid`` is decoded from its base64 string back to bytes,
    and the record is then turned into a ``WorkItem``.

    Args:
        fname: path of the JSON file to read.

    Returns:
        A list of ``coffea.processor.executor.WorkItem`` instances, in file order.
    """
    # Imported locally: the notebook only uses `from coffea import ...` forms, so
    # the bare name `coffea` is never bound and `coffea.processor...` would raise
    # a NameError.
    from coffea.processor.executor import WorkItem

    with open(fname) as f:
        records = json.load(f)

    # Decode the bytes field, then rebuild the dataclass from the remaining keys.
    return [
        WorkItem(**{**record, "fileuuid": base64.b64decode(record["fileuuid"])})
        for record in records
    ]


# Round-trip check: preprocess the fileset, dump the work items to JSON,
# then load them back and display the result.
write_samples(run.preprocess(fileset), "samples.json")
samples = read_samples("samples.json")

samples

In [None]:
# Read the "listOfSystematics" histogram and close the file straight away: the
# conversion to hist.Hist happens inside the `with` block, so nothing below
# needs the open file handle.
with uproot.open("output.root") as systematics_file:
    systematics_hist = hist.Hist(systematics_file["listOfSystematics"])

# First axis of that histogram (shown as the cell output).
systematics_hist.axes[0]