In [1]:
import json
import _pickle as cPickle
from pathlib import Path

import awkward as ak
import hist
import numpy as np
from coffea import processor
from coffea.nanoevents.methods import vector
from dask.distributed import Client

In [2]:
# Full sample name -> short label. The short label is used as the key of the
# processor result and as the stem of the pickle file written for each sample.
# The samples differ only in their mass-window tag, so the table is generated
# from the list of tags (insertion order is preserved).
dataset_short_name = {
    f"ZToMuMu_M-{mass_window}_TuneCP5_13TeV-powheg-pythia8": f"ZToMuMu_M-{mass_window}"
    for mass_window in ("50To120", "120To200", "200To400", "400To800", "800To1400")
}

In [3]:
class BackgroundEstimatorProcessor(processor.ProcessorABC):
    """Coffea processor that histograms a MET-like quantity in dimuon events.

    Events are kept when they contain exactly two muons of opposite charge,
    each with pT > 50 GeV and |eta| < 2.1, and a dimuon invariant mass between
    60 and 120 GeV. For the selected events the missing transverse momentum is
    rebuilt from jets, electrons and photons only (muons are not included --
    presumably so that the dimuon system plays the role of invisible
    particles; confirm with the analysis note).

    The result of ``process`` is ``{short_dataset_name: output}`` where
    ``output`` holds:

    * ``sumw``     -- sum of ``genWeight`` over all events of the chunk
                      (before selection),
    * ``n_events`` -- number of events passing the selection,
    * ``met_kin``  -- histogram of the rebuilt MET magnitude.
    """

    def __init__(self):
        # A factory rather than a shared object: every call to `process`
        # starts from a fresh, empty set of accumulators.
        self.make_output = lambda: {
            'sumw': 0.,
            'n_events': 0,
            'met_kin': hist.Hist(hist.axis.Regular(40, 50, 400, name='met', label='$p_T^{miss}$ [GeV]'))
        }

    def get_pt_x(self, obj: ak.Array):
        """Return, per event, the sum of the x-components of pT over `obj`."""
        return ak.sum(obj.pt * np.cos(obj.phi), axis=1)

    def get_pt_y(self, obj: ak.Array):
        """Return, per event, the sum of the y-components of pT over `obj`."""
        return ak.sum(obj.pt * np.sin(obj.phi), axis=1)

    def get_met(self, objs: list):
        """Return a Momentum2D for MET.

        Parameters
        ----------
        objs : list
            Collections (one jagged array per object type) exposing ``pt``
            and ``phi``; all must describe the same events.

        Returns
        -------
        ak.Array
            ``Momentum2D`` records with fields ``pt`` and ``phi``: the
            negative of the vector sum of the transverse momenta of every
            object in `objs`.
        """
        pt_x = ak.sum([self.get_pt_x(obj) for obj in objs],
                      axis=0,
                      mask_identity=True)
        pt_y = ak.sum([self.get_pt_y(obj) for obj in objs],
                      axis=0,
                      mask_identity=True)

        return ak.zip(
            {
                "pt": np.sqrt(pt_x ** 2 + pt_y ** 2),
                # MET points opposite to the visible sum. arctan2 of the
                # negated components picks the correct quadrant for every
                # sign of pt_x (arctan(y/x) + pi is only right for pt_x > 0)
                # and yields phi in [-pi, pi] without a 0/0 division.
                "phi": np.arctan2(-pt_y, -pt_x)
            },
            with_name="Momentum2D",
            behavior=vector.behavior
        )

    def process(self, events):
        """Select dimuon events in one chunk and fill the accumulators.

        Parameters
        ----------
        events
            Chunk of events; must provide ``metadata['dataset']``,
            ``genWeight`` and the ``Muon``, ``Jet``, ``Electron`` and
            ``Photon`` collections.

        Returns
        -------
        dict
            ``{short_dataset_name: output}``; the full dataset name is used
            as the key when it has no entry in ``dataset_short_name``.
        """
        dataset = events.metadata['dataset']
        # Pad to at least two entries so that [:, 0] and [:, 1] are always
        # valid; missing muons show up as None.
        muon = ak.pad_none(events.Muon, target=2)
        leading_muons_mass = (muon[:, 0] + muon[:, 1]).mass

        mask = (
            (np.sum(muon.pt > 0, axis=1) == 2)
            & (ak.prod(muon.charge, axis=1) == -1)
            & (muon.pt[:, 0] > 50)
            & (muon.pt[:, 1] > 50)
            & (abs(muon.eta)[:, 0] < 2.1)
            & (abs(muon.eta)[:, 1] < 2.1)
            & ((60 < leading_muons_mass) & (leading_muons_mass < 120))
        )
        # Events with fewer than two muons give None (not False) above.
        # Indexing with an option-type mask is expected to carry those None
        # entries into the selection, which would inflate `n_events`; turn
        # them into an explicit rejection.
        mask = ak.fill_none(mask, False)

        selected = events[mask]
        met = self.get_met([selected.Jet,
                            selected.Electron,
                            selected.Photon])

        output = self.make_output()
        # Sum of generator weights is taken over all events of the chunk,
        # i.e. before the selection.
        output['sumw'] = ak.sum(events.genWeight)
        output['n_events'] = len(selected)
        output["met_kin"].fill(met=ak.flatten(met["pt"], axis=0))

        return {dataset_short_name.get(dataset, dataset): output}

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator

In [4]:
# Address of the Dask scheduler this notebook connects to; edit here when
# running against a different scheduler.
DASK_SCHEDULER_ADDRESS = "tls://daniel-2eocampo-2ehenao-40cern-2ech.dask.coffea.casa:8786"

client = Client(DASK_SCHEDULER_ADDRESS)
client

0,1
Connection method: Direct,
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/8787/status,

0,1
Comm: tls://192.168.7.129:8786,Workers: 1
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/8787/status,Total threads: 2
Started: 4 minutes ago,Total memory: 7.00 GiB

0,1
Comm: tls://daniel-2eocampo-2ehenao-40cern-2ech.dask-worker.coffea.casa:8788,Total threads: 2
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/39321/status,Memory: 7.00 GiB
Nanny: tls://192.168.7.129:33013,
Local directory: /home/cms-jovyan/dask-worker-space/worker-mmsp4_4q,Local directory: /home/cms-jovyan/dask-worker-space/worker-mmsp4_4q
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 6.0%,Last seen: Just now
Memory usage: 516.65 MiB,Spilled bytes: 0 B
Read bytes: 15.07 kiB,Write bytes: 20.55 kiB


In [5]:
# JSON file holding one entry per dataset: {dataset name: [file path, ...]}.
FILESET_JSON = "/home/cms-jovyan/VBFDM_UdeA/fileset/ZToMuMu.json"
# Prefix put in front of every file path (presumably the XCache redirector
# of the facility -- confirm).
XROOTD_PREFIX = "root://xcache/"

# The file is a plain name -> list-of-strings mapping, so the standard json
# module is enough to read it.
with open(FILESET_JSON) as f:
    data = json.load(f)

fileset = {
    dataset: [XROOTD_PREFIX + file for file in files]
    for dataset, files in data.items()
}

In [6]:
# Run the processor over the "Events" tree of every file in `fileset`,
# distributing the work through the Dask client created above.
background_estimator = BackgroundEstimatorProcessor()
dask_options = {
    "schema": processor.NanoAODSchema,
    "client": client,
}

out = processor.run_uproot_job(
    fileset,
    treename="Events",
    processor_instance=background_estimator,
    executor=processor.dask_executor,
    executor_args=dask_options,
)

[########################################] | 100% Completed |  1min 18.9s

In [7]:
# Directory receiving one pickle file per dataset.
OUTPUT_DIR = Path("/home/cms-jovyan/VBFDM_UdeA/notebooks/background estimation/output")
# Create it up front so a missing directory cannot discard the job's results.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for dataset, result in out.items():
    with open(OUTPUT_DIR / f"{dataset}.pkl", "wb") as f:
        # protocol=-1 selects the highest pickle protocol available.
        cPickle.dump(result, f, protocol=-1)