In [11]:
import _pickle as cPickle
import json
from pathlib import Path

import awkward as ak
import hist
import numpy as np
from coffea import processor

In [12]:
# Maps each full ZJetsToNuNu sample name to a short label. The short label is
# the full name with the "_TuneCP5_13TeV-madgraphMLM-pythia8" suffix removed,
# so both are generated from the HT-bin label alone.
dataset_short_name = {
    f"ZJetsToNuNu_HT-{ht_bin}_TuneCP5_13TeV-madgraphMLM-pythia8": f"ZJetsToNuNu_HT-{ht_bin}"
    for ht_bin in (
        "100To200",
        "1200To2500",
        "200To400",
        "2500ToInf",
        "400To600",
        "600To800",
        "800To1200",
    )
}

In [13]:
class BackgroundEstimatorProcessor(processor.ProcessorABC):
    """Coffea processor that summarises missing-pT information per dataset.

    For each chunk of events, ``process`` returns a one-entry dict keyed by
    the dataset's short name, whose value is a dict with:

    * ``'sumw'``     -- sum of ``genWeight`` over *all* events in the chunk
                        (computed before the MET selection);
    * ``'n_events'`` -- number of events with ``MET.pt > 50``;
    * ``'met_kin'``  -- histogram of ``MET.pt`` for the selected events
                        (40 regular bins on [50, 400); axis labelled in GeV).
    """

    def __init__(self):
        # A factory rather than a ready-made dict, so every call to
        # ``process`` starts from its own fresh, empty output.
        self.make_output = lambda: {
            'sumw': 0.,
            'n_events': 0,
            'met_kin': hist.Hist(hist.axis.Regular(40, 50, 400, name='met', label='$p_T^{miss}$ [GeV]'))
        }

    def process(self, events):
        """Build the per-chunk output for ``events``.

        Parameters
        ----------
        events
            Event chunk exposing ``metadata['dataset']``, ``MET.pt`` and
            ``genWeight``.

        Returns
        -------
        dict
            ``{name: output}`` where ``name`` is the entry of
            ``dataset_short_name`` for this dataset, or the full dataset name
            when no short name is registered.
        """
        dataset = events.metadata['dataset']

        # Apply the MET selection once and reuse the selected events below.
        mask = events.MET.pt > 50
        selected = events[mask]

        output = self.make_output()
        # Deliberately taken over the unselected events.
        output['sumw'] = ak.sum(events.genWeight)
        output['n_events'] = len(selected)
        # Note: the histogram is filled without per-event weights.
        output["met_kin"].fill(met=selected.MET.pt)

        # Fall back to the full name instead of raising KeyError on a worker
        # when a dataset has no registered short name.
        return {dataset_short_name.get(dataset, dataset): output}

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator

In [14]:
from dask.distributed import Client

# Address of the Dask scheduler this notebook connects to.
scheduler_address = "tls://daniel-2eocampo-2ehenao-40cern-2ech.dask.coffea.casa:8786"
client = Client(scheduler_address)

# Bare expression so the notebook renders the client summary as cell output.
client

0,1
Connection method: Direct,
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/8787/status,

0,1
Comm: tls://192.168.7.129:8786,Workers: 1
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/8787/status,Total threads: 2
Started: 9 minutes ago,Total memory: 7.00 GiB

0,1
Comm: tls://daniel-2eocampo-2ehenao-40cern-2ech.dask-worker.coffea.casa:8788,Total threads: 2
Dashboard: /user/daniel.ocampo.henao@cern.ch/proxy/39321/status,Memory: 7.00 GiB
Nanny: tls://192.168.7.129:33013,
Local directory: /home/cms-jovyan/dask-worker-space/worker-mmsp4_4q,Local directory: /home/cms-jovyan/dask-worker-space/worker-mmsp4_4q
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 4.0%,Last seen: Just now
Memory usage: 614.13 MiB,Spilled bytes: 0 B
Read bytes: 46.81 kiB,Write bytes: 50.58 kiB


In [15]:
# Read the fileset description; each field of `data` is a dataset name whose
# value is iterated below as that dataset's file names.
data = ak.from_json("/home/cms-jovyan/VBFDM_UdeA/fileset/fileset_2017_UL_NANO.json")

# Keep only the ZJetsToNuNu datasets and prefix every file with the
# "root://xcache/" redirector.
fileset = {
    dataset: ["root://xcache/" + file for file in data[dataset]]
    for dataset in data.fields
    if "ZJetsToNuNu" in dataset
}

In [16]:
# Options handed to the Dask executor: the schema used to interpret the
# "Events" tree and the client the work is submitted through.
executor_options = {
    "schema": processor.NanoAODSchema,
    "client": client,
}

# Run the processor over every file in `fileset`; `out` holds the merged result.
out = processor.run_uproot_job(
    fileset,
    treename="Events",
    processor_instance=BackgroundEstimatorProcessor(),
    executor=processor.dask_executor,
    executor_args=executor_options,
)

[########################################] | 100% Completed |  2min 30.8s

In [17]:
# Persist each dataset's output as its own pickle file, named after the
# dataset key in `out`.
output_dir = Path('/home/cms-jovyan/VBFDM_UdeA/notebooks/background estimation/output')
# Create the directory first so a missing folder cannot make the dump fail
# after the processing job has already run.
output_dir.mkdir(parents=True, exist_ok=True)

for dataset, dataset_output in out.items():
    with open(output_dir / f'{dataset}.pkl', 'wb') as f:
        # protocol=-1 selects the highest pickle protocol available.
        cPickle.dump(dataset_output, f, protocol=-1)