In [11]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd
import pickle

In [12]:
# Integrated luminosity per data-taking year, looked up by year string.
# NOTE(review): units are presumably fb^-1 (a factor 1000 is applied with xs later) — confirm.
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Cross sections, later indexed by dataset name when building the lumi scale factors
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping passed to the histogram grouping step to merge datasets into processes
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [13]:
# Data-taking year processed by this notebook
year = '2017'

# Count the per-year input lists (infiles-split/<year>*.json) directly from the
# directory listing. Parsing the text printed by `ls` is fragile: when nothing
# matches, the words of the shell error message would be counted as files.
# A missing directory simply yields 0 so that the notebook can still be run
# from an existing pickle without the input lists being present.
indir = 'infiles-split'
nfiles = sum(
    1
    for name in (os.listdir(indir) if os.path.isdir(indir) else [])
    if name.startswith(year) and name.endswith('.json')
)

# Accumulator that collects the per-file coffea outputs
outsum = processor.dict_accumulator()

In [14]:
# Location of the cached cutflow histogram for the selected year
picklename = 'pickles/' + str(year) + '_cutflow.pkl'

# Rebuild the pickle only when no cached copy exists on disk
repickle = not os.path.isfile(picklename)

In [15]:
# Load all files - this takes a while
if repickle:
    # Start from an empty accumulator: the scaling below modifies the cutflow
    # histogram in place, so re-running this cell on an already filled (and
    # already scaled) accumulator would mix scaled and unscaled content.
    outsum = processor.dict_accumulator()

    # Count the input lists from the directory listing rather than from the
    # text printed by `ls` (an error message would otherwise be counted as files).
    indir = 'infiles-split'
    listing = os.listdir(indir) if os.path.isdir(indir) else []
    nfiles = sum(
        1 for name in listing
        if name.startswith(year) and name.endswith('.json')
    )
    if nfiles == 0:
        raise FileNotFoundError('No input lists matching ' + indir + '/' + year + '*.json')

    for n in range(1, nfiles + 1):
        filename = '/myeosdir/vbf-category/outfiles/' + year + '_' + str(n) + '.coffea'
        if os.path.isfile(filename):
            outsum.add(util.load(filename))
        else:
            # The input list is only needed to report which datasets are missing,
            # so it is read here instead of on every iteration.
            with open(indir + '/' + year + '_' + str(n) + '.json') as f:
                infiles = json.load(f)
            print('Missing file ' + str(n), infiles.keys())

    # Per-dataset weight: cross section * 1000 * luminosity / sum of weights
    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Merge the individual datasets into processes as defined by pmap
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Make sure the output directory exists, and let the context manager close
    # the file even if pickling fails.
    outdir = os.path.dirname(picklename)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

Missing file 2416 dict_keys(['WJetsToLNu_HT-800To1200_TuneCP5_13TeV-madgraphMLM-pythia8'])




In [16]:
# Read the histogram from the pickle file.
# NOTE: unpickling can execute arbitrary code; only load pickles written by this notebook.
# The context manager closes the file handle, which the bare open() call left dangling.
with open(picklename, 'rb') as infile:
    templates = pickle.load(infile)

In [20]:
# Sum over the 'genflavor' axis first ...
flavor_summed = templates.sum('genflavor')
# ... then pick the 'signal' entry of the 'region' axis, leaving (process, cut)
cutflow = flavor_summed.integrate('region', 'signal')
cutflow

<Hist (process,cut) instance at 0x7fe257efae80>

In [22]:
# Display the raw cutflow contents: a dict keyed by 1-tuples of the process name,
# each holding one array with an entry per cut.
cutflow.values()

{('ZH',): array([2.21747298e+04, 2.21747298e+04, 2.21747298e+04, 9.49852519e+01,
        9.49852519e+01, 9.04880892e+01, 5.51967709e+01, 2.62442052e+01,
        1.97360044e+01, 1.67392741e+01, 1.45005465e+01, 5.61619252e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([2.30628496e+04, 2.30628496e+04, 2.30628496e+04, 9.13654428e+01,
        9.13654428e+01, 8.66382904e+01, 5.00761613e+01, 2.83562475e+01,
        1.78139051e+01, 1.48700475e+01, 1.15584566e+01, 3.55353679e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([8.62415990e+03, 8.62415990e+03, 8.62415990e+03, 4.38370048e+02,
        4.38370048e+02, 3.69531134e+02, 1.29608221e+02, 4.34946785e+01,
        3.33283340e+01, 2.20512537e+01, 2.19544867e+01, 5.79109185e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([8.95949424e+04, 8.95949424e+04, 8.95949424e+04, 9.00969471e+01,
        9.00969471e+01, 8.74344454e+01, 4.83479656e+01, 3.97708020e+

In [23]:
# Signal processes shown in the table ('ttH' is not included)
sig_processes = ['ggF', 'VBF', 'WH', 'ZH']

# Evaluate the histogram contents once instead of once per column
sig_values = cutflow.values()
df1 = pd.DataFrame({proc: sig_values[(proc,)] for proc in sig_processes})

# Drop the first three and the last three cutflow entries, then truncate the
# yields to integers for the table.
df1 = df1.iloc[3:-3].astype('int')

# Row labels are LaTeX snippets. The N2 label must be a raw string: in a normal
# string literal '\t' of '\text' is interpreted as a TAB character.
df1.index = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    '2 AK4 jets',
    'DeepDoubleB',
]

# Create the per-year output directory if needed, and write the labels verbatim
# (escape=False) so the math-mode markup is not escaped by pandas.
os.makedirs(year, exist_ok=True)
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [26]:
# Background processes shown in the table, in column order
bkg_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']

# Evaluate the histogram contents once instead of once per column
bkg_values = cutflow.values()
df2 = pd.DataFrame({proc: bkg_values[(proc,)] for proc in bkg_processes})

# Drop the first three and the last three cutflow entries, then truncate the
# yields to integers for the table.
df2 = df2.iloc[3:-3].astype('int')

# Row labels are LaTeX snippets. The N2 label must be a raw string: in a normal
# string literal '\t' of '\text' is interpreted as a TAB character.
df2.index = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    '2 AK4 jets',
    'DeepDoubleB',
]

# Create the per-year output directory if needed, and write the labels verbatim
# (escape=False) so the math-mode markup is not escaped by pandas.
os.makedirs(year, exist_ok=True)
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)

             QCD         Wjets         Zjets            VV         ttbar  \
0   1.205111e+10  2.628447e+07  5.679276e+06  4.788700e+06  3.144030e+07   
1   1.205111e+10  2.628447e+07  5.679276e+06  4.788700e+06  3.144030e+07   
2   1.205111e+10  2.628447e+07  5.679276e+06  4.788700e+06  3.144030e+07   
3   2.255570e+07  4.032667e+05  1.861593e+05  5.242448e+03  2.309422e+05   
4   2.255570e+07  4.032667e+05  1.861593e+05  5.242448e+03  2.309422e+05   
5   2.084924e+07  3.635525e+05  1.675579e+05  4.961115e+03  2.077487e+05   
6   5.489094e+06  1.456184e+05  7.499058e+04  2.870781e+03  7.184426e+04   
7   5.066316e+06  1.356185e+05  6.694697e+04  2.627806e+03  3.836097e+04   
8   4.957781e+06  1.210691e+05  6.488062e+04  1.964926e+03  2.959958e+04   
9   4.822519e+06  1.051639e+05  4.902460e+04  1.541998e+03  2.032769e+04   
10  4.010631e+06  9.146526e+04  4.330150e+04  1.287936e+03  1.977746e+04   
11  6.501641e+04  1.922858e+03  2.831747e+03  5.597118e+01  2.339596e+03   
12  0.000000