In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd
import pickle

In [2]:
# Integrated luminosity per data-taking year
# (presumably in fb^-1 -- TODO confirm the unit against the analysis note)
lumis = {'2016': 35.9, '2017': 41.5, '2018': 59.2}

# Per-dataset cross sections, keyed by dataset name
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping used later to group datasets into physics processes
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [3]:
# Data-taking year processed by this notebook
year = '2018'

# Number of per-job input lists for this year (infiles-split/<year>*.json).
# Counted from a directory listing rather than by splitting the text output
# of a shell `ls`: when nothing matches, `ls` prints an error message whose
# words would otherwise be counted as files.
nfiles = len([name for name in os.listdir('infiles-split')
              if name.startswith(year) and name.endswith('.json')])

# Accumulator that will hold the summed outputs of all jobs
outsum = processor.dict_accumulator()

In [4]:
# Number of per-job input lists for this year (infiles-split/<year>*.json).
# Counted from a directory listing rather than by splitting the text output
# of a shell `ls`, whose error message would be counted as file names when
# nothing matches.
nfiles = len([name for name in os.listdir('infiles-split')
              if name.startswith(year) and name.endswith('.json')])
coffeadir_prefix = '/myeosdir/ggf-vbf/outfiles-ddb2/'

# The merged cutflow is cached as a pickle. It is rebuilt only when that
# file does not exist yet; an existing pickle is kept, not removed.
picklename = 'pickles/'+str(year)+'_cutflow.pkl'
repickle = not os.path.isfile(picklename)

In [5]:
if repickle:

    # Merge the per-job coffea outputs for this year. The totals are seeded
    # from the first output file that actually exists, so a missing job 1
    # no longer leaves outsum without its 'cutflow'/'sumw' entries.
    merged_any = False
    for n in range(1, nfiles+1):

        filename = coffeadir_prefix+year+'_'+str(n)+'.coffea'
        if not os.path.isfile(filename):
            # Report absent job outputs instead of skipping them silently
            print('Skipping missing file: '+filename)
            continue

        out = util.load(filename)

        if not merged_any:
            outsum['cutflow'] = out['cutflow']
            outsum['sumw'] = out['sumw']
            merged_any = True
        else:
            outsum['cutflow'].add(out['cutflow'])
            outsum['sumw'].add(out['sumw'])

    if not merged_any:
        raise FileNotFoundError('No .coffea outputs found for '+year+' under '+coffeadir_prefix)

    # Per-dataset scale factor: cross section * 1000 * luminosity / sum of weights
    # (the factor 1000 is presumably a pb -> fb conversion -- TODO confirm)
    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Regroup the 'dataset' axis into a 'process' axis using pmap
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Cache the result; create the target directory on first use and let the
    # context manager close the file even if pickling fails.
    os.makedirs(os.path.dirname(picklename), exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

  left[lkey] += right[rkey]


In [6]:
# Read the histogram from the pickle file.
# The context manager closes the file handle once loading is done.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook.
with open(picklename, 'rb') as picklefile:
    templates = pickle.load(picklefile)

In [7]:
# Sum over the 'genflavor' axis, then integrate the 'region' axis over the
# two signal regions, leaving a (process, cut) histogram.
flavor_summed = templates.sum('genflavor')
signal_regions = ['signal-ggf','signal-vbf']
cutflow = flavor_summed.integrate('region', signal_regions)
cutflow

<Hist (process,cut) instance at 0x7fea1d67deb0>

In [8]:
# Show the yields after each cut as a dict keyed by 1-tuples of the process name
cutflow.values()

{('ZH',): array([848.24559579, 848.24559579, 848.24559579, 144.95652342,
        144.95652342, 137.23167475,  84.9427479 ,  39.30263355,
         29.19697697,  25.33948744,  24.60432926,   1.03691271,
          0.        ,   0.        ,   0.        ]),
 ('WH',): array([998.03052739, 998.03052739, 998.03052739, 184.31908085,
        184.31908085, 174.82985809, 105.45373654,  53.26005598,
         36.87123608,  32.35733931,  31.59881261,   1.1003186 ,
          0.        ,   0.        ,   0.        ]),
 ('ttH',): array([2.36760035e+03, 2.36760035e+03, 2.36760035e+03, 6.51354376e+02,
        6.51354376e+02, 5.53709451e+02, 1.98006692e+02, 6.00239810e+01,
        4.68304699e+01, 3.11974835e+01, 3.05081528e+01, 4.08512330e-01,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([1855.7213936 , 1855.7213936 , 1855.7213936 ,  141.35282149,
         141.35282149,  137.2922845 ,   77.41920513,   61.80150783,
          60.35946255,   58.6481988 ,   34.46497615,    2.090497

In [9]:
# Signal cutflow table: one column per Higgs production mode, one row per cut
signal_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']
signal_yields = cutflow.values()
df1 = pd.DataFrame({proc: signal_yields[(proc,)] for proc in signal_processes})

# Drop the last three cut bins so the 12 labels below line up with the rows.
# astype('int') truncates the yields toward zero (it does not round).
df1 = df1.iloc[:-3].astype('int')
signal_cut_names = [
    'nothing', 'trigger', 'lumimask', 'minjetkin', 'jetid', 'jetacceptance',
    'n2ddt', 'antiak4btagMediumOppHem', 'met', 'noleptons', 'notvbf', 'ddbpass',
]
df1.index = signal_cut_names
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
nothing,4213,1855,998,848,2367
trigger,4213,1855,998,848,2367
lumimask,4213,1855,998,848,2367
minjetkin,393,141,184,144,651
jetid,393,141,184,144,651
jetacceptance,376,137,174,137,553
n2ddt,189,77,105,84,198
antiak4btagMediumOppHem,142,61,53,39,60
met,139,60,36,29,46
noleptons,135,58,32,25,31


In [10]:
# Keep the seven rows from 'minjetkin' through 'noleptons' and give them
# publication labels.
# NOTE: this reassigns df1, so re-running this cell without first re-running
# the cell that builds df1 fails with an index length mismatch.
df1 = df1[3:-2]
# The N_2 label is a raw string: in a normal string literal '\t' is a TAB
# character, which would corrupt '\text' in the generated LaTeX.
df1.index = ['Jet kinematics','Jet ID','Jet acceptance',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']

# Make sure the per-year output directory exists before writing into it
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode markup in the labels intact; with escaping
# enabled, pandas would rewrite '$', '^' and '\' and break the formulas.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
Jet kinematics,393,141,184,144,651
Jet ID,393,141,184,144,651
Jet acceptance,376,137,174,137,553
Jet $N_2^\text{DDT}$,189,77,105,84,198
Opp. hem. b veto,142,61,53,39,60
MET $<$ 140 GeV,139,60,36,29,46
No leptons,135,58,32,25,31


In [11]:
# Background cutflow table: one column per background process, one row per cut
background_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']
background_yields = cutflow.values()
df2 = pd.DataFrame({proc: background_yields[(proc,)] for proc in background_processes})

# Drop the last three cut bins so the 12 labels below line up with the rows.
# astype('int') truncates the yields toward zero (it does not round).
df2 = df2.iloc[:-3].astype('int')
background_cut_names = [
    'nothing', 'trigger', 'lumimask', 'minjetkin', 'jetid', 'jetacceptance',
    'n2ddt', 'antiak4btagMediumOppHem', 'met', 'noleptons', 'notvbf', 'ddbpass',
]
df2.index = background_cut_names
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
nothing,913150319,6616635,2237454,56864,2031878,192576
trigger,913150319,6616635,2237454,56864,2031878,192576
lumimask,913150319,6616635,2237454,56864,2031878,192576
minjetkin,37043052,614166,256931,7786,344712,30509
jetid,37043052,614166,256931,7786,344712,30509
jetacceptance,34635571,561081,233862,7409,311840,27990
n2ddt,9466709,228256,106222,4323,110968,14132
antiak4btagMediumOppHem,8611960,208829,92950,3877,55170,6545
met,8521387,186375,91019,2899,43281,4905
noleptons,8318525,161128,68095,2239,29906,3687


In [12]:
# Keep the seven rows from 'minjetkin' through 'noleptons' and give them
# publication labels.
# NOTE: this reassigns df2, so re-running this cell without first re-running
# the cell that builds df2 fails with an index length mismatch.
df2 = df2[3:-2]
# The N_2 label is a raw string: in a normal string literal '\t' is a TAB
# character, which would corrupt '\text' in the generated LaTeX.
df2.index = ['Jet kinematics','Jet ID','Jet acceptance',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']

# Make sure the per-year output directory exists before writing into it
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode markup in the labels intact; with escaping
# enabled, pandas would rewrite '$', '^' and '\' and break the formulas.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)

In [13]:
# Display the relabelled background cutflow table
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
Jet kinematics,37043052,614166,256931,7786,344712,30509
Jet ID,37043052,614166,256931,7786,344712,30509
Jet acceptance,34635571,561081,233862,7409,311840,27990
Jet $N_2^\text{DDT}$,9466709,228256,106222,4323,110968,14132
Opp. hem. b veto,8611960,208829,92950,3877,55170,6545
MET $<$ 140 GeV,8521387,186375,91019,2899,43281,4905
No leptons,8318525,161128,68095,2239,29906,3687
