In [25]:
import glob
import json
import os
import pickle
import subprocess

import awkward as ak
import numpy as np
import pandas as pd
import uproot
from coffea import processor, util, hist

In [26]:
# Integrated luminosity per data-taking year
# (presumably in fb^-1 -- TODO confirm units against the analysis note).
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.2,
}

# Per-dataset cross sections, keyed by dataset name.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping used later to group individual datasets into physics processes.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [27]:
# Data-taking year processed by this notebook.
year = '2016'
# Count the per-job input lists with glob instead of splitting the output of
# `ls`: when nothing matches, `ls` prints an error message whose words would
# be counted as files, and file names containing spaces would be over-counted.
nfiles = len(glob.glob('infiles-split/'+year+'*.json'))
# Accumulator that collects the merged cutflow and sum of weights.
outsum = processor.dict_accumulator()

In [28]:
# Number of per-job input lists for this year. glob returns an empty list when
# nothing matches, whereas splitting `ls` output would count the words of its
# error message as files.
nfiles = len(glob.glob('infiles-split/'+year+'*.json'))
# NOTE(review): site-specific absolute path; consider moving it to a config cell.
coffeadir_prefix = '/myeosdir/ggf-vbf/outfiles-ddb2/'+year+'/'

# Only rebuild the pickle when it does not exist yet.
picklename = year+'/cutflow.pkl'
repickle = not os.path.isfile(picklename)

In [29]:
if repickle:
    # Merge the per-job coffea outputs for `year`, scale every dataset by
    # cross section x luminosity / sum of weights, group the datasets into
    # processes and cache the grouped cutflow histogram as a pickle.

    # Start from a fresh accumulator so this cell can be re-run on its own:
    # the accumulator is deleted again further down to free memory.
    outsum = processor.dict_accumulator()

    for n in range(1, nfiles+1):

        filename = coffeadir_prefix+year+'_'+str(n)+'.coffea'
        if not os.path.isfile(filename):
            # The input list is only needed to report which datasets the
            # missing job was supposed to cover.
            with open('infiles-split/'+year+'_'+str(n)+'.json') as f:
                infiles = json.load(f)
            print('Missing file '+str(n),infiles.keys())
            continue

        out = util.load(filename)

        # Seed the sums with the first output that actually exists.
        # Keying this on n == 1 would raise a KeyError on the next .add()
        # whenever job 1 is missing.
        if 'cutflow' not in outsum:
            outsum['cutflow'] = out['cutflow']
            outsum['sumw'] = out['sumw']
        else:
            outsum['cutflow'].add(out['cutflow'])
            outsum['sumw'].add(out['sumw'])

        del out

    if 'cutflow' not in outsum:
        raise RuntimeError('No coffea output files found under '+coffeadir_prefix)

    # Per-dataset normalisation: xsec * 1000 * lumi / sum of weights.
    # (the factor 1000 is presumably a pb -> fb conversion -- TODO confirm)
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')
    # Replace the 'dataset' axis by a 'process' axis according to pmap.
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    del outsum

    # Make sure the output directory exists, and close the file even if
    # pickling fails.
    os.makedirs(os.path.dirname(picklename) or '.', exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)



In [30]:
# Read the histogram from the pickle file
templates = pickle.load(open(picklename,'rb'))

In [31]:
# Sum over the 'genflavor' axis and keep only the 'signal-ggf' region.
cutflow = (
    templates
    .sum('genflavor')
    .integrate('region', ['signal-ggf'])
)
cutflow

<Hist (process,cut) instance at 0x7f496b817cd0>

In [32]:
# Show the cutflow contents as a dict keyed by a (process,) tuple.
cutflow.values()

{('ZH',): array([1.69485517e+03, 7.73572122e+01, 7.73572122e+01, 7.70869492e+01,
        2.59654957e+01, 2.59654957e+01, 1.47117306e+01, 8.90667586e+00,
        3.05196837e+00, 9.91901933e-01, 9.32026379e-01, 2.09791719e-01,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([5.32754689e+03, 8.90047815e+01, 8.90047815e+01, 8.86599735e+01,
        2.67147250e+01, 2.67147250e+01, 1.36304529e+01, 1.18966360e+01,
        2.83318833e+00, 5.52071177e-01, 5.33091984e-01, 7.29746738e-02,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([5.09479400e+03, 1.27240290e+03, 1.27240290e+03, 1.26830699e+03,
        3.65340660e+02, 3.65340660e+02, 1.36900888e+02, 4.14190620e+01,
        3.26922708e+01, 2.20052596e+01, 2.15349487e+01, 1.65390636e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([6.10804330e+03, 4.42517159e+02, 4.42517159e+02, 4.41279648e+02,
        9.42177553e+01, 9.42177553e+01, 4.94904446e+01, 3.86980062e+

In [33]:
# Signal cutflow table: one column per Higgs production mode, one row per cut.
signal_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']
signal_cut_labels = [
    'nothing', 'trigger', 'lumimask', 'metfilter',
    'minjetkin', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
    'met', 'noleptons', 'notvbf', 'ddbpass',
]

# Fetch the values once and pick the array of each process out of the dict.
signal_yields = cutflow.values()
df1 = pd.DataFrame({proc: signal_yields[(proc,)] for proc in signal_processes})

# Drop the last three entries and truncate the yields to integers, leaving
# one row per label above.
df1 = df1.iloc[:-3].astype('int')
df1.index = signal_cut_labels
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
nothing,33676,6108,5327,1694,5094
trigger,1249,442,89,77,1272
lumimask,1249,442,89,77,1272
metfilter,1249,441,88,77,1268
minjetkin,280,94,26,25,365
jetid,280,94,26,25,365
n2ddt,110,49,13,14,136
antiak4btagMediumOppHem,76,38,11,8,41
met,76,38,2,3,32
noleptons,76,36,0,0,22


In [34]:
# Keep the rows from 'minjetkin' through 'noleptons' (positions 4..-3) and
# relabel them for the LaTeX table.
df1 = df1.iloc[4:-2]
# The N_2 label must be a raw string: in a normal string literal '\t' is a
# TAB character, so '\text' would reach LaTeX as <TAB>ext.
df1.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
# escape=False keeps the math-mode labels ($, ^, _, \) intact; older pandas
# versions escape these characters by default.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
Jet kinematics,280,94,26,25,365
Jet ID,280,94,26,25,365
Jet $N_2^\text{DDT}$,110,49,13,14,136
Opp. hem. b veto,76,38,11,8,41
MET $<$ 140 GeV,76,38,2,3,32
No leptons,76,36,0,0,22


In [35]:
# Background cutflow table: one column per background process, one row per cut.
background_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']
background_cut_labels = [
    'nothing', 'trigger', 'lumimask', 'metfilter',
    'minjetkin', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
    'met', 'noleptons', 'notvbf', 'ddbpass',
]

# Fetch the values once and pick the array of each process out of the dict.
background_yields = cutflow.values()
df2 = pd.DataFrame({proc: background_yields[(proc,)] for proc in background_processes})

# Drop the last three entries and truncate the yields to integers, leaving
# one row per label above.
df2 = df2.iloc[:-3].astype('int')
df2.index = background_cut_labels
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
nothing,361026915,14529351,1812489,1126141,12841318,3250080
trigger,146787770,1737915,685003,16008,752384,89844
lumimask,146787770,1737915,685003,16008,752384,89844
metfilter,146381557,1731311,682351,15939,750007,89517
minjetkin,26845129,378811,152238,4722,205108,23369
jetid,26845129,378811,152238,4722,205108,23369
n2ddt,6451330,140762,61862,2565,76709,11519
antiak4btagMediumOppHem,5752435,127000,53270,2266,37771,5362
met,5709417,116530,52429,1759,30063,4256
noleptons,5578603,102913,42256,1384,20992,3309


In [36]:
# Keep the rows from 'minjetkin' through 'noleptons' (positions 4..-3) and
# relabel them for the LaTeX table.
df2 = df2.iloc[4:-2]
# The N_2 label must be a raw string: in a normal string literal '\t' is a
# TAB character, so '\text' would reach LaTeX as <TAB>ext.
df2.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
# escape=False keeps the math-mode labels ($, ^, _, \) intact; older pandas
# versions escape these characters by default.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
Jet kinematics,26845129,378811,152238,4722,205108,23369
Jet ID,26845129,378811,152238,4722,205108,23369
Jet $N_2^\text{DDT}$,6451330,140762,61862,2565,76709,11519
Opp. hem. b veto,5752435,127000,53270,2266,37771,5362
MET $<$ 140 GeV,5709417,116530,52429,1759,30063,4256
No leptons,5578603,102913,42256,1384,20992,3309
