In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd
import pickle

In [2]:
lumis = {}
lumis['2016'] = 35.9
lumis['2017'] = 41.5
lumis['2018'] = 59.2

with open('xsec.json') as f:
  xs = json.load(f)

with open('pmap.json') as f:
  pmap = json.load(f)

In [3]:
year = '2018'
nfiles = len(subprocess.getoutput("ls infiles-split/"+year+"*.json").split())
outsum = processor.dict_accumulator()

In [4]:
nfiles = len(subprocess.getoutput("ls infiles-split/"+year+"*.json").split())
coffeadir_prefix = '/myeosdir/ggf-vbf/outfiles-ddb2/'+year+'/'

repickle = True
# Check if pickle exists, don't recreate it if it does
picklename = year+'/cutflow.pkl'
if os.path.isfile(picklename):
    repickle = False

In [5]:
if repickle:

    for n in range(1,nfiles+1):
    
        with open('infiles-split/'+year+'_'+str(n)+'.json') as f:
            infiles = json.load(f)
    
        filename = coffeadir_prefix+year+'_'+str(n)+'.coffea'
        if os.path.isfile(filename):
            out = util.load(filename)

            if n == 1:
                outsum['cutflow'] = out['cutflow']
                outsum['sumw'] = out['sumw']
            else:
                outsum['cutflow'].add(out['cutflow'])
                outsum['sumw'].add(out['sumw'])
                
            del out
        else:
            print('Missing file '+str(n),infiles.keys())
            #print("File " + filename + " is missing")  
            
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)
    
    del outsum
    
    outfile = open(picklename, 'wb')
    pickle.dump(cutflow, outfile, protocol=-1)
    outfile.close()



In [6]:
# Read the histogram from the pickle file
templates = pickle.load(open(picklename,'rb'))

In [7]:
cutflow = templates.sum('genflavor').integrate('region',['signal-ggf'])
cutflow

<Hist (process,cut) instance at 0x7f37a0c49f40>

In [8]:
cutflow.values()

{('ZH',): array([5.91226379e+03, 3.81252769e+02, 3.81252769e+02, 3.80000855e+02,
        1.35953493e+02, 1.35953493e+02, 8.40429911e+01, 3.89422510e+01,
        2.88015103e+01, 2.50478259e+01, 2.43115170e+01, 4.65015218e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([1.30720100e+04, 4.75406668e+02, 4.75406668e+02, 4.73979388e+02,
        1.73686685e+02, 1.73686685e+02, 1.04828759e+02, 5.29144091e+01,
        3.66765603e+01, 3.21836601e+01, 3.14251350e+01, 4.97918893e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([8.75783872e+03, 1.49393039e+03, 1.49393039e+03, 1.49031321e+03,
        5.48543003e+02, 5.48543003e+02, 1.95402202e+02, 5.91990325e+01,
        4.61475112e+01, 3.09308606e+01, 3.02564584e+01, 2.23424997e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([1.19851352e+04, 5.63322808e+02, 5.63322808e+02, 5.61770506e+02,
        1.36294908e+02, 1.36294908e+02, 7.70127683e+01, 6.14768859e+

In [9]:
df1 = pd.DataFrame([])

df1['ggF'] = cutflow.values()[('ggF',)]
df1['VBF'] = cutflow.values()[('VBF',)]
df1['WH'] = cutflow.values()[('WH',)]
df1['ZH'] = cutflow.values()[('ZH',)]
df1['ttH'] = cutflow.values()[('ttH',)]


df1
df1 = df1[:-3].astype('int')
df1
df1.index = ['nothing','trigger','lumimask','metfilter','minjetkin','jetid','n2ddt','antiak4btagMediumOppHem','met','noleptons','notvbf','ddbpass']
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
nothing,64405,11985,13072,5912,8757
trigger,1348,563,475,381,1493
lumimask,1348,563,475,381,1493
metfilter,1345,561,473,380,1490
minjetkin,375,136,173,135,548
jetid,375,136,173,135,548
n2ddt,189,77,104,84,195
antiak4btagMediumOppHem,141,61,52,38,59
met,139,60,36,28,46
noleptons,135,58,32,25,30


In [10]:
df1 = df1[4:-2]
df1.index = ['Jet kinematics','Jet ID','Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
df1.to_latex(buf=year+'/cutflow-sig.tex')
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
Jet kinematics,375,136,173,135,548
Jet ID,375,136,173,135,548
Jet $N_2^\text{DDT}$,189,77,104,84,195
Opp. hem. b veto,141,61,52,38,59
MET $<$ 140 GeV,139,60,36,28,46
No leptons,135,58,32,25,30


In [11]:
df2 = pd.DataFrame([])

df2['QCD'] = cutflow.values()[('QCD',)]
df2['Wjets'] = cutflow.values()[('Wjets',)]
df2['Zjets'] = cutflow.values()[('Zjets',)]
df2['VV'] = cutflow.values()[('VV',)]
df2['ttbar'] = cutflow.values()[('ttbar',)]
df2['singlet'] = cutflow.values()[('singlet',)]

df2 = df2[:-3].astype('int')
df2.index = ['nothing','trigger','lumimask','metfilter','minjetkin','jetid','n2ddt','antiak4btagMediumOppHem','met','noleptons','notvbf','ddbpass']
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
nothing,1542533361,25156826,3340706,2372304,22221060,3412929
trigger,157943429,2243608,877957,23764,930774,85883
lumimask,157943429,2243608,877957,23764,930774,85883
metfilter,157541072,2235965,874786,23680,928572,85611
minjetkin,34155964,554435,231291,7308,308357,27725
jetid,34155964,554435,231291,7308,308357,27725
n2ddt,9365381,225932,104881,4261,109112,13998
antiak4btagMediumOppHem,8518499,206664,91769,3819,54170,6464
met,8427795,184599,89871,2871,42598,4856
noleptons,8221161,159715,67499,2222,29629,3658


In [12]:
df2 = df2[4:-2]
df2.index = ['Jet kinematics','Jet ID','Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
df2.to_latex(buf=year+'/cutflow-bkg.tex')
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
Jet kinematics,34155964,554435,231291,7308,308357,27725
Jet ID,34155964,554435,231291,7308,308357,27725
Jet $N_2^\text{DDT}$,9365381,225932,104881,4261,109112,13998
Opp. hem. b veto,8518499,206664,91769,3819,54170,6464
MET $<$ 140 GeV,8427795,184599,89871,2871,42598,4856
No leptons,8221161,159715,67499,2222,29629,3658
