In [1]:
import glob
import json
import os, subprocess
import pickle

import awkward as ak
import numpy as np
import pandas as pd
import uproot
from coffea import processor, util, hist

In [2]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


# Lookup tables read from the working directory; later cells index them as
# lumis[year], xs[dataset] and pass pmap to the histogram grouping.
lumis = _load_json('lumi.json')
xs = _load_json('xsec.json')
pmap = _load_json('pmap.json')

In [3]:
# Data-taking period to process; it prefixes every input and output file name.
year = '2016APV'

# JSON file lists for this year, one per sample group.
# glob returns an empty list when nothing matches (parsing the output of `ls`
# would instead turn its error message into bogus "file names") and it keeps
# paths containing whitespace intact. Sorting makes the order deterministic.
infiles = sorted(glob.glob('infiles/' + year + '*.json'))

# Directory prefix of the per-sample .coffea outputs that get merged below.
coffeadir_prefix = 'outfiles-plots/'

# Accumulator that collects the merged 'cutflow' and 'sumw' entries.
outsum = processor.dict_accumulator()

In [4]:
# Where the merged, luminosity-scaled cutflow histogram is cached for this year
picklename = f'{year}/cutflow.pkl'

# The cache is rebuilt on every run. The existence check below is disabled;
# uncomment it to reuse a pickle that is already on disk.
repickle = True
#if os.path.isfile(picklename):
#    repickle = False

In [5]:
if repickle:

    # Start from a fresh accumulator so that re-running this cell neither
    # fails on a previously deleted `outsum` nor double-counts earlier merges.
    outsum = processor.dict_accumulator()

    for this_file in infiles:

        print(this_file)
        # Sample tag = everything after the first "_" of the base name, e.g.
        # "infiles/2016APV_QCD.json" -> "QCD". Working on the base name and
        # splitting only once keeps underscores in the directory or in the
        # sample name from truncating the tag.
        stem = os.path.splitext(os.path.basename(this_file))[0]
        index = stem.split("_", 1)[1]
        filename = coffeadir_prefix+year+"_dask_"+index+".coffea"

        # Sample list for this group; not used further in this cell but kept
        # available for interactive inspection.
        with open(this_file, 'r') as openfile:
            samples = json.load(openfile)

        print(filename)
        if os.path.isfile(filename):
            out = util.load(filename)

            # The first file seeds the accumulator, later ones are added to it
            if len(outsum.keys()) == 0:
                outsum['cutflow'] = out['cutflow']
                outsum['sumw'] = out['sumw']
            else:
                outsum['cutflow'].add(out['cutflow'])
                outsum['sumw'].add(out['sumw'])

            # Release the loaded output before reading the next one
            del out
        else:
            print('Missing file ' + filename)

    # Fail with a clear message instead of a KeyError on 'sumw' below
    if len(outsum.keys()) == 0:
        raise RuntimeError('No .coffea outputs found for ' + year + '; nothing to merge')

    # Per-dataset weight: cross section * 1000 * luminosity / sum of weights
    # (presumably xs in pb and lumi in fb^-1 -- TODO confirm units)
    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Merge the individual datasets into the processes defined in pmap
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    del outsum

    # Make sure the output directory exists before writing the cache
    outdir = os.path.dirname(picklename)
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    # The context manager closes the file even if pickling raises
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

infiles/2016APV_DYJetsToLL.json
outfiles-plots/2016APV_dask_DYJetsToLL.coffea
infiles/2016APV_Diboson.json
outfiles-plots/2016APV_dask_Diboson.coffea
infiles/2016APV_EWKV.json
outfiles-plots/2016APV_dask_EWKV.coffea
infiles/2016APV_GluGluHToBB.json
outfiles-plots/2016APV_dask_GluGluHToBB.coffea
infiles/2016APV_JetHTData.json
outfiles-plots/2016APV_dask_JetHTData.coffea
infiles/2016APV_QCD.json
outfiles-plots/2016APV_dask_QCD.coffea
infiles/2016APV_SingleMuData.json
outfiles-plots/2016APV_dask_SingleMuData.coffea
infiles/2016APV_SingleTop.json
outfiles-plots/2016APV_dask_SingleTop.coffea
infiles/2016APV_TTbar.json
outfiles-plots/2016APV_dask_TTbar.coffea
infiles/2016APV_VBFHToBB.json
outfiles-plots/2016APV_dask_VBFHToBB.coffea
infiles/2016APV_WHToBB.json
outfiles-plots/2016APV_dask_WHToBB.coffea
infiles/2016APV_WJetsToLNu.json
outfiles-plots/2016APV_dask_WJetsToLNu.coffea
infiles/2016APV_WJetsToQQ.json
outfiles-plots/2016APV_dask_WJetsToQQ.coffea
infiles/2016APV_ZHToBB.json
outfiles-plo



In [6]:
# Read the histogram from the pickle file. The context manager closes the
# file handle as soon as loading has finished.
# NOTE: unpickling can execute arbitrary code -- only load files that were
# written by this notebook.
with open(picklename, 'rb') as picklefile:
    templates = pickle.load(picklefile)

In [7]:
# Sum over the 'genflavor' axis, then integrate the 'region' axis over the
# 'signal-ggf' category only.
summed_over_flavor = templates.sum('genflavor')
cutflow = summed_over_flavor.integrate('region', ['signal-ggf'])
cutflow

<Hist (process,cut) instance at 0x7f254cfa50a0>

In [8]:
# Display the histogram contents; the result is keyed by one-element process tuples
cutflow.values()

{('QCD',): array([3.31196184e+08, 3.31196184e+08, 3.31196184e+08, 3.30544312e+08,
        8.40276158e+06, 8.40276158e+06, 2.24300537e+06, 1.82050958e+06,
        1.80362035e+06, 1.73864508e+06, 1.66276213e+06, 4.46845434e+03,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttbar',): array([7.42613247e+06, 7.42613247e+06, 7.42613247e+06, 7.41939342e+06,
        7.31832287e+04, 7.31832287e+04, 2.61892809e+04, 1.09600044e+04,
        8.60020089e+03, 5.51384856e+03, 5.35519742e+03, 5.12038488e+01,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('singlet',): array([1.95929255e+06, 1.95929255e+06, 1.95929255e+06, 1.95788353e+06,
        8.56030010e+03, 8.56030010e+03, 3.56697466e+03, 1.62811562e+03,
        1.30472855e+03, 9.48910145e+02, 9.05554361e+02, 1.07555791e+01,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('Wjets',): array([3.36421200e+07, 3.36421200e+07, 3.36421200e+07, 3.36148378e+07,
        1.52713372e+05, 1.52713372e+05, 6.12478633e+04, 4.

In [9]:
# Signal cutflow table: one column per signal process, one row per cut.
# The histogram values are evaluated once instead of once per column.
sig_yields = cutflow.values()
sig_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']
df1 = pd.DataFrame({proc: sig_yields[(proc,)] for proc in sig_processes})

# Drop the last three entries of every column and truncate to integers
df1 = df1[:-3].astype('int')

# Label the remaining twelve rows with the cut names
df1.index = ['nothing','trigger','lumimask','metfilter','minjetkin','jetid','n2ddt','antiak4btagMediumOppHem','met','noleptons','notvbf','ddbpass']
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
nothing,1504,6409,5609,2332,3409
trigger,1504,6409,5609,2332,3409
lumimask,1504,6409,5609,2332,3409
metfilter,1501,6404,5606,2330,3405
minjetkin,143,64,53,39,151
jetid,143,64,53,39,151
n2ddt,74,37,27,20,54
antiak4btagMediumOppHem,51,28,17,11,14
met,50,26,12,8,10
noleptons,48,25,10,6,6


In [10]:
# Keep the six rows at positions 4-9 of the twelve-row cutflow table.
# Running this cell twice would slice an already trimmed table, so fail with
# a clear message instead of an opaque length-mismatch error.
if len(df1) != 12:
    raise RuntimeError('df1 does not have 12 rows; re-run the cell that builds it first')
df1 = df1[4:-2]

# Raw string for the N2 label: in a normal string literal "\t" is a TAB
# character, which would turn "\text" into <TAB>"ext" in the LaTeX output.
df1.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']

# escape=False keeps the math markup ($, _, ^, \) in the labels intact
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)
df1

  df1.to_latex(buf=year+'/cutflow-sig.tex')


Unnamed: 0,ggF,VBF,WH,ZH,ttH
Jet kinematics,143,64,53,39,151
Jet ID,143,64,53,39,151
Jet $N_2^\text{DDT}$,74,37,27,20,54
Opp. hem. b veto,51,28,17,11,14
MET $<$ 140 GeV,50,26,12,8,10
No leptons,48,25,10,6,6


In [11]:
# Background cutflow table: one column per background process, one row per cut
bkg_yields = cutflow.values()
bkg_processes = ['QCD', 'Wjets', 'Zjets', 'EWKW', 'EWKZ', 'VV', 'ttbar', 'singlet']
df2 = pd.DataFrame({proc: bkg_yields[(proc,)] for proc in bkg_processes})

# Drop the last three entries of every column, truncate to integers and
# label the remaining twelve rows with the cut names
df2 = df2[:-3].astype('int')
df2.index = [
    'nothing', 'trigger', 'lumimask', 'metfilter',
    'minjetkin', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
    'met', 'noleptons', 'notvbf', 'ddbpass',
]
df2

Unnamed: 0,QCD,Wjets,Zjets,EWKW,EWKZ,VV,ttbar,singlet
nothing,331196183,33642120,2841273,1062535,143850,972090,7426132,1959292
trigger,331196183,33642120,2841273,1062535,143850,972090,7426132,1959292
lumimask,331196183,33642120,2841273,1062535,143850,972090,7426132,1959292
metfilter,330544312,33614837,2835819,1061413,143648,971404,7419393,1957883
minjetkin,8402761,152713,67488,9793,1584,2416,73183,8560
jetid,8402761,152713,67488,9793,1584,2416,73183,8560
n2ddt,2243005,61247,30004,5269,844,1361,26189,3566
antiak4btagMediumOppHem,1820509,48577,23102,3258,634,1053,10960,1628
met,1803620,45906,22672,1923,614,801,8600,1304
noleptons,1738645,42456,17331,1542,436,616,5513,948


In [12]:
# Keep the six rows at positions 4-9 of the twelve-row cutflow table.
# Running this cell twice would slice an already trimmed table, so fail with
# a clear message instead of an opaque length-mismatch error.
if len(df2) != 12:
    raise RuntimeError('df2 does not have 12 rows; re-run the cell that builds it first')
df2 = df2[4:-2]

# Raw string for the N2 label: in a normal string literal "\t" is a TAB
# character, which would turn "\text" into <TAB>"ext" in the LaTeX output.
df2.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']

# escape=False keeps the math markup ($, _, ^, \) in the labels intact
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)
df2

  df2.to_latex(buf=year+'/cutflow-bkg.tex')


Unnamed: 0,QCD,Wjets,Zjets,EWKW,EWKZ,VV,ttbar,singlet
Jet kinematics,8402761,152713,67488,9793,1584,2416,73183,8560
Jet ID,8402761,152713,67488,9793,1584,2416,73183,8560
Jet $N_2^\text{DDT}$,2243005,61247,30004,5269,844,1361,26189,3566
Opp. hem. b veto,1820509,48577,23102,3258,634,1053,10960,1628
MET $<$ 140 GeV,1803620,45906,22672,1923,614,801,8600,1304
No leptons,1738645,42456,17331,1542,436,616,5513,948
