In [1]:
import glob
import json
import os, subprocess
import pickle

import awkward as ak
import numpy as np
import pandas as pd
import uproot
from coffea import processor, util, hist

In [3]:
def _read_json(path):
    """Return the parsed contents of the JSON file at ``path``."""
    with open(path) as handle:
        return json.load(handle)


# Lookup tables used further down when normalising and grouping the cutflow:
#   lumis - indexed by year
#   xs    - indexed by dataset name
#   pmap  - mapping handed to the histogram grouping step
lumis = _read_json('lumi.json')
xs = _read_json('xsec.json')
pmap = _read_json('pmap.json')

In [3]:
# Data-taking period to process. It prefixes the input file lists and names
# the directory that receives the pickle cache and the LaTeX tables.
year = '2016APV'
# Expand the pattern in Python instead of shelling out to `ls`: when nothing
# matches, `ls` writes an error message that getoutput() captures and split()
# would turn into bogus "file names"; paths containing whitespace would be
# split as well. sorted() keeps the processing order deterministic.
infiles = sorted(glob.glob("infiles/"+year+"*.json"))
# Prefix (directory) of the .coffea processor outputs.
coffeadir_prefix = 'outfiles-plots/'
# Accumulator that collects the merged 'cutflow' and 'sumw' entries.
outsum = processor.dict_accumulator()

In [4]:
# Location of the cached cutflow histogram for the selected year.
picklename = year+'/cutflow.pkl'
# Rebuild the cache only when no pickle exists yet; an existing one is reused.
repickle = not os.path.isfile(picklename)

In [5]:
if repickle:

    # Merge the per-group coffea outputs into `outsum`.
    for this_file in infiles:

        print(this_file)
        # Input lists are named <year>_<index>.json; the matching processor
        # output is <coffeadir_prefix><year>_dask_<index>.coffea.
        index = this_file.split("_")[1].split(".json")[0]
        filename = coffeadir_prefix+year+"_dask_"+index+".coffea"

        print(filename)
        if not os.path.isfile(filename):
            # Best effort: report the missing output and carry on. Note that
            # the resulting (incomplete) cutflow is still cached in the pickle.
            print("File " + filename + " is missing")
            continue

        out = util.load(filename)

        if len(outsum.keys()) == 0:
            # First output seeds the accumulator ...
            outsum['cutflow'] = out['cutflow']
            outsum['sumw'] = out['sumw']
        else:
            # ... later ones are added on top.
            outsum['cutflow'].add(out['cutflow'])
            outsum['sumw'].add(out['sumw'])

        del out

    if 'sumw' not in outsum:
        # Without this check the failure is an opaque KeyError below.
        raise RuntimeError(
            "No coffea outputs found for year " + year
            + " under " + coffeadir_prefix + "; nothing to pickle"
        )

    # Per-dataset normalisation: cross section * luminosity / sum of weights.
    # The factor 1000 is presumably a pb -> fb unit conversion - TODO confirm
    # against the units used in xsec.json and lumi.json.
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')
    # Regroup the 'dataset' axis into 'process' categories according to pmap.
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Release the merged accumulator; only the grouped histogram is needed.
    del outsum

    # Make sure the output directory exists before writing the cache.
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)
    # The context manager closes the file even if pickling fails.
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

infiles/2018_data.json
outfiles-plots/2018_dask_data.coffea
infiles/2018_higgs.json
outfiles-plots/2018_dask_higgs.coffea
infiles/2018_qcd.json
outfiles-plots/2018_dask_qcd.coffea
infiles/2018_top.json
outfiles-plots/2018_dask_top.coffea
infiles/2018_wandvv.json
outfiles-plots/2018_dask_wandvv.coffea
infiles/2018_z.json
outfiles-plots/2018_dask_z.coffea




In [6]:
# Read the histogram from the pickle file.
# NOTE: unpickling can execute arbitrary code; only load caches produced by
# this notebook. The context manager closes the file handle after reading.
with open(picklename, 'rb') as picklefile:
    templates = pickle.load(picklefile)

In [7]:
# Sum over the 'genflavor' axis and keep only the 'signal-ggf' region, which
# leaves a histogram over (process, cut).
cutflow = (
    templates
    .sum('genflavor')
    .integrate('region', ['signal-ggf'])
)
cutflow

<Hist (process,cut) instance at 0x7f9cd3472e20>

In [8]:
# Display the yields: a dict keyed by a one-element (process,) tuple whose
# values are arrays with one entry per cut.
cutflow.values()

{('ZH',): array([5990.76358155,  388.34475501,  388.34475501,  387.09692878,
         102.04197849,  102.04197849,   60.92694954,   44.49824194,
          33.5792498 ,   29.62805128,   29.06628904,    7.84782694,
           0.        ,    0.        ,    0.        ]),
 ('WH',): array([1.32383085e+04, 4.83906994e+02, 4.83906994e+02, 4.82459714e+02,
        1.30016860e+02, 1.30016860e+02, 7.51846746e+01, 6.40405389e+01,
        4.69906836e+01, 4.13341088e+01, 4.04683827e+01, 9.67926532e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([8.89451442e+03, 1.52325916e+03, 1.52325916e+03, 1.51957695e+03,
        4.01597978e+02, 4.01597978e+02, 1.49171521e+02, 6.68973200e+01,
        5.22934573e+01, 3.49473814e+01, 3.41986011e+01, 3.47747662e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([12147.17498147,   574.96606892,   574.96606892,   573.3719195 ,
          155.72837271,   155.72837271,    99.55274427,    93.73253152,
           

In [9]:
# Signal yields: one column per Higgs production mode, one row per cut.
signal_yields = cutflow.values()
df1 = pd.DataFrame([])
for mode in ('ggF', 'VBF', 'WH', 'ZH', 'ttH'):
    df1[mode] = signal_yields[(mode,)]

# Drop the last three cut bins, truncate the yields to integers and label the
# remaining twelve rows with their cut names.
df1 = df1[:-3].astype('int')
df1.index = ['nothing','trigger','lumimask','metfilter','minjetkin','jetid','n2ddt','antiak4btagMediumOppHem','met','noleptons','notvbf','ddbpass']
df1

Unnamed: 0,ggF,VBF,WH,ZH,ttH
nothing,65202,12147,13238,5990,8894
trigger,1371,574,483,388,1523
lumimask,1371,574,483,388,1523
metfilter,1368,573,482,387,1519
minjetkin,355,155,130,102,401
jetid,355,155,130,102,401
n2ddt,213,99,75,60,149
antiak4btagMediumOppHem,192,93,64,44,66
met,186,90,46,33,52
noleptons,182,87,41,29,34


In [10]:
# Keep the six rows from 'minjetkin' through 'noleptons' (drops the first four
# and the last two cuts). This overwrites df1, so the cell must be run exactly
# once after the cell that builds the full table.
df1 = df1.iloc[4:-2]
# Raw string for the N2 label: in a normal string '\t' is a TAB character,
# which would turn '\text' into TAB + 'ext' and break the LaTeX.
df1.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
# escape=False: the row labels contain intentional LaTeX math that must be
# written verbatim instead of being escaped.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)
df1

  df1.to_latex(buf=year+'/cutflow-sig.tex')


Unnamed: 0,ggF,VBF,WH,ZH,ttH
Jet kinematics,355,155,130,102,401
Jet ID,355,155,130,102,401
Jet $N_2^\text{DDT}$,213,99,75,60,149
Opp. hem. b veto,192,93,64,44,66
MET $<$ 140 GeV,186,90,46,33,52
No leptons,182,87,41,29,34


In [11]:
# Background yields: one column per background process, one row per cut.
background_yields = cutflow.values()
df2 = pd.DataFrame([])
for proc in ('QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet'):
    df2[proc] = background_yields[(proc,)]

# Drop the last three cut bins, truncate the yields to integers and label the
# remaining twelve rows with their cut names.
df2 = df2[:-3].astype('int')
df2.index = ['nothing','trigger','lumimask','metfilter','minjetkin','jetid','n2ddt','antiak4btagMediumOppHem','met','noleptons','notvbf','ddbpass']
df2

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
nothing,1565214422,25514735,3396628,2400926,22518079,3456404
trigger,160228598,2276397,891111,24120,948291,87511
lumimask,160228598,2276397,891111,24120,948291,87511
metfilter,159820835,2268650,887894,24036,946051,87234
minjetkin,24294640,414949,185464,6047,242337,19668
jetid,24294640,414949,185464,6047,242337,19668
n2ddt,7304704,194962,96413,3761,89425,9309
antiak4btagMediumOppHem,7034842,186852,90888,3572,56912,6144
met,6963507,168978,88731,2791,45069,4751
noleptons,6794843,147387,65568,2207,30801,3536


In [12]:
# Keep the six rows from 'minjetkin' through 'noleptons' (drops the first four
# and the last two cuts). This overwrites df2, so the cell must be run exactly
# once after the cell that builds the full table.
df2 = df2.iloc[4:-2]
# Raw string for the N2 label: in a normal string '\t' is a TAB character,
# which would turn '\text' into TAB + 'ext' and break the LaTeX.
df2.index = ['Jet kinematics','Jet ID',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons']
# escape=False: the row labels contain intentional LaTeX math that must be
# written verbatim instead of being escaped.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)
df2

  df2.to_latex(buf=year+'/cutflow-bkg.tex')


Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet
Jet kinematics,24294640,414949,185464,6047,242337,19668
Jet ID,24294640,414949,185464,6047,242337,19668
Jet $N_2^\text{DDT}$,7304704,194962,96413,3761,89425,9309
Opp. hem. b veto,7034842,186852,90888,3572,56912,6144
MET $<$ 140 GeV,6963507,168978,88731,2791,45069,4751
No leptons,6794843,147387,65568,2207,30801,3536
