In [13]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd
import pickle

import mplhep as hep
# `plt` is used here but is not imported anywhere in this cell, so on a fresh
# kernel this line raises NameError. Import pyplot explicitly before applying
# the CMS style sheet shipped with mplhep.
import matplotlib.pyplot as plt

plt.style.use([hep.style.CMS])

In [14]:
# Integrated luminosity per data-taking year, keyed by the year as a string.
# NOTE(review): units are presumably fb^-1 -- confirm against the xsec units.
lumis = {'2016': 35.9, '2017': 41.5, '2018': 59.9}

# Per-dataset cross sections; looked up by dataset name when the cutflow is
# scaled to luminosity.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping used to group individual datasets into physics processes.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [15]:
# Data-taking year to process; selects the input lists, the luminosity and the
# output directory used further down.
year = '2017'

# Count the per-job input lists for this year from a directory listing.
# Word-splitting the output of `ls` miscounts when nothing matches, because the
# shell error message itself is split into several "files".
input_dir = 'infiles-split'
input_lists = os.listdir(input_dir) if os.path.isdir(input_dir) else []
nfiles = sum(
    1 for name in input_lists
    if name.startswith(year) and name.endswith('.json')
)

# Accumulator into which every per-job output is merged.
outsum = processor.dict_accumulator()

In [16]:
# Location of the cached, process-grouped cutflow histogram.
# NOTE(review): the path does not include `year`, so a cache written for one
# year would be picked up again after changing `year` -- confirm this is intended.
picklename = 'pickles/cutflow.pkl'

# Rebuild the cache only when it is not already on disk.
repickle = not os.path.isfile(picklename)

In [17]:
# Build the luminosity-scaled, process-grouped cutflow and cache it as a pickle.
# Skipped entirely when the cache already exists (repickle is False).
# Loading every per-job output takes a while.
if repickle:
    # Count the per-job input lists from a directory listing. Word-splitting the
    # output of `ls` miscounts when nothing matches, because the shell error
    # message itself is split into several "files".
    input_dir = 'infiles-split'
    input_lists = os.listdir(input_dir) if os.path.isdir(input_dir) else []
    nfiles = sum(
        1 for name in input_lists
        if name.startswith(year) and name.endswith('.json')
    )

    # Jobs are numbered from 1 to nfiles inclusive.
    for n in range(1, nfiles + 1):
        # For a local copy of the outputs use 'outfiles/'+year+'_'+str(n)+'.coffea'.
        filename = '/myeosdir/vh-charm-category/outfiles/'+year+'_'+str(n)+'.coffea'
        if os.path.isfile(filename):
            out = util.load(filename)
            outsum.add(out)
        else:
            # The job's input list is only needed to report which datasets are
            # affected, so it is read only when the output is missing.
            with open('infiles-split/'+year+'_'+str(n)+'.json') as f:
                infiles = json.load(f)
            print('Missing file '+str(n),infiles.keys())

    # Per-dataset weight: cross section * luminosity / sum of generator weights.
    # NOTE(review): the factor 1000 presumably converts pb to fb -- confirm.
    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Use pmap to group the datasets together
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Make sure the cache directory exists, and close the file even if the
    # dump raises.
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

Missing file 2416 dict_keys(['WJetsToLNu_HT-800To1200_TuneCP5_13TeV-madgraphMLM-pythia8'])




In [18]:
# Read the histogram from the pickle file, sum over the 'genflavor' axis and
# keep only the 'signal' entry of the 'region' axis.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load executes arbitrary code; only load caches produced locally.
with open(picklename, 'rb') as infile:
    cutflow = pickle.load(infile).sum('genflavor').integrate('region','signal')

In [19]:
# Inspect the raw cutflow contents: a dict keyed by 1-tuples of process name,
# each holding the array of values along the cutflow axis.
cutflow.values()

{('ZH',): array([2.21183240e+04, 2.21183240e+04, 2.21183240e+04, 6.82273218e+01,
        3.21740575e+01, 3.21740575e+01, 2.92637050e+01, 1.07797661e+01,
        8.20117272e+00, 7.63554111e+00, 6.87884939e+00, 4.77129177e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([2.30139165e+04, 2.30139165e+04, 2.30139165e+04, 7.06732879e+01,
        3.20572060e+01, 3.20572060e+01, 2.87132755e+01, 9.64926782e+00,
        8.97274044e+00, 8.26274770e+00, 7.93459443e+00, 5.43290897e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([8.40765801e+03, 8.40765801e+03, 8.40765801e+03, 3.42942689e+02,
        1.52978204e+02, 1.52978204e+02, 1.00357224e+02, 1.15227760e+01,
        5.85677366e+00, 4.68593816e+00, 3.06788387e+00, 1.25148068e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([8.95147457e+04, 8.95147457e+04, 8.95147457e+04, 1.02105245e+02,
        1.93140746e+01, 1.93140746e+01, 1.74326399e+01, 3.46147984e+

In [20]:
# Signal cutflow table: one column per signal process, one row per cut.
# cutflow.values() is evaluated once and reused for every column.
signal_yields = cutflow.values()
df1 = pd.DataFrame(
    {name: signal_yields[(name,)] for name in ['ggF', 'VBF', 'WH', 'ZH', 'ttH']}
)

# Drop the first three and last three cutflow bins so the nine remaining rows
# match the nine labels below, then truncate the yields to integers.
df1 = df1[3:-3].astype('int')
# The N_2 label must be a raw string: in a normal string '\t' is a TAB
# character, which would corrupt the '\text' LaTeX command.
df1.index = ['Jet 1 kinematics','Jet 2 kinematics','Jet ID','Jet acceptance',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons','DeepDoubleB']

# Create the per-year output directory if needed. escape=False keeps the math
# markup in the row labels intact instead of escaping '$', '^', '_' and '\'.
os.makedirs(year, exist_ok=True)
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [21]:
# Background cutflow table: one column per background process, one row per cut.
# cutflow.values() is evaluated once and reused for every column.
background_yields = cutflow.values()
df2 = pd.DataFrame(
    {name: background_yields[(name,)]
     for name in ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']}
)

# Drop the first three and last three cutflow bins so the nine remaining rows
# match the nine labels below, then truncate the yields to integers.
df2 = df2[3:-3].astype('int')
# The N_2 label must be a raw string: in a normal string '\t' is a TAB
# character, which would corrupt the '\text' LaTeX command.
df2.index = ['Jet 1 kinematics','Jet 2 kinematics','Jet ID','Jet acceptance',r'Jet $N_2^\text{DDT}$','Opp. hem. b veto','MET $<$ 140 GeV','No leptons','DeepDoubleB']

# Create the per-year output directory if needed. escape=False keeps the math
# markup in the row labels intact instead of escaping '$', '^', '_' and '\'.
os.makedirs(year, exist_ok=True)
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)