In [1]:
import glob
import json
import os, subprocess
import pickle

import awkward as ak
import numpy as np
import pandas as pd
import uproot
from coffea import processor, util, hist

%matplotlib inline
from matplotlib import lines as mlines
import matplotlib.pyplot as plt
from cycler import cycler

import mplhep as hep
plt.style.use([hep.style.CMS])

In [2]:
# Luminosity per data-taking year, keyed by the year as a string.
# NOTE(review): presumably in fb^-1 (the scale factor later multiplies by 1000) - confirm.
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Per-dataset cross sections; later indexed by dataset name to build the
# luminosity scale factors.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping used to group individual datasets into processes.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [3]:
# Data-taking year to process; it selects the input lists, the luminosity
# and the output directory of the tables.
year = '2017'

# Count the per-job input lists for this year. glob is used instead of
# parsing `ls` output: when nothing matches, `ls` prints an error message
# that subprocess.getoutput captures, and its words would be counted as files.
nfiles = len(glob.glob('infiles-split/' + year + '*.json'))

# Accumulator that the per-job coffea outputs are summed into.
outsum = processor.dict_accumulator()

In [None]:
# Location of the cached cutflow histogram.
picklename = 'pickles/cutflow.pkl'

# Rebuild the pickle only when it is not already on disk.
repickle = not os.path.isfile(picklename)

In [4]:
# Load all files - this takes a while.
# Sums the per-job coffea outputs, scales the cutflow to the luminosity of
# `year`, groups datasets into processes and caches the result in `picklename`.
if repickle:
    # Count the input lists with glob rather than by splitting `ls` output,
    # which would miscount the words of the error message when nothing matches.
    nfiles = len(glob.glob('infiles-split/' + year + '*.json'))

    # Input lists and outputs are numbered from 1 to nfiles.
    for n in range(1, nfiles + 1):

        # The input list is only needed to report which datasets are missing.
        with open('infiles-split/' + year + '_' + str(n) + '.json') as f:
            infiles = json.load(f)

        filename = '/myeosdir/vh-charm-category/outfiles/' + year + '_' + str(n) + '.coffea'
        # Local alternative: filename = 'outfiles/'+year+'_'+str(n)+'.coffea'
        if os.path.isfile(filename):
            out = util.load(filename)
            outsum.add(out)
        else:
            print('Missing file '+str(n),infiles.keys())

    # Per-dataset scale factor: cross section * 1000 * luminosity / sum of weights.
    # NOTE(review): the factor 1000 presumably converts pb to fb - confirm units.
    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Use pmap to group the datasets together
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Make sure the target directory exists before writing the cache.
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)

    # `with` closes the file even if pickling fails part-way through.
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)

2416 dict_keys(['WJetsToLNu_HT-800To1200_TuneCP5_13TeV-madgraphMLM-pythia8'])
ok




In [5]:
# Read the histogram from the pickle file.
# NOTE: pickle.load can execute arbitrary code; only load pickles written by
# this notebook.
with open(picklename, 'rb') as infile:
    cutflow = pickle.load(infile)

# Sum over the 'genflavor' axis and keep only the 'signal' region.
cutflow = cutflow.sum('genflavor').integrate('region', 'signal')

In [6]:
# Display the cutflow contents: a dict keyed by a one-element process tuple,
# each holding the array of yields after every cutflow step.
cutflow.values()

{('ZH',): array([2.21741899e+04, 2.21741899e+04, 2.21741899e+04, 9.48949892e+01,
        4.03883333e+01, 4.03883333e+01, 3.73628114e+01, 1.36731132e+01,
        5.14012102e+00, 4.77083411e+00, 4.23354064e+00, 2.08320062e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([2.30625959e+04, 2.30625959e+04, 2.30625959e+04, 9.13328086e+01,
        3.96662303e+01, 3.96662303e+01, 3.61224229e+01, 1.21142813e+01,
        5.31855709e+00, 4.92417032e+00, 4.69081522e+00, 2.26576327e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ttH',): array([8.62359138e+03, 8.62359138e+03, 8.62359138e+03, 4.38079156e+02,
        1.84988537e+02, 1.84988537e+02, 1.28767131e+02, 1.46394160e+01,
        4.52637109e+00, 3.56487932e+00, 2.34908087e+00, 7.05475798e-01,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([8.95943423e+04, 8.95943423e+04, 8.95943423e+04, 9.00098663e+01,
        2.35016558e+01, 2.35016558e+01, 2.14525434e+01, 4.44141886e+

In [7]:
# Signal cutflow table: one column per signal process, one row per selection step.
signal_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']

# Row labels for the table. The N2 label is a raw string: in a normal string
# the "\t" of "\text" is a tab character, which corrupts the LaTeX output.
cut_labels = [
    'Jet 1 kinematics',
    'Jet 2 kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Look the yields up once instead of calling values() for every column.
yields = cutflow.values()
df1 = pd.DataFrame({proc: yields[(proc,)] for proc in signal_processes})

# Drop the first three and last three cutflow entries so the remaining steps
# line up with cut_labels; astype('int') truncates the yields to integers.
df1 = df1.iloc[3:-3].astype('int')
df1.index = cut_labels

# The table is written into a per-year directory; create it if needed.
os.makedirs(year, exist_ok=True)
# escape=False keeps the LaTeX math in the row labels from being escaped.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [8]:
# Background cutflow table: one column per background process, one row per selection step.
background_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']

# Row labels for the table. The N2 label is a raw string: in a normal string
# the "\t" of "\text" is a tab character, which corrupts the LaTeX output.
cut_labels = [
    'Jet 1 kinematics',
    'Jet 2 kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Look the yields up once instead of calling values() for every column.
yields = cutflow.values()
df2 = pd.DataFrame({proc: yields[(proc,)] for proc in background_processes})

# Drop the first three and last three cutflow entries so the remaining steps
# line up with cut_labels; astype('int') truncates the yields to integers.
df2 = df2.iloc[3:-3].astype('int')
df2.index = cut_labels

# The table is written into a per-year directory; create it if needed.
os.makedirs(year, exist_ok=True)
# escape=False keeps the LaTeX math in the row labels from being escaped.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)