In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd

%matplotlib inline
from matplotlib import lines as mlines
import matplotlib.pyplot as plt
from cycler import cycler

import mplhep as hep
# Apply the CMS style sheet shipped with mplhep to all subsequent matplotlib figures.
plt.style.use([hep.style.CMS])

In [2]:
# Integrated luminosity per data-taking year.
# NOTE(review): presumably in fb^-1, given the factor 1000 applied to the
# cross sections further down -- confirm against xsec.json units.
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Per-dataset cross sections, keyed by dataset name.
with open('xsec.json') as f:
    xs = json.load(f)

# Mapping used later to group datasets into processes.
with open('pmap.json') as f:
    pmap = json.load(f)

In [3]:
year = '2016'

# Count the per-year input lists (infiles-split/<year>*.json) by listing the
# directory directly. Parsing the text printed by a shell `ls` miscounts:
# when nothing matches, `ls` prints an error message whose words would each be
# counted as a file, and file names containing whitespace are counted twice.
nfiles = sum(
    1
    for name in os.listdir('infiles-split')
    if name.startswith(year) and name.endswith('.json')
)

# Accumulator that collects the merged processor outputs for this year.
outsum = processor.dict_accumulator()

In [4]:
# Merge every available per-job output into `outsum`; for jobs whose output
# file is missing, print the job index and the datasets it was meant to process.
indir = 'infiles-split/'
outdir = '/myeosdir/vbf-category/outfiles/'

for n in range(1, nfiles + 1):
    tag = year + '_' + str(n)

    with open(indir + tag + '.json') as f:
        infiles = json.load(f)

    filename = outdir + tag + '.coffea'
    if not os.path.isfile(filename):
        # Output not produced for this job: report it and move on.
        print(n, infiles.keys())
        continue

    out = util.load(filename)
    outsum.add(out)

# Per-dataset weight: cross section * 1000 * luminosity / sum of weights.
scale_lumi = {
    dataset: xs[dataset] * 1000 * lumis[year] / sumw
    for dataset, sumw in outsum['sumw'].items()
}
outsum['cutflow'].scale(scale_lumi, 'dataset')
print('ok')

ok


In [5]:
# Regroup the 'dataset' axis of the cutflow histogram into a 'process'
# category axis, using the dataset lists from pmap.
process_axis = hist.Cat('process', 'Process')
templates = outsum['cutflow'].group('dataset', process_axis, pmap)



In [6]:
# Sum over the 'genflavor' axis, then pick the 'signal-vbf' bin of 'region',
# leaving a (process, cut) histogram.
flavor_summed = templates.sum('genflavor')
cutflow = flavor_summed.integrate('region', 'signal-vbf')
cutflow

<Hist (process,cut) instance at 0x7f6db4d27e20>

In [7]:
# Display the cutflow contents: a dict keyed by one-element process tuples
# (e.g. ('ZH',)), each holding the array of yields after every cut.
cutflow.values()

{('ZH',): array([5.50621125e+03, 5.50621125e+03, 5.50621125e+03, 2.90058789e+01,
        2.90058789e+01, 2.73817148e+01, 1.55299352e+01, 9.53835408e+00,
        3.19797692e+00, 1.02287733e+00, 7.77878429e-02, 6.78309021e-02,
        2.61183212e-02, 0.00000000e+00, 0.00000000e+00]),
 ('WH',): array([8.52750939e+03, 8.52750939e+03, 8.52750939e+03, 2.98620460e+01,
        2.98620460e+01, 2.84034050e+01, 1.44712959e+01, 1.26972382e+01,
        2.94519613e+00, 5.60400679e-01, 2.45239872e-02, 2.09284462e-02,
        7.17907555e-03, 0.00000000e+00, 0.00000000e+00]),
 ('VBF',): array([7.19234165e+04, 7.19234165e+04, 7.19234165e+04, 1.00297962e+02,
        1.00297962e+02, 9.79805667e+01, 5.24014636e+01, 4.18383037e+01,
        4.10917927e+01, 3.98142096e+01, 1.72550871e+01, 1.63211343e+01,
        8.95972822e+00, 0.00000000e+00, 0.00000000e+00]),
 ('ggF',): array([5.51373724e+05, 5.51373724e+05, 5.51373724e+05, 2.90864882e+02,
        2.90864882e+02, 2.75850854e+02, 1.14217158e+02, 7.96853335e+

In [8]:
# Signal cutflow table: one column per Higgs production mode, one row per cut.
# ttH is currently left out of the table.
sig_processes = ['ggF', 'VBF', 'WH', 'ZH']

# Row labels for the LaTeX table. The N2 DDT label must be a raw string: in a
# normal literal the "\t" of "\text" is a TAB escape, which corrupts the label
# written to the .tex file.
sig_cut_labels = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    '2 AK4 jets',
    'DeepDoubleB',
]

# Evaluate the histogram contents once instead of once per column.
sig_yields = cutflow.values()
df1 = pd.DataFrame({proc: sig_yields[(proc,)] for proc in sig_processes})

# Drop the first three and last three cutflow entries; the labels above name
# the remaining rows. Note that astype('int') truncates the yields.
df1 = df1.iloc[3:-3].astype('int')
df1.index = sig_cut_labels

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)

# escape=False keeps the math-mode labels ($...$, _, ^, \text) verbatim
# regardless of the pandas version's default escaping behaviour.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [9]:
# Background cutflow table: one column per background process, one row per cut.
bkg_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']

# Row labels for the LaTeX table. The N2 DDT label must be a raw string: in a
# normal literal the "\t" of "\text" is a TAB escape, which corrupts the label
# written to the .tex file.
bkg_cut_labels = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    '2 AK4 jets',
    'DeepDoubleB',
]

# Evaluate the histogram contents once instead of once per column.
bkg_yields = cutflow.values()
df2 = pd.DataFrame({proc: bkg_yields[(proc,)] for proc in bkg_processes})

# Drop the first three and last three cutflow entries; the labels above name
# the remaining rows. Note that astype('int') truncates the yields.
df2 = df2.iloc[3:-3].astype('int')
df2.index = bkg_cut_labels

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)

# escape=False keeps the math-mode labels ($...$, _, ^, \text) verbatim
# regardless of the pandas version's default escaping behaviour.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)