In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pandas as pd

%matplotlib inline
from matplotlib import lines as mlines
import matplotlib.pyplot as plt
from cycler import cycler

import mplhep as hep
# Apply the CMS style sheet shipped with mplhep to all matplotlib figures.
plt.style.use([hep.style.CMS])

In [2]:
# Luminosity value for each data-taking year, keyed by the year string
# (presumably integrated luminosity in fb^-1 -- TODO confirm units).
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Per-dataset values read from xsec.json; indexed by dataset name when the
# luminosity scale factors are built.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping read from pmap.json; handed to the histogram grouping call that
# merges the 'dataset' axis into a 'process' axis.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [3]:
# Data-taking year to process; selects the luminosity and the file names used below.
year = '2018'

# Count the input lists matching infiles-split/<year>*.json.
# The directory is listed directly instead of parsing `ls` output:
# subprocess.getoutput() merges stderr into its result, so a missing directory
# or an unmatched pattern would turn the word count of the error message into
# a bogus, non-zero number of files.
input_dir = 'infiles-split'
if os.path.isdir(input_dir):
    nfiles = sum(
        1
        for name in os.listdir(input_dir)
        if name.startswith(year) and name.endswith('.json')
    )
else:
    nfiles = 0
# Empty coffea dict_accumulator; each per-file output that is loaded gets added into it.
outsum = processor.dict_accumulator()

In [4]:
# Load all files
# Start from an empty accumulator so that re-running this cell does not add the
# same outputs a second time and then re-apply the in-place luminosity scaling
# to an already scaled cutflow histogram.
outsum = processor.dict_accumulator()

for n in range(1, nfiles+1):

    # Input list of job n; its keys are only used to report a missing output.
    with open('infiles-split/'+year+'_'+str(n)+'.json') as f:
        infiles = json.load(f)

    filename = '/myeosdir/inclusive/outfiles/'+year+'_'+str(n)+'.coffea'
    #filename = 'outfiles/'+year+'_'+str(n)+'.coffea'
    if os.path.isfile(filename):
        out = util.load(filename)
        outsum.add(out)
    else:
        # Output of job n is missing: show the job index and the keys of its input list.
        print(n, infiles.keys())

# Per-dataset scale factor: xs * 1000 * lumi / accumulated 'sumw' entry.
# NOTE(review): the factor 1000 presumably converts pb to fb to match the
# luminosity units -- confirm against xsec.json.
scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
# These two datasets are forced to a scale factor of 1
# (presumably because they are collision data -- confirm).
scale_lumi['JetHT'] = 1
scale_lumi['SingleMuon'] = 1

# Rescale the cutflow histogram along its 'dataset' axis.
outsum['cutflow'].scale(scale_lumi, 'dataset')
print('ok')

ok


In [5]:
# Regroup the 'dataset' axis of the cutflow histogram into a 'process'
# category axis, using the mapping in pmap.
process_axis = hist.Cat('process', 'Process')
templates = outsum['cutflow'].group('dataset', process_axis, pmap)

# Sum over the 'genflavor' axis, then keep only the 'signal' entry of the
# 'region' axis.
flavor_summed = templates.sum('genflavor')
cutflow = flavor_summed.integrate('region', 'signal')

# Display the resulting values, keyed by process tuple.
cutflow.values()



{('ZH',): array([3.12033935e+04, 3.12033935e+04, 3.12033935e+04, 1.38076095e+02,
        1.38076095e+02, 1.30405423e+02, 8.14398908e+01, 3.74893147e+01,
        2.78374766e+01, 2.41635698e+01, 8.87292486e+00]),
 ('WH',): array([4.36713093e+04, 4.36713093e+04, 4.36713093e+04, 1.76614983e+02,
        1.76614983e+02, 1.67200069e+02, 1.01521393e+02, 5.13206187e+01,
        3.54712239e+01, 3.11473808e+01, 1.11907438e+01]),
 ('ggF',): array([9.32646289e+05, 9.32646289e+05, 9.32646289e+05, 3.66405552e+02,
        3.66405552e+02, 3.49889985e+02, 1.79292766e+02, 1.36392254e+02,
        1.33660739e+02, 1.30036482e+02, 7.70616706e+01]),
 ('ggF-powheg',): array([1.64726909e+06, 1.64726909e+06, 1.64726909e+06, 6.69207156e+02,
        6.69207156e+02, 6.41220091e+02, 3.34385417e+02, 2.70267410e+02,
        2.64773800e+02, 2.57414484e+02, 1.58992017e+02]),
 ('QCD',): array([1.62783470e+10, 1.62783470e+10, 1.62783470e+10, 3.30954119e+07,
        3.30954119e+07, 3.07084046e+07, 8.39215898e+06, 7.6401067

In [8]:
# Signal cutflow table: one column per signal process, one row per cutflow entry.
# The histogram values are evaluated once instead of once per column.
cutflow_values = cutflow.values()

df1 = pd.DataFrame({
    'ggF': cutflow_values[('ggF',)],
    #'VBF': cutflow_values[('VBF',)],
    'WH': cutflow_values[('WH',)],
    'ZH': cutflow_values[('ZH',)],
    #'ttH': cutflow_values[('ttH',)],
})

# Skip the first three cutflow entries (the labels below start at
# 'Jet kinematics') and truncate the yields to integers.
df1 = df1.iloc[3:].astype('int')

# The N2 label is a raw string: in a normal string literal the '\t' of
# '\text' is read as a TAB character, which corrupts the LaTeX command.
df1.index = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode labels ($...$, ^, _, \text) verbatim
# instead of having pandas escape them as plain text.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [9]:
# Background cutflow table: one column per background process, one row per cutflow entry.
# The histogram values are evaluated once instead of once per column.
cutflow_values = cutflow.values()

df2 = pd.DataFrame({
    'QCD': cutflow_values[('QCD',)],
    'Wjets': cutflow_values[('Wjets',)],
    'Zjets': cutflow_values[('Zjets',)],
    'VV': cutflow_values[('VV',)],
    'ttbar': cutflow_values[('ttbar',)],
    'singlet': cutflow_values[('singlet',)],
})

# Skip the first three cutflow entries (the labels below start at
# 'Jet kinematics') and truncate the yields to integers.
df2 = df2.iloc[3:].astype('int')

# The N2 label is a raw string: in a normal string literal the '\t' of
# '\text' is read as a TAB character, which corrupts the LaTeX command.
df2.index = [
    'Jet kinematics',
    'Jet ID',
    'Jet acceptance',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode labels ($...$, ^, _, \text) verbatim
# instead of having pandas escape them as plain text.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)