In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist

%matplotlib inline
from matplotlib import lines as mlines
import matplotlib.pyplot as plt
from cycler import cycler

In [2]:
# Integrated luminosity per data-taking year
# (presumably in fb^-1, given the x1000 factor applied to the cross sections
# later on -- TODO confirm).
lumis = {
    '2016': 35.9,
    '2017': 41.1,
    '2018': 59.9,
}

# Number of MC output files to look for per year; the loader iterates over
# file indices 1..nfiles_mc[year] inclusive.
nfiles_mc = {
    '2016': 64,
    '2017': 89,
    '2018': 106,
}

# Per-dataset cross sections, looked up by dataset name when the
# luminosity scale factors are built.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Process map handed to Hist.group() to merge datasets into processes.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [3]:
# Data-taking year to process; must be a key of both `lumis` and `nfiles_mc`.
year = "2017"

# Empty accumulator that the per-file coffea outputs get merged into.
outsum = processor.dict_accumulator()

In [4]:
# Load all MC
#
# Merge every per-job coffea output for `year` into `outsum`, then scale the
# 'cutflow' histogram of each dataset by xs * 1000 * lumi / sumw.
#
# The accumulator is re-created here so that this cell is idempotent:
# without the reset, running the cell a second time would add the raw
# per-file counts on top of a histogram that has already been scaled
# (Hist.scale works in place) and then scale the mixture again.
outsum = processor.dict_accumulator()

missing_files = []
for n in range(1,nfiles_mc[year]+1):
    print(n)
    filename = 'condor/outfiles/'+year+'_'+str(n)+'.coffea'
    if os.path.isfile(filename):
        out = util.load(filename)
        outsum.add(out)
    else:
        print("File " + filename + " is missing")
        missing_files.append(filename)

# One-line recap so missing inputs are not lost in the per-file counter output.
if missing_files:
    print(str(len(missing_files)) + " of " + str(nfiles_mc[year]) + " files missing")

# Fail with an explicit message instead of a bare KeyError('sumw') when
# nothing could be loaded (e.g. wrong working directory or year).
if 'sumw' not in outsum:
    raise RuntimeError("No MC output files were loaded for " + year
                       + " from condor/outfiles/; cannot normalise the cutflow")

# Per-dataset normalisation: cross section * 1000 * luminosity / sum of weights.
# NOTE(review): the factor 1000 presumably converts pb to fb to match the
# luminosity units -- confirm against xsec.json.
scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
outsum['cutflow'].scale(scale_lumi, 'dataset')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


In [5]:
# Merge the per-dataset 'cutflow' histograms into processes: the 'dataset'
# axis is replaced by a new categorical 'process' axis according to `pmap`.
process_axis = hist.Cat('process', 'Process')
templates = outsum['cutflow'].group('dataset', process_axis, pmap)



In [6]:
# Inline process map, kept commented out for reference only; the `pmap`
# used for the grouping is the one loaded from pmap.json.
#pmap = {}
#pmap['otherH'] = ['ttH','WH','ZH','VBF']
#pmap['ggF'] = ['ggF']
#pmap['VV'] = ['VV']
#pmap['top'] = ['ttbar','singlet']
#pmap['Zjets'] = ['Zjets']
#pmap['Wjets'] = ['Wjets']
#pmap['QCD'] = ['QCD']

# Sum over the 'genflavor' axis, then keep only the 'signal' entry of the
# 'region' axis, leaving the cutflow per process.
flavor_summed = templates.sum('genflavor')
cutflow = flavor_summed.integrate('region', 'signal')

In [7]:
# Display the cutflow yields: a dict keyed by 1-tuples of process name,
# each holding one array entry per cutflow step.
cutflow.values()

{('ZH',): array([174.18392503, 174.18392503,  83.45180041,  77.96161234,
         77.96161234,  45.66447444,  22.49749974,  16.48989197,
         15.31191159,   6.07479185,   0.        ]),
 ('WH',): array([171.6207834 , 171.6207834 ,  91.29798653,  84.8858634 ,
         84.8858634 ,  50.77807897,  22.6219888 ,  20.99389898,
         19.64278087,   6.95914943,   0.        ]),
 ('ttH',): array([757.1606004 , 757.1606004 , 437.62353496, 363.5583228 ,
        363.5583228 , 126.51982336,  41.54010013,  31.03419963,
         19.36214128,   5.19279633,   0.        ]),
 ('VBF',): array([299.34603485, 299.34603485,  91.70098391,  85.73033852,
         85.73033852,  47.43507135,  38.72485992,  37.33776814,
         35.57059528,  23.29943366,   0.        ]),
 ('ggF',): array([727.52485495, 727.52485495, 254.37955509, 234.98566787,
        234.98566787, 112.15920378,  88.71344419,  85.5445165 ,
         81.15868831,  49.32000262,   0.        ]),
 ('QCD',): array([9.54814791e+07, 9.54814791e+07, 2.

In [8]:
import pandas as pd

In [9]:
# Signal cutflow table, written to <year>/cutflow-sig.tex.
#
# One column per signal process, one row per cutflow step.
sig_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']
sig_yields = cutflow.values()  # evaluated once; keys are 1-tuples (process,)

df1 = pd.DataFrame({proc: sig_yields[(proc,)] for proc in sig_processes},
                   columns=sig_processes)

# Drop the first three cutflow entries and the last one; the seven rows that
# remain are labelled below.
# NOTE: astype('int') truncates toward zero (e.g. 77.96 -> 77), it does not round.
df1 = df1.iloc[3:-1].astype('int')

# The N2 label must be a raw string: in a normal string literal '\t' is a TAB,
# so '\text' would be written to the file as <TAB>ext.
df1.index = [
    'Jet kinematics',
    'Jet ID',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)

# escape=False keeps the LaTeX math in the row labels ($, ^, _, \) intact
# instead of letting pandas escape it into literal text.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [10]:
# Background cutflow table, written to <year>/cutflow-bkg.tex.
#
# One column per background process, one row per cutflow step.
bkg_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']
bkg_yields = cutflow.values()  # evaluated once; keys are 1-tuples (process,)

df2 = pd.DataFrame({proc: bkg_yields[(proc,)] for proc in bkg_processes},
                   columns=bkg_processes)

# Drop the first three cutflow entries and the last one; the seven rows that
# remain are labelled below.
# NOTE: astype('int') truncates toward zero, it does not round.
df2 = df2.iloc[3:-1].astype('int')

# The N2 label must be a raw string: in a normal string literal '\t' is a TAB,
# so '\text' would be written to the file as <TAB>ext.
df2.index = [
    'Jet kinematics',
    'Jet ID',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)

# escape=False keeps the LaTeX math in the row labels ($, ^, _, \) intact
# instead of letting pandas escape it into literal text.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)