In [11]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist

%matplotlib inline
from matplotlib import lines as mlines
import matplotlib.pyplot as plt
from cycler import cycler

In [12]:
# Integrated luminosity for each data-taking year
# (presumably in fb^-1 -- confirm against the analysis note).
lumis = {
    '2016': 35.9,
    '2017': 41.1,
    '2018': 59.9,
}

# Number of MC output files expected per year; used as the upper bound
# when looping over the condor output files.
nfiles_mc = {
    '2016': 64,
    '2017': 89,
    '2018': 106,
}

# Per-dataset cross sections, keyed by dataset name.
with open('xsec.json') as f:
    xs = json.load(f)

# Mapping handed to the histogram `group` call to merge datasets into processes.
with open('pmap.json') as f:
    pmap = json.load(f)

In [13]:
# Data-taking year to process; used as the key into `lumis` / `nfiles_mc`
# and as the prefix of the input and output paths further down.
year = '2018'
# Empty accumulator that the per-file coffea outputs are added into.
outsum = processor.dict_accumulator()

In [14]:
# Load all MC
# Start from a fresh accumulator so this cell is idempotent: re-running it
# must not add every file a second time, nor add unscaled counts on top of
# a cutflow that was already scaled by a previous run of this cell.
outsum = processor.dict_accumulator()

for n in range(1, nfiles_mc[year] + 1):
    print(n)
    filename = 'condor/outfiles/'+year+'_'+str(n)+'.coffea'
    if os.path.isfile(filename):
        out = util.load(filename)
        outsum.add(out)
    else:
        # Best effort: a missing file is reported but does not stop the loop.
        print("File " + filename + " is missing")

# Per-dataset normalisation: cross section * 1000 * luminosity / sum of weights.
# NOTE(review): the factor 1000 presumably converts pb to fb (xs in pb,
# lumi in fb^-1) -- confirm against xsec.json.
scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
outsum['cutflow'].scale(scale_lumi, 'dataset')

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106


In [15]:
# Regroup the 'dataset' axis of the cutflow histogram into a new categorical
# 'process' axis, merging datasets as prescribed by `pmap`.
templates = outsum['cutflow'].group(
    'dataset',
    hist.Cat('process', 'Process'),
    pmap,
)



In [16]:
# Earlier hand-written process map, kept for reference only; the live map
# is the one read from pmap.json.
# NOTE(review): this looks out of date -- the displayed cutflow values list
# WH, ZH, ttH and VBF as separate processes rather than a merged 'otherH'.
#   otherH : ttH, WH, ZH, VBF
#   ggF    : ggF
#   VV     : VV
#   top    : ttbar, singlet
#   Zjets  : Zjets
#   Wjets  : Wjets
#   QCD    : QCD

# Sum away the 'genflavor' axis, then integrate the 'region' axis at 'signal'.
flavor_summed = templates.sum('genflavor')
cutflow = flavor_summed.integrate('region', 'signal')

In [17]:
# Display the cutflow yields: a dict keyed by a (process,) tuple, each value
# an array with one entry per cutflow step.
cutflow.values()

{('ZH',): array([228.17614955, 228.17614955, 123.60794949, 114.38166466,
        114.38166466,  68.7298133 ,  33.08095524,  23.63712481,
         18.10985468,   6.73228909,   0.        ]),
 ('WH',): array([231.15181474, 231.15181474, 135.54101607, 126.68822354,
        126.68822354,  78.9904871 ,  32.59748031,  30.21184778,
         22.62380825,   8.07146629,   0.        ]),
 ('ttH',): array([968.85262776, 968.85262776, 628.72547372, 524.60738784,
        524.60738784, 186.52347411,  55.5655823 ,  41.89660071,
         16.17302953,   4.08260177,   0.        ]),
 ('VBF',): array([369.10938106, 369.10938106, 134.89638076, 125.91111582,
        125.91111582,  71.94776556,  58.15957905,  56.20830311,
         45.86749434,  27.43118304,   0.        ]),
 ('ggF',): array([897.10341639, 897.10341639, 374.70891126, 345.37823385,
        345.37823385, 175.44640563, 133.00924972, 129.51608962,
        102.19545871,  60.05142593,   0.        ]),
 ('QCD',): array([1.15516711e+08, 1.15516711e+08, 3.

In [18]:
import pandas as pd

In [19]:
# Signal cutflow table: one column per Higgs production mode, one row per cut.
signal_processes = ['ggF', 'VBF', 'WH', 'ZH', 'ttH']

# Evaluate the histogram contents once instead of once per column.
signal_yields = cutflow.values()
df1 = pd.DataFrame({proc: signal_yields[(proc,)] for proc in signal_processes})

# Drop the first three cutflow entries and the final one, leaving the seven
# steps labelled below; astype('int') truncates the yields to whole events.
df1 = df1[3:-1].astype('int')
# The N2 label is a raw string: in a normal string literal '\t' is a TAB, so
# '\text' would be written as <TAB>ext and the LaTeX macro would be lost.
df1.index = [
    'Jet kinematics',
    'Jet ID',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode row labels verbatim; pandas versions that
# escape LaTeX specials by default would otherwise mangle '$', '^', '_', '\'.
df1.to_latex(buf=year+'/cutflow-sig.tex', escape=False)

In [20]:
# Background cutflow table: one column per background process, one row per cut.
background_processes = ['QCD', 'Wjets', 'Zjets', 'VV', 'ttbar', 'singlet']

# Evaluate the histogram contents once instead of once per column.
background_yields = cutflow.values()
df2 = pd.DataFrame({proc: background_yields[(proc,)] for proc in background_processes})

# Drop the first three cutflow entries and the final one, leaving the seven
# steps labelled below; astype('int') truncates the yields to whole events.
df2 = df2[3:-1].astype('int')
# The N2 label is a raw string: in a normal string literal '\t' is a TAB, so
# '\text' would be written as <TAB>ext and the LaTeX macro would be lost.
df2.index = [
    'Jet kinematics',
    'Jet ID',
    r'Jet $N_2^\text{DDT}$',
    'Opp. hem. b veto',
    'MET $<$ 140 GeV',
    'No leptons',
    'DeepDoubleB',
]

# Make sure the per-year output directory exists before writing into it.
os.makedirs(year, exist_ok=True)
# escape=False keeps the math-mode row labels verbatim; pandas versions that
# escape LaTeX specials by default would otherwise mangle '$', '^', '_', '\'.
df2.to_latex(buf=year+'/cutflow-bkg.tex', escape=False)