In [28]:
import glob
import json
import os, subprocess
import pickle

import awkward as ak
import numpy as np
import pandas as pd
import uproot3
from coffea import processor, util, hist

from plotter import *

In [29]:
# Luminosity value per data-taking year, looked up by the year string.
# NOTE(review): units are presumably fb^-1 -- confirm against the analysis note.
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.2,
}

# Cross-section table read from JSON; indexed by dataset name when the
# luminosity scale factors are built.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping passed to the histogram grouping step to merge datasets into
# processes. NOTE(review): presumably {process: [datasets]} -- confirm.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

In [30]:
# Data-taking year to process; used to build input/output paths and to pick
# the luminosity.
year = '2016'

# Number of input lists for this year. Counted with glob rather than by
# splitting the text printed by `ls`: subprocess.getoutput also captures
# stderr, so with no matching file the words of the error message would be
# counted as files (and names containing spaces would be counted twice).
nfiles = len(glob.glob('infiles-split/' + year + '*.json'))

# Accumulator that the merge cell fills with the summed coffea outputs.
outsum = processor.dict_accumulator()

# NOTE(review): presumably the DeepDoubleB pass threshold; it is not used in
# the cells visible here -- confirm where it is consumed.
ddbthr = 0.64

In [31]:
# Number of input lists for this year (same count as in the previous cell).
# glob is used instead of splitting `ls` output, which would count the words
# of the error message when no file matches.
nfiles = len(glob.glob('infiles-split/' + year + '*.json'))

# Directory prefix of the per-job .coffea output files.
coffeadir_prefix = '/myeosdir/ggf-vbf/outfiles-ddb2/'

# The merged cutflow is cached in a pickle. It is rebuilt only when no pickle
# exists yet for this year; an existing pickle is kept and reused (it is NOT
# removed) -- delete the file by hand to force a rebuild.
picklename = 'pickles/'+str(year)+'_cutflow.pkl'
repickle = not os.path.isfile(picklename)

In [32]:
if repickle:

    # Merge the per-job coffea outputs into `outsum`.
    n_loaded = 0
    missing = []
    for n in range(1,nfiles+1):

        filename = coffeadir_prefix+year+'_'+str(n)+'.coffea'
        if not os.path.isfile(filename):
            # Remember skipped jobs so they can be reported instead of being
            # dropped silently.
            missing.append(filename)
            continue

        out = util.load(filename)

        # Seed the accumulator from the first output that actually exists.
        # Keying this on n == 1 would raise a KeyError on the next file
        # whenever the output of job 1 is missing. Assigning (rather than
        # adding) on the first file also keeps re-runs of this cell from
        # double counting.
        if n_loaded == 0:
            outsum['cutflow'] = out['cutflow']
            outsum['sumw'] = out['sumw']
        else:
            outsum['cutflow'].add(out['cutflow'])
            outsum['sumw'].add(out['sumw'])
        n_loaded += 1

    if missing:
        print('WARNING: %d of %d coffea outputs are missing, e.g. %s'
              % (len(missing), nfiles, missing[0]))
    if n_loaded == 0:
        raise RuntimeError('no coffea output found under ' + coffeadir_prefix
                           + ' for year ' + year)

    # Per-dataset scale factor: cross section * 1000 * luminosity / sum of weights.
    # NOTE(review): the factor 1000 presumably converts pb to fb -- confirm units.
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['cutflow'].scale(scale_lumi, 'dataset')

    # Merge the 'dataset' axis into a 'process' axis following pmap.
    cutflow = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Cache the result; create the target directory first so a fresh checkout
    # does not fail on the write, and let the context manager close the file
    # even if pickling raises.
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(cutflow, outfile, protocol=-1)



In [33]:
# Load the cached cutflow for this year. The context manager guarantees the
# file handle is closed instead of being left to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load pickles written by
# this notebook.
with open('pickles/'+str(year)+'_cutflow.pkl','rb') as picklefile:
    cutflow = pickle.load(picklefile)

# Sum over the 'genflavor' axis and keep only the 'muoncontrol' region.
cutflow = cutflow.sum('genflavor').integrate('region','muoncontrol')

In [34]:
# Show the cutflow values for the 'muondata' process (the region was already
# fixed to 'muoncontrol' above). NOTE(review): presumably one entry per
# selection step -- confirm the bin order against the processor.
cutflow.integrate('process','muondata').values()

{(): array([4.0017229e+07, 1.0919550e+06, 1.0919550e+06, 3.4411500e+05,
        1.0858200e+05, 1.1222000e+04, 7.2410000e+03, 3.8790000e+03,
        3.6830000e+03, 3.6350000e+03, 2.0000000e+01, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00])}

In [35]:
# Show the cutflow values for the 'ttbar' process (the region was already
# fixed to 'muoncontrol' above). NOTE(review): presumably one entry per
# selection step -- confirm the bin order against the processor.
cutflow.integrate('process','ttbar').values()

{(): array([2.45037266e+07, 3.42253072e+05, 3.42253072e+05, 1.38608815e+05,
        7.02034556e+04, 8.69602838e+03, 6.09044144e+03, 3.44972338e+03,
        3.44972338e+03, 3.44972338e+03, 2.27631301e+01, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00])}

In [36]:
# Table column name -> process name on the histogram's 'process' axis.
process_columns = [
    ('QCD', 'QCD'),
    ('Wjets', 'Wjets'),
    ('Zjets', 'Zjets'),
    ('VV', 'VV'),
    ('ttbar', 'ttbar'),
    ('singlet', 'singlet'),
    ('data', 'muondata'),
]

# Row labels for the leading cutflow bins, in bin order; bins beyond these
# are dropped from the table.
cut_labels = ['nothing', 'minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08',
              'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask',
              'ddbpass']

# Evaluate the histogram once instead of once per column.
yields = cutflow.values()

# One column per process, built in a single constructor call.
df1 = pd.DataFrame({column: yields[(process,)] for column, process in process_columns})

# astype('int') truncates towards zero (it does not round); keep only the
# labelled bins and name the rows after the cuts.
df1 = df1.astype('int')[:len(cut_labels)]
df1.index = cut_labels

# Write every row except the first ('nothing') to LaTeX; make sure the
# per-year output directory exists before writing.
os.makedirs(year, exist_ok=True)
df1[1:].to_latex(buf=year+'/cutflow-muoncr.tex')

In [37]:
# Display the integer cutflow table (one column per process, rows labelled by cut).
df1

Unnamed: 0,QCD,Wjets,Zjets,VV,ttbar,singlet,data
nothing,371373188,23187997,4293797,3254018,24503726,9411117,40017229
minjetkinmu,48413035,623735,244755,7453,342253,37672,1091955
jetid,48413035,623735,244755,7453,342253,37672,1091955
n2ddt,11534243,236173,100994,4092,138608,19025,344115
ak4btagMedium08,1209849,22343,14035,468,70203,10099,108582
onemuon,2445,909,166,16,8696,986,11222
muonkin,530,578,91,9,6090,604,7241
muonDphiAK8,353,213,26,5,3449,406,3879
muontrigger,353,213,26,5,3449,406,3683
lumimask,353,213,26,5,3449,406,3635
