In [1]:
import os, subprocess
import json
import uproot3
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pickle

from plotter import *

In [2]:
# Luminosity per data-taking year, keyed by the year string used throughout the notebook.
# NOTE(review): units are presumably fb^-1 (values are multiplied by 1000 * xsec later) - confirm.
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Per-dataset values read from xsec.json; used below (indexed by dataset name) to build
# the luminosity scale factors.
with open('xsec.json') as xsec_file:
    xs = json.load(xsec_file)

# Mapping read from pmap.json; its keys are the process names iterated over when
# exporting templates, and it is passed to Hist.group() to merge datasets into processes.
with open('pmap.json') as pmap_file:
    pmap = json.load(pmap_file)

# Names of the 'systematic' axis bins exported for every non-data process:
# the nominal template followed by an Up/Down pair for each variation source.
systematics = ['nominal'] + [
    source + direction
    for source in (
        'jet_trigger',
        'btagWeight', 'btagEffStat',
        'UES', 'JES', 'JER',
        'pileup_weight',
    )
    for direction in ('Up', 'Down')
]

In [3]:
# Lower edges of the 'deta' and 'mjj' integration ranges applied to the signal region.
deta_cut, mjj_cut = 3.5, 1000

# Same lower edges for the muon control region; 0 means the ranges start at zero.
deta_cut_mucr, mjj_cut_mucr = 0, 0

In [4]:
# Data-taking year to process; used as the key into `lumis`, as the prefix of the
# input/output file names and as the output directory for the ROOT files.
year = '2017'

# Count the per-year input lists (infiles-split/<year>*.json).
# The directory is listed directly instead of whitespace-splitting the output of `ls`:
# when nothing matches, `ls` prints an error message whose words would be counted as files.
split_dir = 'infiles-split'
nfiles = 0
if os.path.isdir(split_dir):
    nfiles = sum(
        1 for name in os.listdir(split_dir)
        if name.startswith(year) and name.endswith('.json')
    )

# Accumulator that the per-job coffea outputs are added into.
outsum = processor.dict_accumulator()

In [5]:
# The grouped templates are cached in a per-year pickle file.
picklename = 'pickles/' + str(year) + '_templates.pkl'

# Only rebuild the templates when the cached pickle is not there yet.
repickle = not os.path.isfile(picklename)

In [6]:
# Load all files - this takes a while.
# Accumulates every available per-job coffea output into `outsum`, scales the
# 'templates-vbf' histogram per dataset, groups datasets into processes and caches
# the result in `picklename`.
# NOTE: `outsum` is accumulated in place, so re-running this cell without re-creating
# `outsum` would add the same outputs twice.
if repickle:
    # Count the input lists by listing the directory rather than whitespace-splitting
    # `ls` output, which miscounts (error-message words) when nothing matches.
    split_dir = 'infiles-split'
    nfiles = 0
    if os.path.isdir(split_dir):
        nfiles = sum(
            1 for name in os.listdir(split_dir)
            if name.startswith(year) and name.endswith('.json')
        )

    for n in range(1,nfiles+1):

        filename = '/myeosdir/vbf-category/outfiles-ddb2/'+year+'_'+str(n)+'.coffea'
        if os.path.isfile(filename):
            out = util.load(filename)
            outsum.add(out)
        else:
            # The input list is only needed to report which datasets the missing job covered.
            with open(split_dir+'/'+year+'_'+str(n)+'.json') as f:
                infiles = json.load(f)
            print('Missing file '+str(n),infiles.keys())

    # Fail with a clear message instead of a bare KeyError when nothing was loaded.
    if 'sumw' not in outsum:
        raise RuntimeError('No coffea output files were loaded for '+year)

    # Per-dataset scale factor: xs * 1000 * lumi / sum of weights.
    # NOTE(review): the factor 1000 presumably converts pb to fb - confirm units of xsec.json.
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['templates-vbf'].scale(scale_lumi, 'dataset')
    templates = outsum['templates-vbf'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Make sure the cache directory exists, and let the context manager close the
    # file even if pickling fails part-way.
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)
    with open(picklename, 'wb') as outfile:
        pickle.dump(templates, outfile, protocol=-1)

In [7]:
# Read the histogram from the pickle file.
# The context manager closes the file handle, which a bare open() inside pickle.load() never did.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles written by this notebook.
with open(picklename,'rb') as infile:
    templates = pickle.load(infile)

# Signal region: select region 'signal' and integrate deta and mjj from their cut values
# up to fixed upper edges (7 and 4000).
templates_vbf = (
    templates
    .integrate('region','signal')
    .integrate('deta',int_range=slice(deta_cut,7))
    .integrate('mjj',int_range=slice(mjj_cut,4000))
)

# Muon control region: same integration with the control-region cut values.
templates_vbf_mucr = (
    templates
    .integrate('region','muoncontrol')
    .integrate('deta',int_range=slice(deta_cut_mucr,7))
    .integrate('mjj',int_range=slice(mjj_cut_mucr,4000))
)

In [8]:
# Nominal signal-region yield per process for 0 <= ddb1 < 0.7, summed over pt1 and msd1.
(
    templates_vbf
    .sum('pt1','msd1')
    .integrate('ddb1',int_range=slice(0,0.7))
    .integrate('systematic','nominal')
    .values()
)

{('ZH',): array(0.18767054),
 ('WH',): array(0.376012),
 ('ttH',): array(0.45801603),
 ('VBF',): array(10.90603916),
 ('ggF',): array(2.02792091),
 ('ggF-powheg',): array(6.50695701),
 ('QCD',): array(175302.67432765),
 ('VV',): array(46.28441001),
 ('Wjets',): array(3555.79265863),
 ('Zjets',): array(1572.72830472),
 ('ttbar',): array(649.85978676),
 ('singlet',): array(136.04388334),
 ('data',): array(181560.),
 ('muondata',): array(4473.)}

In [9]:
# List the bins available on the 'systematic' axis of the signal-region templates.
templates_vbf.identifiers('systematic')

[<StringBin (JERDown) instance at 0x7fa081071ac0>,
 <StringBin (JERUp) instance at 0x7fa081071a60>,
 <StringBin (JESDown) instance at 0x7fa081071940>,
 <StringBin (JESUp) instance at 0x7fa0810718b0>,
 <StringBin (UESDown) instance at 0x7fa081071a00>,
 <StringBin (UESUp) instance at 0x7fa0810719a0>,
 <StringBin (W_d2kappa_EWDown) instance at 0x7fa081071ee0>,
 <StringBin (W_d2kappa_EWUp) instance at 0x7fa081071c40>,
 <StringBin (W_d3kappa_EWDown) instance at 0x7fa081071f70>,
 <StringBin (W_d3kappa_EWUp) instance at 0x7fa081071c70>,
 <StringBin (Z_d2kappa_EWDown) instance at 0x7fa081071fd0>,
 <StringBin (Z_d2kappa_EWUp) instance at 0x7fa081063040>,
 <StringBin (Z_d3kappa_EWDown) instance at 0x7fa081063100>,
 <StringBin (Z_d3kappa_EWUp) instance at 0x7fa0810630a0>,
 <StringBin (btagEffStatDown) instance at 0x7fa0810716a0>,
 <StringBin (btagEffStatUp) instance at 0x7fa081071700>,
 <StringBin (btagWeightDown) instance at 0x7fa0810714f0>,
 <StringBin (btagWeightUp) instance at 0x7fa081071670>

In [10]:
# Export the pt-inclusive signal-region templates to <year>/1-signalregion.root.
# One histogram is written per (ddb1 category, process, systematic), named
# "<pass|fail>_<process>_<systematic>".
outname = year+'/1-signalregion.root'

# Remove a stale output in-process instead of shelling out to `rm`.
if os.path.exists(outname):
    os.remove(outname)
fout = uproot3.create(outname)

# ddb1 integration ranges for the two categories, in the order they are written.
ddb_regions = (("pass", slice(0.7,1)), ("fail", slice(0,0.7)))

# The pt1 sum does not depend on process or systematic, so compute it once.
templates_ptsum = templates_vbf.sum('pt1')

try:
    for p in pmap.keys():
        print(p)
        # Only the nominal template is exported for processes whose name contains "data".
        variations = ["nominal"] if "data" in p else systematics
        for s in variations:
            h_syst = templates_ptsum.integrate('systematic',s)
            for label, ddb_range in ddb_regions:
                h = h_syst.integrate('ddb1',int_range=ddb_range).integrate('process',p)
                fout[label+"_"+p+"_"+s] = hist.export1d(h)
finally:
    # Close the ROOT file even if one of the exports fails.
    fout.close()

ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata


In [11]:
# Export the signal-region templates split into pt1 bins to <year>/2pt-signalregion.root.
# Histograms are named "<pass|fail>_pt<N>_<process>_<systematic>", with N counting the
# pt1 bins from 1.
ptbins = [450, 550, 1200]
outname = year+'/2pt-signalregion.root'

# Remove a stale output in-process instead of shelling out to `rm`.
if os.path.exists(outname):
    os.remove(outname)
fout = uproot3.create(outname)

# ddb1 integration ranges for the two categories, in the order they are written.
ddb_regions = (("pass", slice(0.7,1)), ("fail", slice(0,0.7)))

try:
    # Walk over consecutive (low, high) edge pairs of ptbins.
    for i, (pt_lo, pt_hi) in enumerate(zip(ptbins[:-1], ptbins[1:])):
        pt_label = "pt"+str(i+1)
        for p in pmap.keys():
            print(p)
            # Only the nominal template is exported for processes whose name contains "data".
            variations = ["nominal"] if "data" in p else systematics
            for s in variations:
                h_pt = templates_vbf.integrate('systematic',s).integrate('pt1',int_range=slice(pt_lo,pt_hi))
                for label, ddb_range in ddb_regions:
                    h = h_pt.integrate('ddb1',int_range=ddb_range).integrate('process',p)
                    fout[label+"_"+pt_label+"_"+p+"_"+s] = hist.export1d(h)
finally:
    # Close the ROOT file even if one of the exports fails.
    fout.close()

ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata
ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata


In [12]:
# Export the pt-inclusive muon-control-region templates to <year>/muonCR.root.
# One histogram is written per (ddb1 category, process, systematic), named
# "<pass|fail>_<process>_<systematic>".
outname = year+'/muonCR.root'

# Remove a stale output in-process instead of shelling out to `rm`.
if os.path.exists(outname):
    os.remove(outname)
fout = uproot3.create(outname)

# ddb1 integration ranges for the two categories, in the order they are written.
ddb_regions = (("pass", slice(0.7,1)), ("fail", slice(0,0.7)))

try:
    for p in pmap.keys():
        print(p)
        # Only the nominal template is exported for processes whose name contains "data".
        variations = ["nominal"] if "data" in p else systematics
        for s in variations:
            h_syst = templates_vbf_mucr.integrate('systematic',s).sum('pt1')
            for label, ddb_range in ddb_regions:
                h = h_syst.integrate('ddb1',int_range=ddb_range).integrate('process',p)
                fout[label+"_"+p+"_"+s] = hist.export1d(h)
finally:
    # This cell previously never closed the file, unlike the other export cells.
    fout.close()

ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata
