In [10]:
import os, subprocess
import json
import uproot3
import awkward as ak
import numpy as np
from coffea import processor, util, hist
import pickle

from plotter import *

In [11]:
# Integrated luminosity for each data-taking year
# (presumably in fb^-1, given the factor 1000 applied together with the
# cross sections when the templates are scaled -- confirm).
lumis = {
    '2016': 35.9,
    '2017': 41.5,
    '2018': 59.9,
}

# Cross sections, loaded from xsec.json
with open('xsec.json') as f:
  xs = json.load(f)

# Process map, loaded from pmap.json (its keys are the process names)
with open('pmap.json') as f:
  pmap = json.load(f)

# Names on the 'systematic' axis: the nominal template followed by the
# Up/Down variation of every uncertainty source, in this fixed order.
systematics = ['nominal'] + [
    source + direction
    for source in ('jet_trigger',
                   'btagWeight', 'btagEffStat',
                   'UES', 'JES', 'JER')
    for direction in ('Up', 'Down')
]

In [12]:
# Data-taking year to process; it is used as a key of `lumis` and as the
# prefix of the input/output file names.
year = '2017'

# Count the per-year input lists by listing the directory instead of
# parsing `ls` output: when nothing matches, `ls` prints an error message
# whose words would be miscounted as files.
_infiles_dir = 'infiles-split'
nfiles = 0
if os.path.isdir(_infiles_dir):
    nfiles = sum(
        1 for _name in os.listdir(_infiles_dir)
        if _name.startswith(year) and _name.endswith('.json')
    )

# Accumulator into which the individual coffea outputs are merged
outsum = processor.dict_accumulator()

In [13]:
# Path of the cached templates for the selected year
picklename = 'pickles/'+str(year)+'_templates.pkl'

# Only rebuild the pickle when no cached copy exists on disk
repickle = not os.path.isfile(picklename)

In [14]:
# Load all files - this takes a while
# Merges every available coffea output for `year`, scales the templates per
# dataset, groups datasets into processes with `pmap`, and caches the result
# in `picklename`. Skipped entirely when the pickle already exists.
if repickle:
    # Start from an empty accumulator: the templates are scaled in place
    # below, so adding the outputs again on a re-run of this cell would mix
    # scaled and unscaled histograms.
    outsum = processor.dict_accumulator()

    # Count the input lists by listing the directory; parsing `ls` output
    # miscounts when nothing matches (the error message is split into words).
    infiles_dir = 'infiles-split'
    nfiles = 0
    if os.path.isdir(infiles_dir):
        nfiles = sum(1 for name in os.listdir(infiles_dir)
                     if name.startswith(year) and name.endswith('.json'))

    # Input lists are expected to be numbered 1..nfiles
    for n in range(1,nfiles+1):

        # The input list is only needed to report which datasets are missing
        with open('infiles-split/'+year+'_'+str(n)+'.json') as f:
            infiles = json.load(f)

        filename = '/myeosdir/vh-charm-category/outfiles/'+year+'_'+str(n)+'.coffea'
        # Alternative local location: 'outfiles/'+year+'_'+str(n)+'.coffea'
        if os.path.isfile(filename):
            out = util.load(filename)
            outsum.add(out)
        else:
            print('Missing file '+str(n),infiles.keys())

    # Fail with a clear message instead of a bare KeyError when nothing
    # could be loaded.
    if 'sumw' not in outsum or 'templates' not in outsum:
        raise RuntimeError('No coffea output could be loaded for year '+year)

    # Per-dataset scale factor: cross section * 1000 * luminosity / sum of weights
    scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['templates'].scale(scale_lumi, 'dataset')

    # Use pmap to group the datasets together
    templates = outsum['templates'].group('dataset', hist.Cat('process', 'Process'), pmap)

    # Make sure the cache directory exists before writing
    pickle_dir = os.path.dirname(picklename)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)

    # Write to a temporary file and rename it afterwards, so that an
    # interrupted dump never leaves a truncated pickle that later runs would
    # accept as a valid cache. The `with` block closes the handle on error.
    tmpname = picklename + '.tmp'
    with open(tmpname, 'wb') as outfile:
        pickle.dump(templates, outfile, protocol=-1)
    os.replace(tmpname, picklename)

In [15]:
# Read the histogram from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickles
# that were produced by this notebook.
# The `with` block closes the file handle, which the bare open() call leaked.
with open(picklename, 'rb') as infile:
    templates = pickle.load(infile)

In [16]:
# Check integrals: sum over the pt1, msd1 and ddb1 axes, then select the
# 'signal' region and the 'nominal' systematic, and display the values.
(
    templates
    .sum('pt1', 'msd1', 'ddb1')
    .integrate('region', 'signal')
    .integrate('systematic', 'nominal')
    .values()
)

{('ZH',): array([1.06661833e-01, 2.58988235e-02, 2.19574069e-01, 1.72490456e-01,
        8.31461077e-01, 1.47377843e+00, 1.97304070e+00, 1.25867471e+00,
        2.56550903e-01, 1.72181723e-01, 4.70415066e-02, 7.15189669e-02,
        1.27983266e-01, 2.53765047e-04, 3.86540973e-04, 5.79798639e-02,
        1.47840516e-02, 3.49517975e-04, 5.43771185e-02, 8.50280473e-05,
        1.33685251e-02, 4.08514916e-04]),
 ('WH',): array([2.01347958e-01, 4.86615242e-01, 6.63751959e-01, 1.33145637e+00,
        2.29267075e+00, 2.01839456e+00, 5.96382569e-01, 8.55178632e-02,
        3.15479968e-02, 2.06484916e-03, 7.54301590e-02, 3.87644559e-02,
        1.83353759e-03, 5.12358181e-03, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.13300494e-02,
        3.67802951e-02, 0.00000000e+00]),
 ('ttH',): array([0.08184465, 0.09882349, 0.1512636 , 0.29845429, 0.39784496,
        0.43777151, 0.24216237, 0.18505042, 0.17008616, 0.17332713,
        0.14869979, 0.12033529, 

In [17]:
# Templates for the 'signal' region with the pt1 axis summed out
templates_vh = (
    templates
    .sum('pt1')
    .integrate('region', 'signal')
)

In [18]:
# Export the signal-region templates to a ROOT file: one histogram per
# msd2 bin, process, systematic and ddb1 pass/fail selection.

# Edges of the msd2 bins; each consecutive pair defines one exported category
m2bins = [47, 68, 103, 201]

# ddb1 value separating the "fail" range (0 to cut) from the "pass" range (cut to 1)
ddb1_cut = 0.89
ddb1_regions = [('pass', slice(ddb1_cut, 1)), ('fail', slice(0, ddb1_cut))]

outname = year+'/3m2-signalregion.root'

# Create the output directory if needed and remove any previous output in
# Python rather than through a shell `rm`, which prints an error when the
# file does not exist yet. The file is created fresh afterwards.
os.makedirs(year, exist_ok=True)
if os.path.exists(outname):
    os.remove(outname)
fout = uproot3.create(outname)

try:
    for i, (m2_low, m2_high) in enumerate(zip(m2bins[:-1], m2bins[1:])):
        for p in pmap.keys():
            # ttH is skipped for 2016 (the reason is not visible in this notebook)
            if p == "ttH" and year == '2016':
                continue
            print(p)

            # Only the nominal template is exported for data-like processes;
            # every other process gets all systematic variations.
            variations = ["nominal"] if "data" in p else systematics

            for s in variations:
                for label, ddb1_range in ddb1_regions:
                    h = (templates_vh
                         .integrate('systematic', s)
                         .integrate('msd2', int_range=slice(m2_low, m2_high))
                         .integrate('ddb1', int_range=ddb1_range)
                         .integrate('process', p))
                    # Key layout: <pass|fail>_m2<bin number>_<process>_<systematic>
                    fout[label+"_m2"+str(i+1)+"_"+p+"_"+s] = hist.export1d(h)
finally:
    # Always close the ROOT file, even if one of the exports fails
    fout.close()

ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata
ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata
ZH
WH
ttH
VBF
ggF
ggF-powheg
QCD
VV
Wjets
Zjets
ttbar
singlet
data
muondata
