In [1]:
import os, subprocess
import json
import uproot
import awkward as ak
import numpy as np
from coffea import processor, util, hist

%matplotlib inline
import matplotlib.pyplot as plt
from cycler import cycler

import mplhep as hep
plt.style.use([hep.style.ROOT, hep.style.CMS])

In [2]:
colors = {}
colors['QCD'] = '#1f77b4'
colors['VBF'] = '#ff7f0e'
colors['VV'] = '#2ca02c'
colors['Wjets'] = '#d62728'
colors['WH'] = '#9467bd'
colors['Zjets'] = '#8c564b'
colors['ZH'] = '#e377c2'
colors['ggF'] = '#7f7f7f'
colors['ttbar'] = '#bcdb22'
colors['singlet'] = '#bcdb22'
colors['ttH'] = '#17becf'

In [3]:
lumis = {}
lumis['2016'] = 35.9
lumis['2017'] = 41.1
lumis['2018'] = 59.9

nfiles_mc = {}
nfiles_mc['2016'] = 113
nfiles_mc['2017'] = 167
nfiles_mc['2018'] = 201

nfiles_data = {}
nfiles_data['2016'] = 101
nfiles_data['2017'] = 97
nfiles_data['2018'] = 133

with open('xsec.json') as f:
    xs = json.load(f)

with open('pmap.json') as f:
    pmap = json.load(f)

In [4]:
year = '2016'
outsum = processor.dict_accumulator()

In [5]:
# Load all MC
for n in range(1,nfiles_mc[year]+1):
    print(n)
    filename = 'outfiles/'+year+'_'+str(n)+'.coffea'
    if os.path.isfile(filename):
        out = util.load(filename)
        outsum.add(out)
    else:
        print("File " + filename + " is missing")
        
scale_lumi = {k: xs[k] * 1000 *lumis[year] / w for k, w in outsum['sumw'].items()}
outsum['cutflow'].scale(scale_lumi, 'dataset')
outsum['templates1'].scale(scale_lumi, 'dataset')
outsum['templates2'].scale(scale_lumi, 'dataset')

1
File outfiles/2016_1.coffea is missing
2
File outfiles/2016_2.coffea is missing
3
File outfiles/2016_3.coffea is missing
4
File outfiles/2016_4.coffea is missing
5
File outfiles/2016_5.coffea is missing
6
File outfiles/2016_6.coffea is missing
7
File outfiles/2016_7.coffea is missing
8
File outfiles/2016_8.coffea is missing
9
File outfiles/2016_9.coffea is missing
10
File outfiles/2016_10.coffea is missing
11
File outfiles/2016_11.coffea is missing
12
File outfiles/2016_12.coffea is missing
13
File outfiles/2016_13.coffea is missing
14
File outfiles/2016_14.coffea is missing
15
File outfiles/2016_15.coffea is missing
16
File outfiles/2016_16.coffea is missing
17
File outfiles/2016_17.coffea is missing
18
File outfiles/2016_18.coffea is missing
19
File outfiles/2016_19.coffea is missing
20
File outfiles/2016_20.coffea is missing
21
File outfiles/2016_21.coffea is missing
22
File outfiles/2016_22.coffea is missing
23
File outfiles/2016_23.coffea is missing
24
File outfiles/2016_24.coff

In [6]:
# Load all data
for n in range(1,nfiles_data[year]+1):
    print(n)
    filename = 'outdata/'+year+'_'+str(n)+'.coffea'
    if os.path.isfile(filename):
        out = util.load(filename)
        outsum.add(out)
    else:
        print("File " + filename + " is missing")
        
# BE CAREFUL NOT TO UNBLIND
# Can you figure out how to blind it at this step???

1
File outdata/2016_1.coffea is missing
2
File outdata/2016_2.coffea is missing
3
File outdata/2016_3.coffea is missing
4
File outdata/2016_4.coffea is missing
5
File outdata/2016_5.coffea is missing
6
File outdata/2016_6.coffea is missing
7
File outdata/2016_7.coffea is missing
8
File outdata/2016_8.coffea is missing
9
File outdata/2016_9.coffea is missing
10
File outdata/2016_10.coffea is missing
11
File outdata/2016_11.coffea is missing
12
File outdata/2016_12.coffea is missing
13
File outdata/2016_13.coffea is missing
14
File outdata/2016_14.coffea is missing
15
File outdata/2016_15.coffea is missing
16
File outdata/2016_16.coffea is missing
17
File outdata/2016_17.coffea is missing
18
File outdata/2016_18.coffea is missing
19
File outdata/2016_19.coffea is missing
20
File outdata/2016_20.coffea is missing
21
File outdata/2016_21.coffea is missing
22
File outdata/2016_22.coffea is missing
23
File outdata/2016_23.coffea is missing
24
File outdata/2016_24.coffea is missing
25
File ou

In [7]:
templates = outsum['cutflow'].group('dataset', hist.Cat('process', 'Process'), pmap)



In [8]:
cutflow = templates.sum('genflavor').integrate('region','muoncontrol').integrate('process','muondata')
cutflow.values()

{}

In [9]:
cutflow = templates.sum('genflavor').integrate('region','muoncontrol').integrate('process','ttbar')
cutflow.values()

{(): array([1.21563365e+07, 1.21563365e+07, 4.16847688e+04, 2.74046330e+04,
        2.74046330e+04, 5.28688927e+03, 2.94926826e+03, 1.75268174e+03,
        2.47689196e+02, 2.98692652e+01, 0.00000000e+00])}

In [10]:
templates1 = outsum['templates1'].group('dataset', hist.Cat('process', 'Process'), pmap).integrate('region','muoncontrol')



In [11]:
hist.plot1d(templates1.sum('pt1').integrate('process','muondata'),overlay='ddb1')

KeyError: 'No axis pt1 found in <Hist (process,msd1,ddb1,pt2,msd2,n2ddt2) instance at 0x7f444032d350>'

In [None]:
templates2 = outsum['templates2'].group('dataset', hist.Cat('process', 'Process'), pmap).integrate('region','muoncontrol')

In [None]:
hist.plot1d(templates2.sum('msd1','etamu').integrate('process','muondata'),overlay='ddb1')

In [None]:
hist.plot1d(templates2.sum('msd1','ptmu').integrate('process','muondata'),overlay='ddb1')

In [None]:
hist.plot1d(templates2.sum('ptmu','etamu').integrate('process','muondata'),overlay='ddb1')

In [None]:
hist.plot1d(templates2.sum('ptmu','etamu').integrate('process','ttbar'),overlay='ddb1')

In [None]:
def plot_stack(x,name):
    x.label = 'Events'
    axes = hist.plot1d(x, overlay='process', fill_opts={'edgecolor': (0,0,0,1)}, stack=True, order=['ttH','ZH','WH','VBF','ggF','VV','ttbar','singlet','Wjets','Zjets','QCD'])
    axes.set_prop_cycle(cycler(color=colors.values()))
    axes.set_yscale('log')
    axes.set_ylim(.001, 100000)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    
    #png_name = year+'/plot-all/'+name+'_stack.png'
    #plt.savefig(png_name,bbox_inches='tight')
    #pdf_name = year+'/plot-all/'+name+'_stack.pdf'
    #plt.savefig(pdf_name,bbox_inches='tight')

In [None]:
plot_stack(templates2.sum('ptmu','etamu').integrate('ddb1',int_range=slice(0,0.89)),'msd1')

In [None]:
plot_stack(templates2.sum('ptmu','etamu').integrate('ddb1',int_range=slice(0.89,1)),'msd1')

In [None]:
plot_stack(templates2.sum('msd1','etamu').integrate('ddb1',int_range=slice(0,0.89)),'ptmu')

In [None]:
plot_stack(templates2.sum('msd1','etamu').integrate('ddb1',int_range=slice(0.89,1)),'ptmu')

In [None]:
plot_stack(templates2.sum('msd1','ptmu').integrate('ddb1',int_range=slice(0,0.89)),'etamu')

In [None]:
plot_stack(templates2.sum('msd1','ptmu').integrate('ddb1',int_range=slice(0.89,1)),'etamu')