## 1) script to scale all histograms in coffea files
- saved in outputs/scale/
- also scales the cutflow dictionary
## 2) adds the scaled files from multiple years.
- the histograms in the files must all have the same axes
## 3) makes root files for 2DAlphabet
- inclusive or split into regions

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import mplhep as hep
hep.style.use("CMS")
from coffea import util
import itertools
import os, sys
import glob
import copy
import uproot

sys.path.append('../python/')
import functions



In [None]:
# Prepare the output directories through the project helper.
# NOTE(review): presumably this creates ../outputs/scale and ../outputs/twodalphabet,
# which the cells below write to -- confirm in ../python/functions.py.
functions.makeSaveDirectories()

### analysis categories

In [None]:
# Analysis-category lookup tables: integer code -> label, and the inverse.
label_map = functions.getLabelMap()
label_to_int = dict(zip(label_map.values(), label_map.keys()))


def _categories_with(tag):
    """Return the integer codes of every category whose label contains ``tag``."""
    return [code for name, code in label_to_int.items() if tag in name]


signal_cats = _categories_with('2t')
pretag_cats = _categories_with('pre')
antitag_cats = _categories_with('at')

# Show the category map so the integer codes used below can be cross-checked.
print('------ analysis category map --------')
for code, name in label_map.items():
    print(f'{code}: {name}')
print('-------------------------------------')

# List the top-level keys of the coffea file returned by the project helper.
print('\n\n------ coffea file content --------')
coffea_keys = functions.loadCoffeaFile().keys()
for hist_name in coffea_keys:
    print(hist_name)
print('-------------------------------------')


## 1) Scale histograms in IOV

In [None]:
# Scale every histogram (and the cutflow) of each dataset for one IOV and save
# the result under ../outputs/scale/.
coffeafiles = functions.getCoffeaFilenames()

datasets = ['QCD', 'TTbar', 'JetHT', 'RSGluon']
# Active selection: overrides the full list above.
datasets = ['JetHT']

hasBkgEst = True
bkgest_str = '_bkgest' if hasBkgEst else ''
filetype = 'weighted' if hasBkgEst else 'unweighted'

IOV = '2016'


def _scale_in_place(contents, sf):
    """Multiply every histogram and every cutflow entry of ``contents`` by ``sf``."""
    for key in contents.keys():
        if 'hist' in str(type(contents[key])):
            contents[key] = contents[key] * sf
        elif 'cutflow' in key:
            for cut in contents[key].keys():
                contents[key][cut] = contents[key][cut] * sf


def _merge_in_place(total, other):
    """Add the histograms and cutflow counts of ``other`` into ``total``."""
    for key in total.keys():
        if 'hist' in str(type(total[key])):
            total[key] = total[key] + other[key]
        elif 'cutflow' in key:
            # Accumulate into ``total`` (the object that gets saved), not ``other``.
            for cut in other[key].keys():
                if cut in total[key]:
                    total[key][cut] = total[key][cut] + other[key][cut]
                else:
                    total[key][cut] = other[key][cut]


for ds in datasets:

    entry = coffeafiles[ds][filetype][IOV]

    # A mapping means the dataset is split into sections (one file per section);
    # anything else is treated as a single filename. Checking the structure
    # explicitly keeps genuine errors from being rerouted into the wrong branch.
    if hasattr(entry, 'keys'):

        is_signal = 'RSGluon' in ds or 'ZPrime' in ds
        files = []

        for s in entry.keys():

            file = copy.deepcopy(util.load(entry[s]))

            # JetHT files are appended as loaded, with no scale factor applied.
            if 'JetHT' not in ds:

                # NOTE(review): toptag_sf is not defined in the visible cells;
                # it must exist before TTbar is processed.
                factor = toptag_sf**2 if 'TTbar' in ds else 1.0
                sf = functions.lumi[IOV] * functions.xs[ds][s] * factor / file['cutflow']['sumw']
                _scale_in_place(file, sf)

            files.append(file)

            # Signal samples are saved per section and are not merged.
            if is_signal:
                util.save(file, f'../outputs/scale/{ds}{s}_{IOV}.coffea')
                print(f'saving ../outputs/scale/{ds}{s}_{IOV}.coffea')

        if not is_signal:

            if not files:
                print(f'no sections found for {ds} {IOV}, nothing saved')
                continue

            file = files[0]
            for f in files[1:]:
                _merge_in_place(file, f)

            savefilename = f'../outputs/scale/{ds}_{IOV}{bkgest_str}.coffea'
            util.save(file, savefilename)
            print(f'saving {savefilename}')

    else:

        # NOTE(review): unlike the sectioned branch, this path applies neither the
        # JetHT exemption nor the TTbar top-tag factor -- confirm that is intended.
        file = copy.deepcopy(util.load(entry))

        sf = functions.lumi[IOV] * functions.xs[ds] / file['cutflow']['sumw']
        _scale_in_place(file, sf)

        savefilename = f'../outputs/scale/{ds}_{IOV}{bkgest_str}.coffea'
        util.save(file, savefilename)
        print(f'saving {savefilename}')






## 2.1) Combine JetHT files (blinded)


In [None]:
# Sum the JetHT files of the listed IOVs into a single coffea file.
# The output name is hard-coded to "2016all", matching the two 2016 eras listed.
IOVs = ['2016APV', '2016']


hasBkgEst = True
bkgest_str = '_bkgest' if hasBkgEst else ''

files = []
for IOV in IOVs:

    file = util.load(f'../outputs/scale/JetHT_{IOV}{bkgest_str}.coffea')
    files.append(file)

# Not used in this cell.
systs = 'nominal'

# Accumulate everything into the first file, which is the object that gets saved.
file = files[0]
for f in files[1:]:
    for key in file.keys():

        if 'hist' in str(type(file[key])):

            file[key] = file[key] + f[key]

        elif 'cutflow' in key:
            # The sum must land in ``file``; writing it into ``f`` would leave
            # the saved cutflow equal to the first IOV only.
            for cut in f[key].keys():
                if cut in file[key]:
                    file[key][cut] = file[key][cut] + f[key][cut]
                else:
                    file[key][cut] = f[key][cut]


if hasBkgEst:
    savefilename = '../outputs/scale/NTMJ_2016all_unblinded.coffea'

else:
    savefilename = '../outputs/scale/JetHT_2016all_unblinded.coffea'


util.save(file, savefilename)
print(f'saving {savefilename}')


## 2.2) Combine MC files (for plotting)


In [None]:
# Sum each MC dataset over all IOVs and save one "<dataset>_all" file per dataset.
IOVs = ['2016APV', '2016', '2017', '2018']
datasets = ['QCD', 'TTbar', 'RSGluon']

# NOTE(review): the scaling cell saves RSGluon per mass point only, so a plain
# 'RSGluon_<IOV>.coffea' may not exist -- confirm that 'RSGluon' belongs in the list.

# add all RSGluon samples
datasets += ['RSGluon'+str(int(b)) for b in np.linspace(1000,5000,9)]

for ds in datasets:

    # Start from an empty list for every dataset; otherwise the files of the
    # previous datasets leak into this dataset's sum.
    files = []
    for IOV in IOVs:

        file = util.load(f'../outputs/scale/{ds}_{IOV}.coffea')
        files.append(file)

    # Accumulate everything into the first file, which is the object that gets saved.
    file = files[0]
    for f in files[1:]:
        for key in file.keys():

            if 'hist' in str(type(file[key])):
                file[key] = file[key] + f[key]

            elif 'cutflow' in key:
                # The sum must land in ``file``, not in the discarded ``f``.
                for cut in f[key].keys():
                    if cut in file[key]:
                        file[key][cut] = file[key][cut] + f[key][cut]
                    else:
                        file[key][cut] = f[key][cut]


    savefilename = f'../outputs/scale/{ds}_all.coffea'
    util.save(file, savefilename)
    print(f'saving {savefilename}')


## 2.3) Combine 2016noAPV and 2016APV

In [None]:


# Sum the 2016 and 2016APV files of each dataset into one "<dataset>_2016all" file.
IOVs = ['2016', '2016APV']
datasets = ['JetHT']


hasBkgEst = True
bkgest_str = '_bkgest' if hasBkgEst else ''


# add all RSGluon samples
datasets += ['RSGluon'+str(int(b)) for b in np.linspace(1000,5000,9)]


for ds in datasets:

    files = []
    for IOV in IOVs:

        file = util.load(f'../outputs/scale/{ds}_{IOV}{bkgest_str}.coffea')
        files.append(file)

    # Accumulate everything into the first file, which is the object that gets saved.
    file = files[0]
    for f in files[1:]:
        for key in file.keys():

            if 'hist' in str(type(file[key])):

                file[key] = file[key] + f[key]

            elif 'cutflow' in key:
                # The sum must land in ``file``, not in the discarded ``f``.
                for cut in f[key].keys():
                    if cut in file[key]:
                        file[key][cut] = file[key][cut] + f[key][cut]
                    else:
                        file[key][cut] = f[key][cut]

    savefilename = f'../outputs/scale/{ds}_2016all{bkgest_str}.coffea'
    util.save(file, savefilename)
    print(f'saving {savefilename}')
    


## 3) Make root files for 2DAlphabet

In [None]:
# Systematic sources; every source except 'nominal' gets a Down and an Up variation.
systematics = ['nominal', 'jes', 'jer', 'pileup', 'pdf', 'q2', 'btag', 'prefiring']

# 'nominal' always comes first, followed by <source>Down, <source>Up in list order.
syst_labels = ['nominal']
for source in systematics:
    if 'nominal' in source:
        continue
    syst_labels.extend([source + 'Down', source + 'Up'])

print(syst_labels)

In [None]:
# Write the mtt-vs-mt pass/fail histograms of data, TTbar and every RSGluon mass
# point into ROOT files, one file per sample.
year = '2016all'

dataOnly = False
inclusive = True

if inclusive:

    cats, cat_labels = [''], ['']

else:

    cats = ['0bcen', '0bfwd', '1bcen', '1bfwd', '2bcen', '2bfwd']
    cat_labels = ['cen0b', 'fwd0b', 'cen1b', 'fwd1b', 'cen2b', 'fwd2b']


savefileheader = '../outputs/twodalphabet/TTbarAllHad{}_'.format(year.replace('20', ''))

# RSGluon mass points: 1000, 1500, ..., 5000.
signal_masses = list(range(1000, 5001, 500))

fdata = uproot.recreate(savefileheader+'Data.root')
fttbar = None
fsignal = {}

try:

    if not dataOnly:
        fttbar = uproot.recreate(savefileheader+'TTbar.root')
        for mass in signal_masses:
            fsignal[mass] = uproot.recreate(f'{savefileheader}signalRSGluon{mass}.root')

    for cat, catname in zip(cats, cat_labels):

        if inclusive:

            # All signal / antitag categories, summed over the category axis.
            signal_cats = [ i for label, i in label_to_int.items() if '2t' in label]
            antitag_cats = [ i for label, i in label_to_int.items() if 'at' in label]
            sum_axes = ['anacat']

        else :

            signal_cats = label_to_int['2t'+cat]
            antitag_cats = label_to_int['at'+cat]
            sum_axes = []

        for syst in syst_labels:
            print(syst, cat)

            regions = {
                'Pass': {'anacat': signal_cats, 'systematic': syst},
                'Fail': {'anacat': antitag_cats, 'systematic': syst},
            }

            # Histogram-name suffix: '' for nominal, otherwise the upper-cased
            # source followed by 'up' or 'down' (e.g. 'jesUp' -> 'JESup').
            if 'nominal' in syst:
                systname = ''
            elif syst.endswith('Up'):
                systname = syst[:-2].upper() + 'up'
            else:
                systname = syst[:-4].upper() + 'down'

            for region, integrate_axes in regions.items():

                histname = "MttvsMt"+catname+region+systname

                # Data is only written for the nominal pass.
                if 'nominal' in syst:
                    fdata[histname] = functions.getHist2('mtt_vs_mt', 'JetHT', year, sum_axes=sum_axes, integrate_axes=integrate_axes)

                if not dataOnly:

                    fttbar[histname] = functions.getHist2('mtt_vs_mt', 'TTbar', year, sum_axes=sum_axes, integrate_axes=integrate_axes)

                    for mass, fout in fsignal.items():
                        fout[histname] = functions.getHist2('mtt_vs_mt', f'RSGluon{mass}', year, sum_axes=sum_axes, integrate_axes=integrate_axes)

finally:

    # Close every file that was opened, even if filling failed part-way.
    fdata.close()
    if fttbar is not None:
        fttbar.close()
    for fout in fsignal.values():
        fout.close()

print('saving '+savefileheader+'Data.root')

if not dataOnly:

    print('saving '+savefileheader+'TTbar.root')
    # Report the real file names, which carry the 'signal' prefix.
    for mass in signal_masses:
        print(f'saving {savefileheader}signalRSGluon{mass}.root')