In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplhep as hep
hep.style.use("CMS")
from coffea import util
import itertools
import os

## Scale factors and IOV

In [None]:
# Data-taking periods (IOVs) that the loading cells below iterate over.
IOVs = ['2016']

# Integrated luminosity per IOV. It is multiplied by cross sections quoted in pb
# and divided by 1000 for the plot label, so it is presumably in pb^-1 -- confirm.
lumi = {
    "2016APV": 19800.,
    "2016": 16120.,     # 35920 - 19800
    "2016all": 35920.,  # 2016APV + 2016
    "2017": 41530.,
    "2018": 59740.
}

# Branching ratios. NOTE(review): not used by any cell visible in this notebook.
t_BR = 0.6741
ttbar_BR = 0.4544  # PDG 2019

# Common prefactor of both ttbar sample cross sections [pb]. Presumably the
# inclusive ttbar cross section, with the factors below being the fraction of
# events in each mass-binned sample -- TODO confirm against the sample docs.
ttbar_xs_incl = 831.76
ttbar_xs1 = ttbar_xs_incl * (0.09210)  # pb, for ttbar mass from 700 to 1000
ttbar_xs2 = ttbar_xs_incl * (0.02474)  # pb, for ttbar mass from 1000 to Inf

# Per-jet top-tagging scale factor; applied squared when normalising ttbar.
toptag_sf = 0.9
toptag_kf = 0.7  # NOTE(review): not used by any cell visible in this notebook.
qcd_xs = 1370000000.0  # pb, from https://cms-gen-dev.cern.ch/xsdb



## analysis categories

In [None]:
# --- analysis categories ---
# The category map is read from the 'analysisCategories' entry of the QCD output
# file belonging to the first configured IOV.
label_dict = util.load(f'../outputs/QCD_{IOVs[0]}.coffea')['analysisCategories']

# Inverse lookup: category label -> key of that label in ``label_dict``.
label_to_int_dict = dict((name, index) for index, name in label_dict.items())

# Category keys selected by substring match on the label:
# '2t' labels form the signal selection, 'pre' labels the pretag selection.
signal_cats = [index for name, index in label_to_int_dict.items() if '2t' in name]
pretag_cats = [index for name, index in label_to_int_dict.items() if 'pre' in name]

# Alternative kept for reference (disabled): build the labels by hand instead of
# reading them from file.
#   ttagcats = ["at", "pret", "2t"]   # full list: ["AT&Pt", "at", "pret", "0t", "1t", ">=1t", "2t", ">=0t"]
#   btagcats = ["0b", "1b", "2b"]
#   ycats = ['cen', 'fwd']
#   anacats = [ t+y for t,y in itertools.product( ttagcats, ycats) ]
#   # or: anacats = [ t+b+y for t,b,y in itertools.product( ttagcats, btagcats, ycats) ]
#   label_dict = {i: label for i, label in enumerate(anacats)}
#   label_to_int_dict = {label: i for i, label in enumerate(anacats)}




In [None]:
# Display the category mapping as the cell output, for a visual check of the
# labels that the '2t' / 'pre' substring selections are matched against.
label_dict

## create output directories for plot images

In [None]:
# Output folders for the closure-test plots: one per image format and IOV,
# listed as all png folders first, then all pdf folders.
directories = [
    f'images/{image_format}/closureTest/{iov}'
    for image_format in ('png', 'pdf')
    for iov in ('2016all', '2016APV', '2016', '2017', '2018')
]

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
# If a path exists but is a regular file this now raises FileExistsError
# instead of being silently skipped (saving a figure there would fail anyway).
for path in directories:
    os.makedirs(path, exist_ok=True)

## coffea files

In [None]:


# Folder holding the .coffea files, relative to this notebook.
coffea_dir = '../outputs/'

# Lookup table of input files.
#   coffeaFiles["JetHT"][selection][IOV][era] -> path
#       selection "unweighted" uses JetHT_<IOV><era>.coffea,
#       selection "weighted"   uses JetHT_<IOV><era>_bkgest.coffea
#   coffeaFiles["TTbar"][IOV][mass bin]       -> path
# 2017 and 2018 are placeholders (empty strings): no files are configured yet.
coffeaFiles = {
    "JetHT": {
        selection: {
            "2016APV": {
                era: f'{coffea_dir}JetHT_2016APV{era}{suffix}.coffea'
                for era in ("B", "C", "D", "E")
            },
            "2016": {
                era: f'{coffea_dir}JetHT_2016{era}{suffix}.coffea'
                for era in ("F", "G", "H")
            },
            "2017": '',
            "2018": '',
        }
        for selection, suffix in (("unweighted", ''), ("weighted", '_bkgest'))
    },

    "TTbar": {
        iov: {
            mass_bin: (f'{coffea_dir}TTbar_{iov}_{mass_bin}.coffea' if available else '')
            for mass_bin in ("700to1000", "1000toInf")
        }
        for iov, available in (
            ("2016APV", True),
            ("2016", True),
            ("2017", False),
            ("2018", False),
        )
    },
}




## load and scale histograms (inclusive)

In [None]:

# Per-IOV inclusive m(ttbar) histograms: observed data, NTMJ background estimate
# and normalised ttbar simulation.
histograms = {}

for IOV in IOVs:

    # "weighted" (_bkgest) JetHT outputs feed the background estimate; the
    # "unweighted" ones provide the observed data. Loaded in that order.
    jethtfiles = [util.load(path) for path in coffeaFiles["JetHT"]["weighted"][IOV].values()]
    jethtfiles_sr = [util.load(path) for path in coffeaFiles["JetHT"]["unweighted"][IOV].values()]

    ttbarfiles = {
        mass_bin: util.load(coffeaFiles["TTbar"][IOV][mass_bin])
        for mass_bin in ("700to1000", "1000toInf")
    }

    # Normalisation of each ttbar sample:
    #   lumi * cross section * toptag_sf^2 / (sum of weights from the sample's cutflow)
    ttbar_sf = {
        mass_bin: lumi[IOV] * xs * toptag_sf**2 / ttbarfiles[mass_bin]['cutflow']['sumw']
        for mass_bin, xs in (("700to1000", ttbar_xs1), ("1000toInf", ttbar_xs2))
    }

    # Select the categories of interest and sum over the 'anacat' axis:
    # pretag categories for the background estimate, signal categories otherwise.
    mtt_ntmjs = [out['ttbarmass'][{'anacat': pretag_cats}][{'anacat': sum}] for out in jethtfiles]
    mtt_datas = [out['ttbarmass'][{'anacat': signal_cats}][{'anacat': sum}] for out in jethtfiles_sr]
    mtt_ttbars = [
        ttbarfiles[mass_bin]['ttbarmass'][{'anacat': signal_cats}][{'anacat': sum}]
        for mass_bin in ("700to1000", "1000toInf")
    ]

    # Combine the run eras: observed data in the signal selection.
    mtt_data = mtt_datas[0]
    for era_hist in mtt_datas[1:]:
        mtt_data = mtt_data + era_hist

    # Combine the run eras: pretag histograms from the "weighted" files.
    # No additional scaling is applied here.
    mtt_ntmj = mtt_ntmjs[0]
    for era_hist in mtt_ntmjs[1:]:
        mtt_ntmj = mtt_ntmj + era_hist

    # Normalise and add the two ttbar samples (700to1000 and 1000toInf).
    ttbar_low, ttbar_high = mtt_ttbars
    mtt_ttbar = ttbar_low * ttbar_sf['700to1000'] + ttbar_high * ttbar_sf['1000toInf']

    histograms[IOV] = dict(data=mtt_data, ntmj=mtt_ntmj, ttbar=mtt_ttbar)
    

## plot histograms (inclusive)

In [None]:
# Closure-test plot, inclusive in b-tag and rapidity categories, for one IOV.
# Set IOV to '2016all' to add 2016APV and 2016 (both must have been loaded above).
IOV = '2016'
tagger = 'DeepAK8 tagger'

# Upper panel: distributions; lower panel: data / background ratio.
fig, (ax1, ax2) = plt.subplots(nrows=2, height_ratios=[3, 1])

if IOV == '2016all':
    hdata  = histograms['2016APV']['data']  + histograms['2016']['data']
    hntmj  = histograms['2016APV']['ntmj']  + histograms['2016']['ntmj']
    httbar = histograms['2016APV']['ttbar'] + histograms['2016']['ttbar']
    year = '2016'
else:
    hdata  = histograms[IOV]['data']
    hntmj  = histograms[IOV]['ntmj']
    httbar = histograms[IOV]['ttbar']
    year = IOV

# Total background = NTMJ estimate + ttbar simulation.
hbkg = hntmj + httbar

# Pass `year` (not IOV) so that '2016all' is labelled as 2016, consistent with
# the per-category plots; the luminosity is still looked up by IOV.
hep.cms.label('', data=True, lumi='{0:0.1f}'.format(lumi[IOV]/1000.), year=year, loc=2, fontsize=20, ax=ax1)
hep.cms.text('Preliminary'+'\n'+r'$\Delta y$ inclusive'+'\n'+r'btag inclusive'+'\n'+tagger,
             loc=2, fontsize=20, ax=ax1)

hep.histplot(hdata,  ax=ax1, histtype='errorbar', color='black', label='Data')
# The total background is filled first and ttbar is filled on top of it, so the
# area left visible under the 'NTMJ' label is the NTMJ contribution.
hep.histplot(hbkg,   ax=ax1, histtype='fill', color='xkcd:pale gold', label='NTMJ')
hep.histplot(httbar, ax=ax1, histtype='fill', color='xkcd:deep red', label='TTbar')

# Bins without background give inf/nan ratios; they are simply not drawn, so
# the corresponding numpy warnings are silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    ratio_plot = hdata / hbkg.values()
hep.histplot(ratio_plot, ax=ax2, histtype='errorbar', color='black')
ax2.set_ylim(0,2)
ax2.axhline(1, color='black', ls='--')
ax2.set_ylabel('Data/Bkg')

ax1.legend()
ax1.set_yscale('log')
ax1.set_ylabel('Events')
ax1.set_ylim(1e-1, 1e7)
ax1.set_xlim(900, 6000)
ax2.set_xlim(900, 6000)


plt.savefig(f'images/png/closureTest/{IOV}/closuretest_inclusive.png')
plt.savefig(f'images/pdf/closureTest/{IOV}/closuretest_inclusive.pdf')

plt.show()

## load and scale histograms (by category)

In [None]:
# IOVs to process for the per-category plots (overrides the list set earlier).
IOVs = ['2016']#, '2016APV']

# histograms_cats[IOV][category] -> {"data", "ntmj", "ttbar"} m(ttbar) histograms
histograms_cats = {}

# Category suffixes; each is combined with the '2t' (signal) and 'pret' (pretag)
# prefixes to look up the category key.
cats = ['0bcen', '0bfwd', '1bcen', '1bfwd', '2bcen', '2bfwd']

# cats = ['cen', 'fwd']


for IOV in IOVs:

    histograms_cats[IOV] = {}

    # "weighted" (_bkgest) JetHT outputs feed the background estimate; the
    # "unweighted" ones provide the observed data. Loaded in that order.
    jethtfiles = [util.load(path) for path in coffeaFiles["JetHT"]["weighted"][IOV].values()]
    jethtfiles_sr = [util.load(path) for path in coffeaFiles["JetHT"]["unweighted"][IOV].values()]

    ttbarfiles = {
        mass_bin: util.load(coffeaFiles["TTbar"][IOV][mass_bin])
        for mass_bin in ("700to1000", "1000toInf")
    }

    # Normalisation of each ttbar sample:
    #   lumi * cross section * toptag_sf^2 / (sum of weights from the sample's cutflow)
    ttbar_sf = {
        mass_bin: lumi[IOV] * xs * toptag_sf**2 / ttbarfiles[mass_bin]['cutflow']['sumw']
        for mass_bin, xs in (("700to1000", ttbar_xs1), ("1000toInf", ttbar_xs2))
    }

    for cat in cats:

        signal_cat = label_to_int_dict['2t'+cat]
        pretag_cat = label_to_int_dict['pret'+cat]

        # A single category is picked on the 'anacat' axis, so no sum is needed.
        mtt_ntmjs = [out['ttbarmass'][{'anacat': pretag_cat}] for out in jethtfiles]
        mtt_datas = [out['ttbarmass'][{'anacat': signal_cat}] for out in jethtfiles_sr]
        mtt_ttbars = [
            ttbarfiles[mass_bin]['ttbarmass'][{'anacat': signal_cat}]
            for mass_bin in ("700to1000", "1000toInf")
        ]

        # Combine the run eras: observed data in the signal category.
        mtt_data = mtt_datas[0]
        for era_hist in mtt_datas[1:]:
            mtt_data = mtt_data + era_hist

        # Combine the run eras: pretag histograms from the "weighted" files.
        # No additional scaling is applied here.
        mtt_ntmj = mtt_ntmjs[0]
        for era_hist in mtt_ntmjs[1:]:
            mtt_ntmj = mtt_ntmj + era_hist

        # Normalise and add the two ttbar samples (700to1000 and 1000toInf).
        ttbar_low, ttbar_high = mtt_ttbars
        mtt_ttbar = ttbar_low * ttbar_sf['700to1000'] + ttbar_high * ttbar_sf['1000toInf']

        histograms_cats[IOV][cat] = dict(data=mtt_data, ntmj=mtt_ntmj, ttbar=mtt_ttbar)
    

## plot histograms (by category)

In [None]:
# Closure-test plots, one figure per (b-tag, rapidity) category, for one IOV.
# Set IOV to '2016all' to add 2016APV and 2016 (both must have been loaded above).
# NOTE: `tagger` is taken from the inclusive plotting cell, which must run first.
IOV = '2016'

# Paths of the files written, reported once all figures are done.
saved_png = []
saved_pdf = []

for cat in histograms_cats[IOV.replace('all','')].keys():

    print(cat)

    if IOV == '2016all':
        hdata  = histograms_cats['2016APV'][cat]['data']  + histograms_cats['2016'][cat]['data']
        hntmj  = histograms_cats['2016APV'][cat]['ntmj']  + histograms_cats['2016'][cat]['ntmj']
        httbar = histograms_cats['2016APV'][cat]['ttbar'] + histograms_cats['2016'][cat]['ttbar']
        year = '2016'
    else:
        hdata = histograms_cats[IOV][cat]['data']
        hntmj = histograms_cats[IOV][cat]['ntmj']
        httbar = histograms_cats[IOV][cat]['ttbar']
        year = IOV

    # Total background = NTMJ estimate + ttbar simulation.
    hbkg = hntmj + httbar

    # Human-readable description of the category for the plot text.
    dytext = ''
    if 'cen' in cat:
        dytext = r'$\Delta y$ < 1.0'
    elif 'fwd' in cat:
        dytext = r'$\Delta y$ > 1.0'

    btext = ''
    if '0b' in cat:
        btext = '0 b-tags'
    elif '1b' in cat:
        btext = '1 b-tag'
    elif '2b' in cat:
        btext = '2 b-tags'

    # Upper panel: distributions; lower panel: data / background ratio.
    fig, (ax1, ax2) = plt.subplots(nrows=2, height_ratios=[3, 1])

    hep.cms.label('', data=True, lumi='{0:0.1f}'.format(lumi[IOV]/1000.),
                  year=year, loc=2, fontsize=20, ax=ax1)
    hep.cms.text('Preliminary'+'\n'
                 +f'{btext}, {dytext}'+'\n'
                 +tagger,
                 loc=2, fontsize=20, ax=ax1)

    hep.histplot(hdata, ax=ax1, histtype='errorbar', color='black', label='Data')
    # The total background is filled first and ttbar is filled on top of it, so
    # the area left visible under the 'NTMJ' label is the NTMJ contribution.
    hep.histplot(hbkg,  ax=ax1, histtype='fill', color='xkcd:pale gold', label='NTMJ')
    hep.histplot(httbar,  ax=ax1, histtype='fill', color='xkcd:deep red',label='TTbar')

    # Empty bins give inf/nan ratios and errors; they are simply not drawn, so
    # the corresponding numpy warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_plot = hdata / hbkg.values()
        # Relative errors 1/sqrt(N) of numerator and denominator added in quadrature.
        ratio_err = (hdata.values() / hbkg.values()) * np.sqrt(1/hbkg.values() + 1/hdata.values())

    hep.histplot(ratio_plot, yerr=ratio_err, ax=ax2, histtype='errorbar', color='black')
    ax2.set_ylim(0,2.5)
    ax2.axhline(1, color='black', ls='--')
    ax2.set_ylabel('Data/Bkg')
    ax2.set_xlim(1000,6000)

    ax1.legend()
    ax1.set_yscale('log')
    ax1.set_ylabel('Events')
    ax1.set_ylim(1e-1, 1e5)
    ax1.set_xlim(1000, 6000)
    ax1.set_xlabel('')

    # Build both paths explicitly instead of deriving the pdf path with
    # str.replace('png', 'pdf'), which would also rewrite any 'png' that
    # happened to appear in IOV or the category name.
    savefilename = f'images/png/closureTest/{IOV}/closuretest_{cat}.png'
    savefilename_pdf = f'images/pdf/closureTest/{IOV}/closuretest_{cat}.pdf'
    plt.savefig(savefilename)
    plt.savefig(savefilename_pdf)
    saved_png.append(savefilename)
    saved_pdf.append(savefilename_pdf)

    plt.show()
    # Release the figure so that figures do not accumulate over the loop.
    plt.close(fig)

for savedname in saved_png:
    print('saving', savedname)
print()
for savedname in saved_pdf:
    print('saving', savedname)
    