In [None]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import copy
import hist
from coffea import util
import numpy as np
import itertools
import pandas as pd
import mplhep as hep

In [None]:
# ---- Input location and run configuration ---- #
dir1 = 'CoffeaOutputsForCombine/Coffea_FirstRun/'

btagdir, yeardir = 'MediumBTag/', '2016/'
vfp, btagType = 'postVFP', 'CSVV2'

# Sub-directory that holds the outputs of each VFP period.
APVDir = dict(preVFP='APV/', postVFP='noAPV/')

# An empty `btagdir` switches the file suffix and output sub-directory to the
# '_oldANdisc' / 'Old2016_MediumBTag/' variants; otherwise both stay empty.
od, oddir = ('_oldANdisc', 'Old2016_MediumBTag/') if btagdir == '' else ('', '')

# Load All Data Eras

In [None]:
# Project-local loaders; only JetHT_Unweighted is called in the cells visible here.
from LoadData import JetHT_Unweighted, JetHT_Weighted, Cutflow

# The result is indexed later as JetHT2016_unweighted['<Era>_<preVFP|postVFP>'][...], e.g. 'B_preVFP'.
# NOTE(review): 'MediumBTag' is a literal here rather than derived from `btagdir` — confirm the two are meant to stay in sync.
JetHT2016_unweighted = JetHT_Unweighted('MediumBTag', 2016)

# Load All MC

In [None]:
# Containers keyed by VFP period ('preVFP' / 'postVFP') for the two mass-binned ttbar samples:
# TTbar_unweighted1 holds the 700-1000 sample, TTbar_unweighted2 the 1000-Inf sample.
TTbar_unweighted1, TTbar_unweighted2 = {}, {}

In [None]:
# Load both mass-binned ttbar outputs for each VFP period.
# Path layout: <dir1>TT/<btagdir><yeardir><APVDir[VFP]><sample name><od>.coffea
for VFP in ['preVFP', 'postVFP']:
    TTbar_unwgt_str1 = f'TTbarRes_0l_UL16{VFP}_TTbar_700_1000'
    TTbar_unwgt_str2 = f'TTbarRes_0l_UL16{VFP}_TTbar_1000_Inf'
    TTbar_unweighted1[VFP] = util.load(f'{dir1}TT/{btagdir}{yeardir}{APVDir[VFP]}{TTbar_unwgt_str1}{od}.coffea')
    TTbar_unweighted2[VFP] = util.load(f'{dir1}TT/{btagdir}{yeardir}{APVDir[VFP]}{TTbar_unwgt_str2}{od}.coffea')

# Make Save Directory

In [None]:
def mkdir_p(mypath):
    '''Create a directory and any missing parents, like ``mkdir -p`` on the command line.

    Does nothing when ``mypath`` already exists as a directory.

    Raises
    ------
    OSError
        If the directory cannot be created, including when ``mypath`` already
        exists but is not a directory.
    '''
    from os import makedirs

    # exist_ok=True tolerates an existing directory but still raises when the
    # existing path is not a directory, so no manual errno check is needed.
    makedirs(mypath, exist_ok=True)

In [None]:
def DoesDirectoryExist(mypath): #extra precaution (Probably overkill...)
    '''Ensure ``mypath`` exists, handing creation over to ``mkdir_p`` only when it is missing.'''
    import os.path

    if not os.path.exists(mypath):
        mkdir_p(mypath)

In [None]:
# Working directory at the time this cell runs; every output path below is built from it.
maindirectory = os.getcwd() 
print(maindirectory)

## Definitions

In [None]:
def plotratio(numerator, denominator, ax=None, histtype='errorbar', marker='.', markersize=5., color='k', alpha=0.1):
    '''Draw the bin-by-bin ratio ``numerator / denominator`` with ``hep.histplot``.

    Parameters
    ----------
    numerator, denominator
        Histogram objects exposing ``.axes``. ``denominator`` must provide
        ``.values()`` and ``numerator`` must support division by that array.
    ax
        Axes object forwarded to ``hep.histplot``.
    histtype
        Forwarded to ``hep.histplot``; 'errorbar' and 'fill' get extra styling.
    marker, markersize
        Used only when ``histtype == 'errorbar'``.
    color
        Forwarded for every ``histtype``.
    alpha
        Used only when ``histtype == 'fill'`` (together with ``lw=5.``).

    Returns
    -------
    Whatever ``hep.histplot`` returns for the ratio histogram.

    Raises
    ------
    ValueError
        If the first axes of the two histograms have a different number of bins.
    '''
    # Compare the bin counts of the first axis directly; the former ``- 3`` offset
    # was applied to both sides and never changed the outcome of the comparison.
    numerator_bins = numerator.axes[0].size
    denominator_bins = denominator.axes[0].size
    if numerator_bins != denominator_bins:
        # ValueError is still an Exception, so existing ``except Exception`` callers keep working.
        raise ValueError('Numerator and Denominator axes are different sizes; Cannot perform division.')

    ratio = numerator / denominator.values()

    # Options shared by every histtype, then the per-histtype extras.
    plot_options = {'histtype': histtype, 'color': color}
    if histtype == 'errorbar':
        plot_options.update(marker=marker, markersize=markersize)
    elif histtype == 'fill':
        plot_options.update(alpha=alpha, lw=5.)

    return hep.histplot(ratio, ax=ax, **plot_options)

In [None]:
def ConvertLabelToInt(mapping, str_label):
    '''Reverse lookup: return the first key of ``mapping`` whose value equals ``str_label``.

    Returns ``None`` when no value matches.
    '''
    matching_keys = (key for key, label in mapping.items() if str_label == label)
    return next(matching_keys, None)

In [None]:
# ---- Optional to rescale x-axis of mistag rates ---- #
def forward(x):
    '''Axis transform: the eighth root of ``x``.'''
    eighth = 1/8
    return x**eighth

def inverse(x):
    '''Axis transform: the eighth power of ``x``.'''
    return pow(x, 8)

# Categories

In [None]:
import matplotlib.pyplot as plt
import warnings
import re # regular expressions
# NOTE(review): this silences every Python warning for the rest of the session,
# including any raised by the histogram divisions further down.
warnings.filterwarnings("ignore")

# ---- Analysis categories: t-tag x b-tag x rapidity ---- #
ttagcats = ["at"] #, "0t", "1t", "It", "2t"]
btagcats = ["0b", "1b", "2b"]
ycats = ['cen', 'fwd']

# itertools.product varies its last argument fastest: at0bcen, at0bfwd, at1bcen, ...
list_of_cats = [''.join(parts) for parts in itertools.product(ttagcats, btagcats, ycats)]
list_of_ints = list(range(6, 12))

# Integer category index -> category label.
catmap = {icat: label for icat, label in zip(list_of_ints, list_of_cats)}

# Luminosities, Cross Sections & Scale Factors

In [None]:
# Sum the 'all events' cutflow entry over every data era of the selected VFP period.
# JetHT2016_unwgt_str is updated alongside but is not read inside this cell.
if vfp == 'preVFP':
    JetHT2016_unwgt_str = 'UL16preVFP_JetHTB_Data'
    Total2016 = JetHT2016_unweighted['B_preVFP']['cutflow']['all events']
    for Era in ['C', 'D', 'E', 'F']: # exclude B because the total is initialized with the B era
        JetHT2016_unwgt_str = f'UL16preVFP_JetHT{Era}_Data'
        Total2016 += JetHT2016_unweighted[Era+'_'+vfp]['cutflow']['all events']
else:
    JetHT2016_unwgt_str = 'UL16postVFP_JetHTF_Data'
    Total2016 = JetHT2016_unweighted['F_postVFP']['cutflow']['all events']
    for Era in ['G', 'H']: # exclude F because the total is initialized with the F era
        JetHT2016_unwgt_str = f'UL16postVFP_JetHT{Era}_Data'
        Total2016 += JetHT2016_unweighted[Era+'_'+vfp]['cutflow']['all events']

In [None]:
# Reference event count next to the total read back from the cutflows.
# NOTE(review): Nevts2016 is assigned again, with the same value, in the following cell.
Nevts2016 = 625441538 # from dasgoclient
print(Total2016)

In [None]:
# ---- Normalisation inputs for the ttbar MC ---- #
Nevts2016 = 625441538 # from dasgoclient

Lum2016 = 35920. # pb^-1 from https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
# Lum2017 = 41530.
# Lum2018 = 59740.
# Lum     = 137190.

# NOTE(review): t_BR and ttbar_BR are not read anywhere in the cells visible here.
t_BR = 0.6741
ttbar_BR = 0.4544 #PDG 2019
ttbar_xs1 = 831.76 * (0.09210) #pb For ttbar mass from 700 to 1000
ttbar_xs2 = 831.76 * (0.02474) #pb For ttbar mass from 1000 to Inf
toptag_sf = 0.9
toptag_kf = 0.7

# Luminosity assigned to the selected period: 19800 for preVFP, the remainder of Lum2016 otherwise.
if vfp == 'preVFP':
    Lum_frac = 19800. 
else:
    Lum_frac = Lum2016 - 19800.

# =========== SF =========== #
# Per-sample weight = Lum_frac * cross section * toptag_sf^2 * toptag_kf / the sample's cutflow 'sumw'.
ttbar2016_sf1 = Lum_frac*ttbar_xs1*toptag_sf**2*toptag_kf/TTbar_unweighted1[vfp]['cutflow']['sumw']
ttbar2016_sf2 = Lum_frac*ttbar_xs2*toptag_sf**2*toptag_kf/TTbar_unweighted2[vfp]['cutflow']['sumw']

# Mistag Rates with $t\bar{t}$ Subtraction for SeparateYears

In [None]:
# SaveDirectory = maindirectory + '/' + 'MistagPlots' + '/' # split histograms into subdirectories
# DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

# for icat in list_of_ints:
        
#     title = 'mistag ' + catmap[icat][2:]
    
#     # ---- define ttbar numerator and denominator ---- #
#     NumeratorTT2016 = TTbar_unweighted[vfp]['numerator'][f'UL16{vfp}_TTbar', icat, :]               
#     DenominatorTT2016 = TTbar_unweighted[vfp]['denominator'][f'UL16{vfp}_TTbar', icat, :]
    
#     # ---- scale ttbar and prepare to subtract ---- #
#     NumeratorTT2016 *= (-ttbar2016_sf)
#     DenominatorTT2016 *= (-ttbar2016_sf)
    
#     # ---- initialize data histograms with first era ---- #
#     if vfp == 'preVFP':
#         JetHT2016_unwgt_str = 'UL16preVFP_JetHTB_Data'
#         Numerator2016 = JetHT2016_unweighted['B_preVFP']['numerator'][JetHT2016_unwgt_str, icat, :]
#         Denominator2016 = JetHT2016_unweighted['B_preVFP']['denominator'][JetHT2016_unwgt_str, icat, :]
#         for Era in ['C', 'D', 'E', 'F']: #exclude B because histogram is initialized with B era
#             JetHT2016_unwgt_str = f'UL16preVFP_JetHT{Era}_Data'
#             Numerator2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, icat, :]
#             Denominator2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, icat, :]
#     else:
#         JetHT2016_unwgt_str = 'UL16postVFP_JetHTF_Data'
#         Numerator2016 = JetHT2016_unweighted['F_postVFP']['numerator'][JetHT2016_unwgt_str, icat, :]
#         Denominator2016 = JetHT2016_unweighted['F_postVFP']['denominator'][JetHT2016_unwgt_str, icat, :]
#         for Era in ['G', 'H']: #exclude F because histogram is initialized with F era
#             JetHT2016_unwgt_str = f'UL16postVFP_JetHT{Era}_Data'
#             Numerator2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, icat, :]
#             Denominator2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, icat, :]
    
    
#     # ---- add (remove) ttbar mc ---- #
#     Numerator2016 += (NumeratorTT2016)
#     Denominator2016 += (DenominatorTT2016)

#     # ==== Scikit plot_ratio function ==== #
# #     fig = plt.figure(figsize=(10, 8))
# #     main_ax_artists, sublot_ax_arists = Numerator2016.plot_ratio(
# #         Denominator2016,
# #         rp_ylabel=r"Ratio",
# #         rp_num_label="num",
# #         rp_denom_label="denom",
# #         rp_uncert_draw_type="bar")
    
#     # ==== My function (Closer to coffea.hist.plotratio) ==== #
#     fig, ax = plt.subplots(
#         #nrows=2,
#         #ncols=1,
#         figsize=(7,5),
#         #gridspec_kw={"height_ratios": (3, 1)},
#         sharex=True
#     )
#     mistag2016 = plotratio(Numerator2016, Denominator2016, ax = ax, 
#                            histtype='errorbar', marker='.', markersize=13., color='r')


#     ax.set_ylabel('Mistag Rates')
#     ax.set_title(title)
#     leg = ax.legend(labels=["2016", "2017", "2018"])

#     # ---- Optional x-axis scaling ---- #
#     plt.ylim(bottom = 0, top = 0.10)
#     plt.xlim(left = 100, right = 3000)

#     # ---- Optional x-axis scaling ---- #
#     #ax.set_xscale('function', functions=(forward, inverse))
#     #plt.xticks(np.array([0, 500, 1000, 2000, 3000, 4000, 5000]))

#     # ---- Optional x-axis scaling ---- #
#     #ax.set_xscale('function', functions=(forward, inverse))
#     #plt.xticks(np.array([500, 1000, 2000, 5000, 10000]))
#     #plt.yticks(np.array([.05, .10, .15]))
#     #ax.set_xscale('log')

#     #filename = 'Mistag_bdisc8484_ttbarSubtraction_SeparateYears_' + icat + '.png'
#     #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
#     #print('\n' + filename + ' saved')

## Mistag Rates with $t\bar{t}$ Subtraction for SeparateYears with Inclusive $y$ Region

In [None]:
# """ ---------- Rapidity Inclusive Mistag Rates ---------- """
# SaveDirectory = maindirectory + '/' + 'MistagPlots' + '/' # split histograms into subdirectories
# DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already


# for b in range(3):

#     title = f'Inclusive {b}btag Mistag Rate'
    
#     NumeratorTT2016cen = TTbar_unweighted[vfp]['numerator'][f'UL16{vfp}_TTbar', 6+2*b, :]               
#     DenominatorTT2016cen = TTbar_unweighted[vfp]['denominator'][f'UL16{vfp}_TTbar', 6+2*b, :]
#     NumeratorTT2016fwd = TTbar_unweighted[vfp]['numerator'][f'UL16{vfp}_TTbar', 7+2*b, :]               
#     DenominatorTT2016fwd = TTbar_unweighted[vfp]['denominator'][f'UL16{vfp}_TTbar', 7+2*b, :]

    
#     NumeratorTT2016cen *= (-ttbar2016_sf)
#     DenominatorTT2016cen *= (-ttbar2016_sf)
#     NumeratorTT2016fwd *= (-ttbar2016_sf)
#     DenominatorTT2016fwd *= (-ttbar2016_sf)
    
#     # =================================================================================================== #
    
#     # ---- initialize data histograms with first era ---- #
#     if vfp == 'preVFP':
#         JetHT2016_unwgt_str = 'UL16preVFP_JetHTB_Data'
#         Num_cen2016 = JetHT2016_unweighted['B_preVFP']['numerator'][JetHT2016_unwgt_str, 6+2*b, :]
#         Denom_cen2016 = JetHT2016_unweighted['B_preVFP']['denominator'][JetHT2016_unwgt_str, 6+2*b, :]
#         Num_fwd2016 = JetHT2016_unweighted['B_preVFP']['numerator'][JetHT2016_unwgt_str, 7+2*b, :]
#         Denom_fwd2016 = JetHT2016_unweighted['B_preVFP']['denominator'][JetHT2016_unwgt_str, 7+2*b, :]
#         for Era in ['C', 'D', 'E', 'F']: #exclude B because histogram is initialized with B era
#             JetHT2016_unwgt_str = f'UL16preVFP_JetHT{Era}_Data'
#             Num_cen2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, 6+2*b, :]
#             Denom_cen2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, 6+2*b, :]
#             Num_fwd2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, 7+2*b, :]
#             Denom_fwd2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, 7+2*b, :]
#     else:
#         JetHT2016_unwgt_str = 'UL16postVFP_JetHTF_Data'
#         Num_cen2016 = JetHT2016_unweighted['F_postVFP']['numerator'][JetHT2016_unwgt_str, 6+2*b, :]
#         Denom_cen2016 = JetHT2016_unweighted['F_postVFP']['denominator'][JetHT2016_unwgt_str, 6+2*b, :]
#         Num_fwd2016 = JetHT2016_unweighted['F_postVFP']['numerator'][JetHT2016_unwgt_str, 7+2*b, :]
#         Denom_fwd2016 = JetHT2016_unweighted['F_postVFP']['denominator'][JetHT2016_unwgt_str, 7+2*b, :]
#         for Era in ['G', 'H']: #exclude F because histogram is initialized with F era
#             JetHT2016_unwgt_str = f'UL16postVFP_JetHT{Era}_Data'
#             Num_cen2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, 6+2*b, :]
#             Denom_cen2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, 6+2*b, :]
#             Num_fwd2016 += JetHT2016_unweighted[Era+'_'+vfp]['numerator'][JetHT2016_unwgt_str, 7+2*b, :]
#             Denom_fwd2016 += JetHT2016_unweighted[Era+'_'+vfp]['denominator'][JetHT2016_unwgt_str, 7+2*b, :]
                
#     # =================================================================================================== #
    
    
#     Num_cen2016 += (NumeratorTT2016cen)
#     Denom_cen2016 += (DenominatorTT2016cen)

#     Num_fwd2016 += (NumeratorTT2016fwd)
#     Denom_fwd2016 += (DenominatorTT2016fwd)
    
#     Num_inc2016 = Num_cen2016
#     Denom_inc2016 = Denom_cen2016
    
#     Num_inc2016 += (Num_fwd2016)
#     Denom_inc2016 += (Denom_fwd2016)
        
# #     fig = plt.figure(figsize=(10, 8))
# #     main_ax_artists, sublot_ax_arists = Num_inc2016.plot_ratio(
# #         Denom_inc2016,
# #         rp_ylabel=r"Ratio",
# #         rp_num_label="num",
# #         rp_denom_label="denom",
# #         rp_uncert_draw_type="bar")

#     fig, ax = plt.subplots(
#         figsize=(10,5),
#         sharex=True
#     )

#     mistag2016 = plotratio(Num_inc2016, Denom_inc2016, ax = ax, histtype='errorbar', 
#                            marker='.', markersize=13., color='r')


#     ax.set_ylabel('Mistag Rates')
#     ax.set_title(title)
#     leg = ax.legend(labels=["2016", "2017", "2018"])

#     plt.ylim(bottom = 0, top = 0.08)
#     plt.xlim(left = 400, right = 7000)

#     # ---- Optional x-axis scaling ---- #
#     ax.set_xscale('function', functions=(forward, inverse))
#     plt.xticks(np.array([400, 500, 1000, 2000, 3000, 4000, 5000]))
#     ax.set_xticklabels(['', 500, 1000, 2000, 3000, 4000, 5000])
#     # ---- Optional x-axis scaling ---- #
#     #ax.set_xscale('function', functions=(forward, inverse))
#     #plt.xticks(np.array([500, 1000, 2000, 5000, 10000]))
#     #plt.yticks(np.array([.05, .10, .15]))
#     #ax.set_xscale('log')
    
#     #filename = 'InclusiveMistag_bdisc8484_ttbarSubtraction_SeparateYears_' + btag + '.png'
#     #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
#     #print('\n' + filename + ' saved')

# Mistag Rates with Inclusive $y$ Region Sorted by b-tag Region

In [None]:
# y-inclusive t-tag mistag rate measured in data, one curve per b-tag region (no ttbar subtraction).
SaveDirectory = f'{maindirectory}/BkgEstimate/MistagPlots/{yeardir}{btagdir}{oddir}{btagType}/{APVDir[vfp]}' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

fig, ax = plt.subplots(
    figsize=(10,5),
    sharex=True
)
HIP = ' (with HIP)' if vfp == 'preVFP' else ''
title = f'2016{HIP} Mistag Rate'


# =================================================================================================== #

# ---- sum the data histograms over every era of the selected VFP period ---- #
# Era F is listed for both periods, matching the per-era keys 'F_preVFP' and 'F_postVFP'.
eras = ['B', 'C', 'D', 'E', 'F'] if vfp == 'preVFP' else ['F', 'G', 'H']

# (histogram name, category index) -> histogram summed over eras.
# Category indices: 6/7 = 0b cen/fwd, 8/9 = 1b cen/fwd, 10/11 = 2b cen/fwd.
data_sums = {}
for Era in eras:
    JetHT2016_unwgt_str = f'UL16{vfp}_JetHT{Era}_Data'
    era_output = JetHT2016_unweighted[f'{Era}_{vfp}']
    for hname in ('numerator', 'denominator'):
        for icat in range(6, 12):
            era_slice = era_output[hname][JetHT2016_unwgt_str, icat, :]
            if (hname, icat) in data_sums:
                data_sums[hname, icat] += era_slice
            else:
                # the first era initializes the running sum
                data_sums[hname, icat] = era_slice

# Keep the per-region names this cell has always defined as notebook globals.
Num_cen0b, Denom_cen0b = data_sums['numerator', 6], data_sums['denominator', 6]
Num_fwd0b, Denom_fwd0b = data_sums['numerator', 7], data_sums['denominator', 7]
Num_cen1b, Denom_cen1b = data_sums['numerator', 8], data_sums['denominator', 8]
Num_fwd1b, Denom_fwd1b = data_sums['numerator', 9], data_sums['denominator', 9]
Num_cen2b, Denom_cen2b = data_sums['numerator', 10], data_sums['denominator', 10]
Num_fwd2b, Denom_fwd2b = data_sums['numerator', 11], data_sums['denominator', 11]

# =================================================================================================== #

# ---- combine central + forward and plot each b-tag region ---- #
Num_inc0b = Num_cen0b + Num_fwd0b
Denom_inc0b = Denom_cen0b + Denom_fwd0b

Numtotal = np.sum(Num_inc0b.view().value)
Denomtotal = np.sum(Denom_inc0b.view().value)
print(Numtotal)
print(Denomtotal)

mistag_inclusive0b = plotratio(Num_inc0b, Denom_inc0b, ax = ax, histtype='errorbar', 
                           marker='.', markersize=13., color='r')


Num_inc1b = Num_cen1b + Num_fwd1b
Denom_inc1b = Denom_cen1b + Denom_fwd1b

mistag_inclusive1b = plotratio(Num_inc1b, Denom_inc1b, ax = ax, histtype='errorbar', 
                           marker='s', markersize=7., color='g')


Num_inc2b = Num_cen2b + Num_fwd2b
Denom_inc2b = Denom_cen2b + Denom_fwd2b

mistag_inclusive2b = plotratio(Num_inc2b, Denom_inc2b, ax = ax, histtype='errorbar', 
                           marker='^', markersize=7., color='b')

plt.ylim(bottom = 0, top = 0.15)
plt.xlim([400,7000])

ax.set_ylabel('t-tag Mistag Rates')
ax.set_title(title)
leg = ax.legend(labels=["0b", "1b", "2b"],fontsize='x-large')

# ---- Optional x-axis scaling ---- #
ax.set_xscale('function', functions=(forward, inverse))
plt.xticks(np.array([400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]))
ax.set_xticklabels(['', 500, '', '', '', '', 1000, 2000, 3000, 4000, 5000])
# plt.yticks(np.array([.05, .10]))
#ax.set_xscale('log')

#---- Labeling ----#
Lint = str(Lum_frac*.001) # Integrated Luminosity
lumi = plt.text(1.0, 1.08, "L = " + Lint[:6] + " fb$^{-1}$",
        fontsize='xx-large',
        horizontalalignment='right',
        verticalalignment='top',
        transform=ax.transAxes
       )
CMS = plt.text(0.01, 0.98, 'CMS Preliminary',
        fontsize='xx-large',
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes
       )
rap = plt.text(0.05, 0.86, 'y-inclusive',
        fontsize='large',
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes
       )
coffee = plt.text(1.0, 0.87, u"☕",
              fontsize=50,
              horizontalalignment='left',
              verticalalignment='bottom',
              transform=ax.transAxes
             )

filename = 'InclusiveMistag.png'
# NOTE(review): savefig is disabled, so the message below is printed although no file is written.
# plt.savefig(SaveDirectory+filename, bbox_inches="tight")
print('\n' + SaveDirectory + filename + ' saved')

# Mistag Rates with $t\bar{t}$ Subtraction and Inclusive $y$ Region Sorted by b-tag Region

In [None]:
# y-inclusive t-tag mistag rate per b-tag region, with the scaled ttbar MC subtracted from data.
SaveDirectory = f'{maindirectory}/BkgEstimate/MistagPlots/{yeardir}{btagdir}{oddir}{btagType}/{APVDir[vfp]}' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

fig, ax = plt.subplots(
    figsize=(10,5),
    sharex=True
)
HIP = ' (with HIP)' if vfp == 'preVFP' else ''
title = f'2016{HIP} Mistag Rate'

str1 = '_700_1000'
str2 = '_1000_Inf'

# ---- ttbar MC: cen + fwd per b-tag region, scaled by MINUS its normalisation ---- #
# The negative weight means that adding these histograms to data subtracts the ttbar contribution.
tt_samples = [
    (TTbar_unweighted1[vfp], f'UL16{vfp}_TTbar{str1}', ttbar2016_sf1),
    (TTbar_unweighted2[vfp], f'UL16{vfp}_TTbar{str2}', ttbar2016_sf2),
]

# (histogram name, number of b-tags) -> scaled ttbar histogram summed over both mass samples.
# Category indices: 6 + 2*nb is the central region, 7 + 2*nb the forward region.
# The per-sample intermediates are no longer kept as separate notebook globals.
tt_sums = {}
for tt_output, tt_dataset, tt_sf in tt_samples:
    for hname in ('numerator', 'denominator'):
        for nb in range(3):
            tt_inc = tt_output[hname][tt_dataset, 6 + 2*nb, :] + tt_output[hname][tt_dataset, 7 + 2*nb, :]
            tt_inc *= (-tt_sf)
            if (hname, nb) in tt_sums:
                tt_sums[hname, nb] = tt_sums[hname, nb] + tt_inc
            else:
                tt_sums[hname, nb] = tt_inc

NumTT_inc0b, DenomTT_inc0b = tt_sums['numerator', 0], tt_sums['denominator', 0]
NumTT_inc1b, DenomTT_inc1b = tt_sums['numerator', 1], tt_sums['denominator', 1]
NumTT_inc2b, DenomTT_inc2b = tt_sums['numerator', 2], tt_sums['denominator', 2]

print(NumTT_inc0b.values())


# =================================================================================================== #

# ---- sum the data histograms over every era of the selected VFP period ---- #
# Era F is listed for both periods, matching the per-era keys 'F_preVFP' and 'F_postVFP'.
eras = ['B', 'C', 'D', 'E', 'F'] if vfp == 'preVFP' else ['F', 'G', 'H']

# (histogram name, category index) -> histogram summed over eras.
data_sums = {}
for Era in eras:
    JetHT2016_unwgt_str = f'UL16{vfp}_JetHT{Era}_Data'
    era_output = JetHT2016_unweighted[f'{Era}_{vfp}']
    for hname in ('numerator', 'denominator'):
        for icat in range(6, 12):
            era_slice = era_output[hname][JetHT2016_unwgt_str, icat, :]
            if (hname, icat) in data_sums:
                data_sums[hname, icat] += era_slice
            else:
                # the first era initializes the running sum
                data_sums[hname, icat] = era_slice

# Keep the per-region data names this cell has always defined as notebook globals.
Num_cen0b, Denom_cen0b = data_sums['numerator', 6], data_sums['denominator', 6]
Num_fwd0b, Denom_fwd0b = data_sums['numerator', 7], data_sums['denominator', 7]
Num_cen1b, Denom_cen1b = data_sums['numerator', 8], data_sums['denominator', 8]
Num_fwd1b, Denom_fwd1b = data_sums['numerator', 9], data_sums['denominator', 9]
Num_cen2b, Denom_cen2b = data_sums['numerator', 10], data_sums['denominator', 10]
Num_fwd2b, Denom_fwd2b = data_sums['numerator', 11], data_sums['denominator', 11]

# =================================================================================================== #

# ---- data (cen + fwd) plus the negatively-scaled ttbar, then plot each b-tag region ---- #
Num_inc0b = Num_cen0b + Num_fwd0b + NumTT_inc0b
Denom_inc0b = Denom_cen0b + Denom_fwd0b + DenomTT_inc0b

mistag_inclusive0b = plotratio(Num_inc0b, Denom_inc0b, ax = ax, histtype='errorbar', 
                           marker='.', markersize=13., color='r')

Num_inc1b = Num_cen1b + Num_fwd1b + NumTT_inc1b
Denom_inc1b = Denom_cen1b + Denom_fwd1b + DenomTT_inc1b

mistag_inclusive1b = plotratio(Num_inc1b, Denom_inc1b, ax = ax, histtype='errorbar', 
                           marker='s', markersize=7., color='g')

# Num_inc2b and mistag_inclusive2b are read by the lookup-table cells that follow.
Num_inc2b = Num_cen2b + Num_fwd2b + NumTT_inc2b
Denom_inc2b = Denom_cen2b + Denom_fwd2b + DenomTT_inc2b

mistag_inclusive2b = plotratio(Num_inc2b, Denom_inc2b, ax = ax, histtype='errorbar', 
                           marker='^', markersize=7., color='b')

plt.ylim(bottom = 0, top = 0.15)
plt.xlim([400,7000])

ax.set_ylabel('t-tag Mistag Rates')
ax.set_title(title)
leg = ax.legend(labels=["0b", "1b", "2b"],fontsize='x-large')

# ---- Optional x-axis scaling ---- #
ax.set_xscale('function', functions=(forward, inverse))
plt.xticks(np.array([400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]))
ax.set_xticklabels(['', 500, '', '', '', '', 1000, 2000, 3000, 4000, 5000])
# plt.yticks(np.array([.05, .10]))
#ax.set_xscale('log')

#---- Labeling ----#
Lint = str(Lum_frac*.001) # Integrated Luminosity
lumi = plt.text(1.0, 1.08, "L = " + Lint[:6] + " fb$^{-1}$",
        fontsize='xx-large',
        horizontalalignment='right',
        verticalalignment='top',
        transform=ax.transAxes
       )
CMS = plt.text(0.01, 0.98, 'CMS Preliminary',
        fontsize='xx-large',
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes
       )
rap = plt.text(0.05, 0.86, 'y-inclusive',
        fontsize='large',
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes
       )
contam = plt.text(0.05, 0.81, r'$t\bar{t}$ contam. removed',
        fontsize='large',
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes
       )
coffee = plt.text(1.0, 0.87, u"☕",
              fontsize=50,
              horizontalalignment='left',
              verticalalignment='bottom',
              transform=ax.transAxes
             )

filename = f'InclusiveMistag_ContamRemoved.png'
# NOTE(review): savefig is disabled, so the message below is printed although no file is written.
# plt.savefig(SaveDirectory+filename, bbox_inches="tight")
print('\n' + SaveDirectory + filename + ' saved')

# Make this the new script for making mistag rate lookup tables
### Note to AC: Remove old method later and replace that script with condensed version of this (somehow...)

In [None]:
# Collect, in axis order, whatever iterating the 'jetp' axis yields
# (presumably one entry per momentum bin — TODO confirm against the axis type).
xaxis = Num_inc2b.axes['jetp']
momentum = list(xaxis)
# momentum

In [None]:
# Read the plotted 2b mistag rates back from the artists returned by plotratio.
# NOTE(review): [0][0][0] is presumably the data line of the errorbar artist — confirm against mplhep's return type.
# np.array(...) copies, so zeroing the NaN bins below no longer edits the
# y-data held by the plotted line itself.
mistags = np.array(mistag_inclusive2b[0][0][0].get_ydata(), dtype=float)
mistags[np.isnan(mistags)] = 0

In [None]:
# Two-column lookup table: the 'jetp' axis entries in 'p', the mistag rate in 'M(p)'.
d = {'p': momentum, 'M(p)': mistags}
df = pd.DataFrame(data=d)
# Lift pandas' row/column display limits for this print only, so the whole table is shown.
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    print(df)

In [None]:
# Output folder for the lookup tables (one sub-folder per b-tag working point), created if missing.
MistagLocation = f'{maindirectory}/LookupTables/{btagdir}' 
DoesDirectoryExist(MistagLocation) 

In [None]:
# Two-digit year taken from `yeardir` after removing any trailing '/' ('2016/' -> '16').
# The previous fixed-position slice only worked when `yeardir` was exactly 'YYYY/'.
df_name = f"mistag_UL{yeardir.rstrip('/')[-2:]}{vfp}_JetHT_Data_ttContaminationRemoved_at2binc.csv"
print(MistagLocation+df_name)

In [None]:
# Write the lookup table; with pandas' default arguments the DataFrame index is written as the first CSV column.
df.to_csv(MistagLocation+df_name)