In [None]:
import time
import os
import copy
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from coffea import util
from awkward import JaggedArray
import numpy as np
import glob as glob
import itertools
import pandas as pd

In [None]:
# Load the saved coffea processor outputs used throughout this notebook.
# File paths are relative, so the .coffea files must be in the current working directory.
# NOTE(review): util.load presumably deserialises pickled objects — only load trusted files.

# ---- JetHT data, all years combined ---- #
JetHT_unweighted = util.load('TTbarResCoffea_JetHT_unweighted_output.coffea')
JetHT_weighted = util.load('TTbarResCoffea_JetHT_weighted_output.coffea')
# NOTE(review): this loads the *2016* ModMass file (same file as JetHT2016_ModMass_weighted below)
# into the all-years variable — looks like a copy/paste slip; confirm the intended filename.
JetHT_ModMass_weighted = util.load('TTbarResCoffea_JetHT2016_DataModMass_weighted_output.coffea')

# ---- JetHT data, 2016 ---- #
JetHT2016_unweighted = util.load('TTbarResCoffea_JetHT2016_Data_unweighted_output.coffea')
JetHT2016_weighted = util.load('TTbarResCoffea_JetHT2016_Data_weighted_output.coffea')
JetHT2016_ModMass_weighted = util.load('TTbarResCoffea_JetHT2016_DataModMass_weighted_output.coffea')

# ---- JetHT data, 2017 (ModMass output currently disabled) ---- #
JetHT2017_unweighted = util.load('TTbarResCoffea_JetHT2017_Data_unweighted_output.coffea')
JetHT2017_weighted = util.load('TTbarResCoffea_JetHT2017_Data_weighted_output.coffea')
#JetHT2017_ModMass_weighted = util.load('TTbarResCoffea_JetHT2017_DataModMass_weighted_output.coffea')

# ---- JetHT data, 2018 (ModMass output currently disabled) ---- #
JetHT2018_unweighted = util.load('TTbarResCoffea_JetHT2018_Data_unweighted_output.coffea')
JetHT2018_weighted = util.load('TTbarResCoffea_JetHT2018_Data_weighted_output.coffea')
#JetHT2018_ModMass_weighted = util.load('TTbarResCoffea_JetHT2018_DataModMass_weighted_output.coffea')

# ---- ttbar M.C. ---- #
TTbar_unweighted = util.load('TTbarResCoffea_TTbar_unweighted_output.coffea')
TTbar_weighted = util.load('TTbarResCoffea_TTbar_weighted_output.coffea')

# ---- ttbar M.C. weighted according to JetHT year for ttbar contamination subtraction ---- #
TTbar_2016_weighted = util.load('TTbarResCoffea_TTbar_2016_weighted_output.coffea')
TTbar_2017_weighted = util.load('TTbarResCoffea_TTbar_2017_weighted_output.coffea')
TTbar_2018_weighted = util.load('TTbarResCoffea_TTbar_2018_weighted_output.coffea')

# QCD M.C. is not loaded yet (placeholder).
#QCD_unweighted = util.load()

In [None]:
# Group the loaded processor outputs by sample; each dict maps a
# human-readable label to the corresponding output object.
outputs_unweighted = {
    'JetHT 2016': JetHT2016_unweighted,
    'JetHT 2017': JetHT2017_unweighted,
    'JetHT 2018': JetHT2018_unweighted,
    'JetHT All': JetHT_unweighted,
}

outputs_weighted = {
    'JetHT 2016': JetHT2016_weighted,
    'JetHT 2017': JetHT2017_weighted,
    'JetHT 2018': JetHT2018_weighted,
    'JetHT All': JetHT_weighted,
}

outputs_weighted_ttbar = {
    'TTbar 2016': TTbar_2016_weighted,
    'TTbar 2017': TTbar_2017_weighted,
    'TTbar 2018': TTbar_2018_weighted,
    'TTbar All': TTbar_weighted,
}

In [None]:
# Print the cutflow table of every unweighted JetHT output.
for sample_label, sample_output in outputs_unweighted.items():
    print("-------Unweighted " + sample_label + "--------")
    for cut_name, cut_count in sample_output['cutflow'].items():
        print( '%20s : %12d' % (cut_name, cut_count) )

In [None]:
# Print the cutflow table of every weighted JetHT output.
for sample_label, sample_output in outputs_weighted.items():
    print("-------Weighted " + sample_label + "--------")
    for cut_name, cut_count in sample_output['cutflow'].items():
        print( '%20s : %12d' % (cut_name, cut_count) )

In [None]:
# Cutflow table of the unweighted ttbar output.
print("-------Unweighted TTbar--------")
for cut_name, cut_count in TTbar_unweighted['cutflow'].items():
    print( '%20s : %12d' % (cut_name, cut_count) )

In [None]:
# Cutflow table of the weighted ttbar output.
print("-------Weighted TTbar--------")
for cut_name, cut_count in TTbar_weighted['cutflow'].items():
    print( '%20s : %12d' % (cut_name, cut_count) )

In [None]:
def mkdir_p(mypath):
    '''Create a directory and any missing parents (equivalent to ``mkdir -p``).

    Parameters
    ----------
    mypath : str or path-like
        Directory to create.

    Raises
    ------
    OSError
        If the directory cannot be created, including the case where
        ``mypath`` already exists but is not a directory.
    '''
    from os import makedirs

    # exist_ok=True tolerates an already-existing *directory* only, which is
    # the same condition the previous manual errno.EEXIST check implemented.
    makedirs(mypath, exist_ok=True)

In [None]:
def DoesDirectoryExist(mypath): #extra precaution (Probably overkill...)
    '''Create ``mypath`` via mkdir_p unless something already exists at that path.

    Nothing is returned. Note that any existing filesystem entry (not only a
    directory) counts as "exists" and suppresses the creation.
    '''
    from os import path

    if not path.exists(mypath):
        mkdir_p(mypath)

In [None]:
import matplotlib.pyplot as plt
import warnings
import re # regular expressions
warnings.filterwarnings("ignore")

# ---- Reiterate categories ---- #
ttagcats = ["at", "0t", "1t", "2t"]   # t-tag part of the category name
btagcats = ["0b", "1b", "2b"]         # b-tag part of the category name
ycats = ['cen', 'fwd']                # rapidity part of the category name

# Category names are the concatenation of the parts, e.g. 'at0bcen'.
list_of_cats = [''.join(parts) for parts in itertools.product(ttagcats, btagcats, ycats)]
list_of_bcats = [''.join(parts) for parts in itertools.product(btagcats, ycats)]

# ---- List the Histograms Here ---- #
list_of_hists = (
    'ttbarmass',
    'jetpt',
    'jeteta',
    'jetphi',
    'jety',
    'jetdy',
    'probept',
    'probep',
)

In [None]:
# Base directory for plot sub-directories and lookup tables: the current
# working directory at the time this cell is executed.
maindirectory = os.getcwd() 

In [None]:
# Keyword-argument bundles handed to coffea's plotting helpers
# (fill_opts / error_opts) in the plotting cells.
stack_ttbar_opts = dict(alpha=0.8, edgecolor=(0, 0, 0, 0.3), color='red')
stack_background_opts = dict(alpha=0.8, edgecolor=(0, 0, 0, 0.3), color='yellow')
stack_error_opts = dict(
    label='Stat. Unc.',
    hatch='///',
    facecolor='None',
    edgecolor=(0, 0, 0, .5),
    linewidth=0,
)
data_err_opts = dict(
    linestyle='none',
    marker='.',
    markersize=10.,
    color='k',
    elinewidth=1,
)

In [None]:
""" ---------------- Luminosity and Cross Sections ---------------- """
# Integrated luminosities per data-taking year and for all years combined.
Lum2016 = 35920. # pb^-1 from https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
Lum2017 = 41530. # presumably also pb^-1 from the same table — TODO confirm
Lum2018 = 59740. # presumably also pb^-1 from the same table — TODO confirm
Lum     = 137190. # numerically Lum2016 + Lum2017 + Lum2018

ttbar_BR = 0.4544 #PDG 2019
ttbar_xs = 1.0 #831.76 * ttbar_BR  #pb  Monte Carlo already includes xs in event weight!!

# Common denominator of every ttbar scale factor, kept in one place instead of
# being repeated as a literal.
# NOTE(review): presumably the number of generated ttbar MC events (or their
# sum of weights) — confirm the origin of this number.
ttbar_mc_norm = 142155064.

# ttbar MC scale factors: cross section * luminosity / normalisation.
ttbar2016_sf = ttbar_xs*Lum2016/ttbar_mc_norm
ttbar2017_sf = ttbar_xs*Lum2017/ttbar_mc_norm
ttbar2018_sf = ttbar_xs*Lum2018/ttbar_mc_norm
ttbar_sf = ttbar_xs*Lum/ttbar_mc_norm

# Tagging scale factors applied to the ttbar MC in the background-estimate cell.
toptag_sf = 0.87
btag_sf = 1.0
print(ttbar2016_sf)

qcd_xs = 1370000000.0 #pb From https://cms-gen-dev.cern.ch/xsdb

In [None]:
# Reference JetHT event counts per year and their total.
Nevts2016 = 625516390. # from dasgoclient
Nevts2017 = 410461585.
Nevts2018 = 676328827.
NevtsAll = Nevts2016+Nevts2017+Nevts2018

# Scale factor = reference event count / 'all events' entry of the
# corresponding unweighted cutflow.
Nevts2016_sf = Nevts2016/JetHT2016_unweighted['cutflow']['all events']
Nevts2017_sf = Nevts2017/JetHT2017_unweighted['cutflow']['all events']
Nevts2018_sf = Nevts2018/JetHT2018_unweighted['cutflow']['all events']
Nevts_sf = NevtsAll/JetHT_unweighted['cutflow']['all events']

print(Nevts_sf)

In [None]:
# ---- Optional to rescale x-axis of mistag rates ---- #
def forward(x):
    """Forward axis transform: square root of ``x`` (pairs with ``inverse``)."""
    return x ** 0.5

def inverse(x):
    """Inverse axis transform: square of ``x`` (undoes ``forward``)."""
    squared = x ** 2
    return squared

In [None]:
SaveDirectory = maindirectory + '/' + 'Mistags' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

# One entry per data-taking year:
# (legend label, data output, data dataset name, data scale factor, ttbar scale factor, marker options)
mistag_year_config = [
    ('2016', JetHT2016_unweighted, 'JetHT2016_Data', Nevts2016_sf, ttbar2016_sf,
     {'marker': '.', 'markersize': 13., 'color': 'r', 'elinewidth': 1}),
    ('2017', JetHT2017_unweighted, 'JetHT2017_Data', Nevts2017_sf, ttbar2017_sf,
     {'marker': 's', 'markersize': 7., 'color': 'g', 'elinewidth': 1}),
    ('2018', JetHT2018_unweighted, 'JetHT2018_Data', Nevts2018_sf, ttbar2018_sf,
     {'marker': '^', 'markersize': 7., 'color': 'b', 'elinewidth': 1}),
]

for icat in list_of_cats:
    if 'at' not in icat:
        continue # only the "at" categories are used for the mistag rate

    fig, ax = plt.subplots(
        #nrows=2,
        #ncols=1,
        figsize=(7,5),
        #gridspec_kw={"height_ratios": (3, 1)},
        sharex=True
    )
    title = 'mistag ' + icat[2:]

    # Unscaled ttbar MC histograms for this category; every year starts from these.
    NumeratorTT = TTbar_unweighted['numerator'].integrate('anacat', icat).integrate('dataset', 'TTbar')
    DenominatorTT = TTbar_unweighted['denominator'].integrate('anacat', icat).integrate('dataset', 'TTbar')

    for year_label, data_output, data_name, data_sf, tt_sf, marker_opts in mistag_year_config:
        # scale() is used as an in-place operation here, so each year needs an
        # independent copy. copy.copy() is shallow: the per-year objects would
        # share the same bin-content storage and the three year factors would
        # be multiplied onto one another. deepcopy() avoids that aliasing.
        num_tt = copy.deepcopy(NumeratorTT)
        den_tt = copy.deepcopy(DenominatorTT)
        num_tt.scale(-tt_sf) # negative factor: the ttbar contribution is subtracted by add() below
        den_tt.scale(-tt_sf)

        num = data_output['numerator'].integrate('anacat', icat).integrate('dataset', data_name)
        den = data_output['denominator'].integrate('anacat', icat).integrate('dataset', data_name)
        num.scale(data_sf)
        den.scale(data_sf)
        num.add(num_tt)
        den.add(den_tt)

        hist.plotratio(num = num, denom = den, ax=ax, clear=False,
                       error_opts=dict(marker_opts),
                       unc = 'num')

    ax.set_ylabel('Mistag Rates')
    ax.set_title(title)
    leg = ax.legend(labels=[cfg[0] for cfg in mistag_year_config])

    # ---- Axis ranges ---- #
    ax.set_ylim(bottom = 0, top = 0.20)
    ax.set_xlim(left = 100, right = 3000)

    # ---- Optional x-axis scaling ---- #
    #ax.set_xscale('function', functions=(forward, inverse))
    #plt.xticks(np.array([0, 500, 1000, 2000, 3000, 4000, 5000]))

    # ---- Optional x-axis scaling ---- #
    #ax.set_xscale('function', functions=(forward, inverse))
    #plt.xticks(np.array([500, 1000, 2000, 5000, 10000]))
    #plt.yticks(np.array([.05, .10, .15]))
    #ax.set_xscale('log')

    #filename = 'Mistag_bdisc8484_ttbarSubtraction_SeparateYears_' + icat + '.png'
    #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
    #print('\n' + filename + ' saved')

In [None]:
""" ---------- Rapidity Inclusive Mistag Rates ---------- """
SaveDirectory = maindirectory + '/' + 'Mistags' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

# One entry per data-taking year:
# (legend label, data output, data dataset name, data scale factor, ttbar scale factor, marker options)
inclusive_year_config = [
    ('2016', JetHT2016_unweighted, 'JetHT2016_Data', Nevts2016_sf, ttbar2016_sf,
     {'marker': '.', 'markersize': 13., 'color': 'r', 'elinewidth': 1}),
    ('2017', JetHT2017_unweighted, 'JetHT2017_Data', Nevts2017_sf, ttbar2017_sf,
     {'marker': 's', 'markersize': 7., 'color': 'g', 'elinewidth': 1}),
    ('2018', JetHT2018_unweighted, 'JetHT2018_Data', Nevts2018_sf, ttbar2018_sf,
     {'marker': '^', 'markersize': 7., 'color': 'b', 'elinewidth': 1}),
]

for btag in btagcats:
    fig, ax = plt.subplots(
        figsize=(7,5),
        sharex=True
    )
    title = 'Inclusive ' + btag + ' Mistag Rate'

    # The two rapidity categories that are summed into the inclusive rate.
    rapidity_cats = ['at'+btag+'cen', 'at'+btag+'fwd']

    # Unscaled ttbar MC (numerator, denominator) per rapidity category.
    tt_templates = {}
    for anacat in rapidity_cats:
        tt_templates[anacat] = (
            TTbar_unweighted['numerator'].integrate('anacat', anacat).integrate('dataset', 'TTbar'),
            TTbar_unweighted['denominator'].integrate('anacat', anacat).integrate('dataset', 'TTbar'),
        )

    for year_label, data_output, data_name, data_sf, tt_sf, marker_opts in inclusive_year_config:
        num_inc = None
        den_inc = None
        for anacat in rapidity_cats:
            # scale() is used as an in-place operation here, so each year needs
            # an independent copy. copy.copy() is shallow: the per-year objects
            # would share bin-content storage and the year factors would
            # compound. deepcopy() avoids that aliasing.
            num_tt = copy.deepcopy(tt_templates[anacat][0])
            den_tt = copy.deepcopy(tt_templates[anacat][1])
            num_tt.scale(-tt_sf) # negative factor: subtracted by add() below
            den_tt.scale(-tt_sf)

            num = data_output['numerator'].integrate('anacat', anacat).integrate('dataset', data_name)
            den = data_output['denominator'].integrate('anacat', anacat).integrate('dataset', data_name)
            num.scale(data_sf)
            den.scale(data_sf)
            num.add(num_tt)
            den.add(den_tt)

            # Accumulate cen + fwd without relying on the return value of add().
            if num_inc is None:
                num_inc, den_inc = num, den
            else:
                num_inc.add(num)
                den_inc.add(den)

        hist.plotratio(num = num_inc, denom = den_inc, ax=ax, clear=False,
                       error_opts=dict(marker_opts),
                       unc = 'num')

    ax.set_ylim(bottom = 0, top = 0.20)
    ax.set_xlim(left = 100, right = 7000)

    ax.set_ylabel('Mistag Rates')
    ax.set_title(title)
    leg = ax.legend(labels=[cfg[0] for cfg in inclusive_year_config])

    # ---- Optional x-axis scaling ---- #
    ax.set_xscale('function', functions=(forward, inverse))
    ax.set_xticks(np.array([0, 500, 1000, 2000, 3000, 4000, 5000]))

    # ---- Optional x-axis scaling ---- #
    #ax.set_xscale('function', functions=(forward, inverse))
    #plt.xticks(np.array([500, 1000, 2000, 5000, 10000]))
    #plt.yticks(np.array([.05, .10, .15]))
    #ax.set_xscale('log')

    #filename = 'InclusiveMistag_bdisc8484_ttbarSubtraction_SeparateYears_' + btag + '.png'
    #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
    #print('\n' + filename + ' saved')

In [None]:
""" ---------- Rapidity Inclusive Mistag Rate According to b tag ---------- """
SaveDirectory = maindirectory + '/' + 'Mistags' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

fig, ax = plt.subplots(
    figsize=(7,5),
    sharex=True
)
title = 'Inclusive Mistag Rate'


def _allyears_ttbar_subtracted(histname, anacat):
    """Return the all-years JetHT histogram `histname` in category `anacat`,
    scaled by Nevts_sf, with the ttbar MC (scaled by ttbar_sf) subtracted."""
    tt_part = TTbar_unweighted[histname].integrate('anacat', anacat).integrate('dataset', 'TTbar')
    tt_part.scale(-ttbar_sf) # negative factor: subtracted by add() below

    data_part = JetHT_unweighted[histname].integrate('anacat', anacat).integrate('dataset', 'JetHT')
    data_part.scale(Nevts_sf)
    data_part.add(tt_part)
    return data_part


# (b-tag category, marker options), in the order the curves are drawn.
btag_marker_config = [
    ('0b', {'marker': '.', 'markersize': 13., 'color': 'r', 'elinewidth': 1}),
    ('1b', {'marker': 's', 'markersize': 7., 'color': 'g', 'elinewidth': 1}),
    ('2b', {'marker': '^', 'markersize': 7., 'color': 'b', 'elinewidth': 1}),
]

for btag_label, marker_opts in btag_marker_config:
    cen_cat = 'at' + btag_label + 'cen'
    fwd_cat = 'at' + btag_label + 'fwd'

    # Rapidity-inclusive histograms: central + forward.
    num_inc = _allyears_ttbar_subtracted('numerator', cen_cat).add(
        _allyears_ttbar_subtracted('numerator', fwd_cat))
    den_inc = _allyears_ttbar_subtracted('denominator', cen_cat).add(
        _allyears_ttbar_subtracted('denominator', fwd_cat))

    hist.plotratio(num = num_inc, denom = den_inc, ax=ax, clear=False,
                   error_opts=marker_opts,
                   unc = 'num')

ax.set_ylim(bottom = 0, top = 0.15)
ax.set_xlim([400,10000])

ax.set_ylabel('Mistag Rates')
ax.set_title(title)
leg = ax.legend(labels=["0b", "1b", "2b"])

# ---- Optional x-axis scaling ---- #
ax.set_xscale('function', functions=(forward, inverse))
ax.set_xticks(np.array([500, 1000, 2000, 5000, 10000]))
ax.set_yticks(np.array([.05, .10, .15]))
#ax.set_xscale('log')

#filename = 'InclusiveMistag_bdisc8484_ttbarSubtraction_AllYears.png'
#plt.savefig(SaveDirectory+filename, bbox_inches="tight")
#print('\n' + filename + ' saved')

In [None]:
""" ---------- Comparing Background Estimate to unweighted Data ---------- """
SaveDirectory = maindirectory + '/' + 'ClosureTests' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

#---- Histogram Window Config. (identical for every figure, so set once) ----#
plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12
})

for b_y in list_of_bcats:
    # Upper panel (ax): distributions; lower panel (rax): Data/Bkg ratio.
    fig, (ax, rax) = plt.subplots(
        nrows=2,
        ncols=1,
        figsize=(7,7),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True
    )
    fig.subplots_adjust(hspace=.07)
    #b_y = '0bcen' # b-tag category and rapidity window

    legend_labels = {'labels':['', '', 'All Probe Jets (weighted)', 'T-Tagged Probe Jets'],
                             'loc': 'upper right',
                             'fontsize': 'x-small'}

    #---- Useful Information (Debug Purposes Only)----#
    # Lookup table with columns 'p' and 'M(p)' for this category.
    lut_file = maindirectory + '/LookupTables/mistag_JetHT2016_Data_at' + b_y + '.csv'
    df = pd.read_csv(lut_file)
    p_vals = df['p']
    mtr = df['M(p)'].values
    MMR = mtr.sum()/mtr.size
    print('Mean Mistag Rate (MMR) = ', MMR)

    #---- Check if these two cutflows are the same ----#
    nevts = JetHT2016_unweighted['cutflow']['at'+b_y]
    bkg_nevts = JetHT2016_ModMass_weighted['cutflow']['at'+b_y]
    exp_nevts = JetHT2016_unweighted['cutflow']['Pt'+b_y]

    print('Number of Unweighted "at" Data (Cutflow) = ', nevts)
    print('Number of Weighted "at" Data (Cutflow) =   ', bkg_nevts)
    print('Number of Unweighted "Pt" Data (Cutflow) = ', exp_nevts)
    print()

    #---- Define Histogram categories of interest ----#
    hist_wgt_anacat = 'at' + b_y # category of interest for the weighted data (all weighted probe jets)
    hist_unwgt_anacat = 'Pt' + b_y # category of interest for the un-weighted data (unweighted t-tagged probe jets)
    hist_unwgt_pretag_anacat = 'at' + b_y # (all unweighted probe jets)

    #---- Given pairs of jets where one jet is anti-tagged, show the momentum of the probe jets ----#
    hist_wgt = JetHT2016_ModMass_weighted['probep'].integrate('anacat', hist_wgt_anacat).integrate('dataset', 'JetHT2016_Data')
    hist_unwgt = JetHT2016_unweighted['probep'].integrate('anacat', hist_unwgt_anacat).integrate('dataset', 'JetHT2016_Data')
    hist_unwgt_pretag = JetHT2016_unweighted['probep'].integrate('anacat', hist_unwgt_pretag_anacat).integrate('dataset', 'JetHT2016_Data')

    hist.plot1d(hist_wgt, ax=ax, clear=True,
                        fill_opts=stack_background_opts, error_opts=stack_error_opts) # all probe jets weighted with mistag
    hist.plot1d(hist_unwgt, ax=ax, clear=False,
                        error_opts=data_err_opts,
                        legend_opts=legend_labels) # all unweighted, t-tagged probe jets

    # Side-by-side table of the lookup values and the three histograms' bin contents.
    d = {'p': p_vals, 'M(p)': mtr,
         'unwgt at'+b_y: hist_unwgt_pretag.values()[()],
         'wgt at'+b_y: hist_wgt.values()[()],
         'Pt'+b_y: hist_unwgt.values()[()]}
    df1 = pd.DataFrame(data=d)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(df1)

    ax.set_yscale('log')
    # Axes.autoscale's first positional parameter is `enable`, not the axis
    # name: the former autoscale('y') / autoscale('x') calls each enabled
    # autoscaling on *both* axes. Spell that out explicitly.
    ax.autoscale(enable=True, axis='both')
    ax.set_ylabel('Events')
    ax.set_xlabel(None)
    ax.set_title('Probe Momentum Closure Test ' + b_y)
    #leg = ax.legend(labels=["All Probe Jets (weighted)", "T-Tagged Probe Jets"])

    #---- Plot Ratio ----#
    hist.plotratio(num = hist_unwgt, denom = hist_wgt, ax = rax,
                   error_opts=data_err_opts,
                   unc = 'num')
    rax.set_ylabel('Data/Bkg')
    rax.axhline(y=1, color='k', linestyle=':')
    rax.set_ylim(0,2)
    rax.set_xlim(0,3000)

    #---- Labeling ----#
    # ax.text rather than plt.text: pyplot targets the *current* axes (the
    # ratio panel, created last), while the label is positioned in ax coordinates.
    lumi = ax.text(1.18, 1.07, "2016 Data", #"?? fb$^{-1}$",
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='top',
            transform=ax.transAxes
           )

    #filename = 'ClosureTest_2016_' + b_y + '.png'
    #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
    #print('\n' + filename + ' saved')
    #print('\n ********************************************************')
    #print()

In [None]:
""" ---------- Comparing Background Estimate to Unweighted Data (Checks One Histogram) ---------- """
""" --------------- Background: t tagged probe jet from data weighted by mistag --------------- """
""" --------------- Data: Unweighted Data from the Signal Region (2t tag region) -------------- """

SaveDirectory = maindirectory + '/' + 'BkgEstimate' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

#---- Histogram Window Config. (identical for every figure, so set once) ----#
plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12
})

for b_y in list_of_bcats:
    # Upper panel (ax): distributions; lower panel (rax): Data/Bkg ratio.
    fig, (ax, rax) = plt.subplots(
        nrows=2,
        ncols=1,
        figsize=(7,7),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True
    )
    fig.subplots_adjust(hspace=.07)

    #---- Histogram to plot (probep, ttbarmass, jetpt, etc...)----#
    name = 'ttbarmass'

    #---- Print cutflow of events (Debug Purposes Only)----#
    bkg_nevts = JetHT2016_weighted['cutflow']['pret'+b_y]
    sig_nevts = JetHT2016_unweighted['cutflow']['2t'+b_y]
    print("Background Estimate Region # of Entries = ", bkg_nevts)
    print("Signal Region # of Entries =              ", sig_nevts)

    #---- Define Histograms from Coffea Outputs ----#
    # -- For Observed Signal -- #
    Data_hist_unwgt = JetHT2016_unweighted[name].integrate('anacat', '2t'+b_y).integrate('dataset', 'JetHT2016_Data')

    # -- For Background Estimate -- #
    Data_hist_wgt = JetHT2016_weighted[name].integrate('anacat', 'pret'+b_y).integrate('dataset','JetHT2016_Data')

    # -- For Mass Modified Background Estimate -- #
    Data_hist_modmass_wgt = JetHT2016_ModMass_weighted[name].integrate('anacat', 'pret'+b_y).integrate('dataset','JetHT2016_Data')

    # -- SM TTbar MC in Signal Region (Contributes to the Background) -- #
    TTbar_unwgt = TTbar_unweighted[name].integrate('anacat', '2t'+b_y).integrate('dataset', 'TTbar')

    # -- Mistag Weighted TTbar MC (To Remove Double Counting of Untracked t-tagged J1 Events in Bkg.Est) -- #
    TTbar_2016_wgt = TTbar_2016_weighted[name].integrate('anacat', 'pret'+b_y).integrate('dataset', 'TTbar_2016')

    # ---- TTbar MC Scaling ---- #
    TTbar_unwgt.scale(ttbar2016_sf*toptag_sf**2) # Prepare to include this to background
    TTbar_2016_wgt.scale(-ttbar2016_sf*toptag_sf) # Prepare to subtract this from background to correct for ttbar contamination

    # ---- Data Scaling ---- #
    Data_hist_unwgt.scale(Nevts2016_sf) # Observed Signal is only scaled according to Luminosity
    Data_hist_wgt.scale(Nevts2016_sf)
    Data_hist_wgt.add(TTbar_unwgt) # Include signal region SM ttbar contribution to background estimate
    Data_hist_modmass_wgt.scale(Nevts2016_sf)
    Data_hist_modmass_wgt.add(TTbar_unwgt) # Include signal region SM ttbar contribution to modmass background estimate
    Data_hist_modmass_wgt.add(TTbar_2016_wgt) # ttbar contamination subtraction from background estimate

    # ---- Background and Observed Signal for Histograms ---- #
    # -- J0 t-tagged and weighted (kept for reference; not drawn below) -- #
    Background = Data_hist_wgt

    # -- J0 t-tagged and weighted, J1 Mass Modified, SM ttbar included, J1 t-tagged double counts removed -- #
    Background_ModMass_Corrected = Data_hist_modmass_wgt

    # -- Simple Observed Data in Signal Region -- #
    Observed = Data_hist_unwgt

    # ---- Legend Labels ---- #
    legend_labels = {'labels':['', '', '', 'QCD', r'$t\bar{t}$ Sim.', 'Data'],
                     'ncol':2,
                     'loc': 'upper right',
                     'fontsize': 'xx-small'}

    # ---- Plot Histograms ---- #
    BackgroundPlot = hist.plot1d(Background_ModMass_Corrected, ax=ax, clear=True,
                fill_opts=stack_background_opts,
                error_opts=stack_error_opts)
    TTbarPlot = hist.plot1d(TTbar_unwgt, ax=ax, clear=False,
                fill_opts=stack_ttbar_opts,
                error_opts=stack_error_opts)
    ObservedPlot = hist.plot1d(Observed, ax=ax, clear=False,
                 error_opts=data_err_opts,
                 legend_opts=legend_labels)

    # NOTE: the former `plt.ylim(bottom = .1, top = 10**4)` here acted on the
    # current axes (the ratio panel, created last) and was then overridden by
    # rax.set_ylim(0,2), so it had no effect and has been dropped. To pin the
    # upper panel's range, enable the line below *after* ax.autoscale(...).
    #ax.set_ylim(bottom = .1, top = 10**4)

    ax.set_yscale('log')
    # Axes.autoscale's first positional parameter is `enable`, not the axis
    # name: the former autoscale('y') / autoscale('x') calls each enabled
    # autoscaling on *both* axes. Spell that out explicitly.
    ax.autoscale(enable=True, axis='both')
    ax.set_ylabel('Events')
    ax.set_xlabel(None)
    ax.set_title(name + ' ' + b_y)

    #---- Plot Ratio ----#
    RatioPlot = hist.plotratio(num = Observed, denom = Background_ModMass_Corrected, ax = rax,
                   error_opts={'marker': '.', 'markersize': 10., 'color': 'k', 'elinewidth': 1},
                   unc = 'num')
    rax.set_ylabel('Data/Bkg')
    rax.axhline(y=1, color='k', linestyle=':')
    rax.set_ylim(0,2)
    #rax.set_xlim(600,3000)
    #rax.set_xlim(0,500)

    #---- Labeling ----#
    Lint = str(Lum2016*.001) # Integrated Luminosity
    # ax.text rather than plt.text: pyplot targets the *current* axes (the
    # ratio panel), while the label is positioned in ax coordinates.
    lumi = ax.text(1.15, 1.07, Lint[:6] + " fb$^{-1}$",
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='top',
            transform=ax.transAxes
           )

    #filename = 'BkgEst_' + name + '_' + b_y + '.png'
    filename = 'BkgEst_' + name + '_ContaminationCorrection_ModMass_' + b_y + '.png'
    # Saving is disabled, so the "saved" message is disabled with it
    # (it used to be printed even though no file was written).
    #plt.savefig(SaveDirectory+filename, bbox_inches="tight")
    #print('\n' + filename + ' saved')
    print('\n ********************************************************')
    print()

In [None]:
""" ---------- Test Comparing TTbar Simulation Categories ---------- """
# For every b-tag category in list_of_bcats this cell overlays three 'ttbarmass'
# histograms taken from the ttbar simulation outputs, each scaled by ttbar2016_sf:
#   * unweighted simulation in the 'pret' + category region,
#   * 2016-weighted simulation in the 'pret' + category region,
#   * unweighted simulation in the '2t' + category region (drawn as points).
# The lower (ratio) panel is created but intentionally left empty in this test.

SaveDirectory = maindirectory + '/' + 'BkgEstimate' + '/' # split histograms into subdirectories
DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already

# Saving is off by default (the original cell never wrote a file, although it
# printed "saved"). Set to True to write one PNG per category into SaveDirectory.
SaveTTbarComparison = False

# Loop-invariant settings, hoisted out of the per-category loop.
plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12
})

#---- Histogram to plot (probep, ttbarmass, jetpt, etc...)----#
name = 'ttbarmass'

for b_y in list_of_bcats:
    fig, (ax, rax) = plt.subplots(
        nrows=2,
        ncols=1,
        figsize=(7,7),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True
    )
    fig.subplots_adjust(hspace=.07)

    #---- Print cutflow of events (Debug Purposes Only)----#
    # NOTE(review): these counts come from the JetHT 2016 data outputs, not from
    # the ttbar simulation that is plotted below -- confirm this is intended.
    bkg_nevts = JetHT2016_weighted['cutflow']['pret'+b_y]
    sig_nevts = JetHT2016_unweighted['cutflow']['2t'+b_y]
    print("Background Estimate Region # of Entries = ", bkg_nevts)
    print("Signal Region # of Entries =              ", sig_nevts)

    #---- Define Histograms from Coffea Outputs ----#
    # integrate() is used here to pick a single category / dataset, leaving a
    # histogram that plot1d can draw directly.
    TTbar_unwgt = TTbar_unweighted[name].integrate('anacat', 'pret'+b_y).integrate('dataset', 'TTbar')
    TTbar_unwgt_Observed = TTbar_unweighted[name].integrate('anacat', '2t'+b_y).integrate('dataset', 'TTbar')
    TTbar_2016_wgt = TTbar_2016_weighted[name].integrate('anacat', 'pret'+b_y).integrate('dataset', 'TTbar_2016')

    # ---- Scale ---- #
    # scale() modifies the histogram it is called on; the three objects above are
    # rebuilt on every iteration, so re-running the cell does not scale twice.
    TTbar_2016_wgt.scale(ttbar2016_sf)
    TTbar_unwgt.scale(ttbar2016_sf)
    TTbar_unwgt_Observed.scale(ttbar2016_sf)

    # ---- Legend Labels ---- #
    # The leading empty strings pad the label list so that the three named entries
    # line up with the intended legend handles -- TODO confirm against the rendered legend.
    legend_labels = {'labels':['', '', '', r'$t\bar{t}$ Sim. unweighted pretag', r'$t\bar{t}$ Sim. weighted pretag', 'Observed 2t region'],
                     'ncol':2,
                     'loc': 'upper right',
                     'fontsize': 'xx-small'}

    # ---- Plot Histograms ---- #
    TTbar_unweightedPlot = hist.plot1d(TTbar_unwgt, ax=ax, clear=False,
                fill_opts=stack_ttbar_opts,
                error_opts=stack_error_opts)
    TTbar_weightedPlot = hist.plot1d(TTbar_2016_wgt, ax=ax, clear=False,
                fill_opts=stack_background_opts,
                error_opts=stack_error_opts)
    ObservedPlot = hist.plot1d(TTbar_unwgt_Observed, ax=ax, clear=False,
                 error_opts=data_err_opts,
                 legend_opts=legend_labels)

    # ---- Axis ranges and labels ---- #
    ax.set_yscale('log')
    # autoscale()'s first positional parameter is `enable`, not the axis name, so
    # the axis must be passed by keyword. Only x is autoscaled here; y gets the
    # explicit range below.
    ax.autoscale(axis='x')
    # Set the y-range on the main panel explicitly. plt.ylim() would act on the
    # *current* axes, which after plt.subplots() is the last-created (lower) panel.
    # It is applied after the log scale and the autoscale so it is not overridden.
    ax.set_ylim(bottom=.1, top=10**4)
    ax.set_ylabel('Events')
    # The x axis is shared: show its label once, under the bottom panel.
    shared_xlabel = ax.get_xlabel()
    ax.set_xlabel(None)
    rax.set_xlabel(shared_xlabel)
    ax.set_title(name + ' ' + b_y)

    #---- Labeling ----#
    Lint = str(Lum2016*.001) # Integrated Luminosity
    # Attach the label to the panel whose transform positions it.
    lumi = ax.text(1.15, 1.07, Lint[:6] + " fb$^{-1}$",
            fontsize=16,
            horizontalalignment='right',
            verticalalignment='top',
            transform=ax.transAxes
           )

    # Distinct file name: the original reused the background-estimate cell's
    # '_ContaminationCorrection_ModMass_' name, which would overwrite that plot.
    filename = 'BkgEst_' + name + '_TTbarCategoryComparison_' + b_y + '.png'
    if SaveTTbarComparison:
        fig.savefig(SaveDirectory+filename, bbox_inches="tight")
        print('\n' + filename + ' saved')
    else:
        # Do not claim a file was written when saving is disabled.
        print('\n' + filename + ' not saved (SaveTTbarComparison is False)')
    print('\n ********************************************************')
    print()