Displays unweighted plots from the corresponding Coffea outputs. The plots are categorized according to the histogram name (dependent variable) and the tag category defined in the `TTbarResProcessor`.  
# NOTE: #
All QCD MC histograms are normalized directly to the data, since no corrections from the mistag analysis or the modmass procedure are applied here. For a more realistic analysis, refer to the `TTbarResCoffea_BkgEstAnalysis` notebook.

In [None]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` are taken from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import hist
from coffea import util
import numpy as np
import itertools
import mplhep as hep

In [None]:
# ---- Where the Coffea outputs live ---- #
dir1 = 'CoffeaOutputsForCombine/Coffea_FirstRun/'

btagdir = ''
yeardir = '2016/'

# Sub-directory used for each VFP period
APVDir = {'preVFP': 'APV/', 'postVFP': 'noAPV/'}

# With an empty `btagdir`, the input file names carry the '_oldANdisc' suffix
# and the plots are saved below 'Old2016_MediumBTag/'; otherwise both stay empty.
od, oddir = ('_oldANdisc', 'Old2016_MediumBTag/') if btagdir == '' else ('', '')

# Load All Data Eras

In [None]:
# Loaded JetHT data outputs, keyed by '<Era>_<vfp>' (e.g. 'B_preVFP')
JetHT2016_unweighted = {}

In [None]:
# Load every preVFP JetHT data era and store it under the key '<Era>_preVFP'
vfp = 'preVFP'
for Era in ('B', 'C', 'D', 'E', 'F'):
    JetHT2016_unwgt_str = f'TTbarRes_0l_UL16{vfp}_JetHT{Era}_Data'
    JetHT2016_unweighted[Era+'_'+vfp] = util.load(
        f'{dir1}JetHT/{btagdir}{yeardir}{APVDir[vfp]}{JetHT2016_unwgt_str}{od}.coffea'
    )

In [None]:
# Load every postVFP JetHT data era and store it under the key '<Era>_postVFP'
vfp = 'postVFP'
for Era in ('F', 'G', 'H'):
    JetHT2016_unwgt_str = f'TTbarRes_0l_UL16{vfp}_JetHT{Era}_Data'
    JetHT2016_unweighted[Era+'_'+vfp] = util.load(
        f'{dir1}JetHT/{btagdir}{yeardir}{APVDir[vfp]}{JetHT2016_unwgt_str}{od}.coffea'
    )

# Load All MC

In [None]:
# Loaded MC outputs, one entry per VFP period ('preVFP' / 'postVFP')
TTbar_unweighted, QCD_unweighted = {}, {}

In [None]:
# Load the TTbar and QCD MC outputs of both VFP periods
for vfp in ('preVFP', 'postVFP'):
    TTbar_unwgt_str = f'TTbarRes_0l_UL16{vfp}_TTbar'
    TTbar_unweighted[vfp] = util.load(
        f'{dir1}TT/{btagdir}{yeardir}{APVDir[vfp]}{TTbar_unwgt_str}{od}.coffea'
    )

    QCD_unwgt_str = f'TTbarRes_0l_UL16{vfp}_QCD'
    QCD_unweighted[vfp] = util.load(
        f'{dir1}QCD/{btagdir}{yeardir}{APVDir[vfp]}{QCD_unwgt_str}{od}.coffea'
    )

# The cutflow can be checked if desired 

In [None]:
# Collect the cutflow counts of every data era as a float array and print them.
#
# Datasets are matched on their exact '<Era>_<vfp>' key instead of on a
# substring of it, so a single lookup replaces the former chain of eight
# copy-pasted branches, and each array is built in one step rather than
# grown entry by entry with np.append.
era_cutflows = {
    'B_preVFP': np.array([]),
    'C_preVFP': np.array([]),
    'D_preVFP': np.array([]),
    'E_preVFP': np.array([]),
    'F_preVFP': np.array([]),
    'F_postVFP': np.array([]),
    'G_postVFP': np.array([]),
    'H_postVFP': np.array([]),
}

for dataset, output in JetHT2016_unweighted.items():
    if dataset not in era_cutflows:
        continue  # not one of the eras summed below; left out as before
    print("-------" + dataset + " Cutflow--------")
    # Counts are kept in the order in which the cutflow mapping yields them
    era_cutflows[dataset] = np.array(
        [n_events for _, n_events in output['cutflow'].items()], dtype=float
    )
    print(era_cutflows[dataset])

# Names used by the cells that follow; an era that was not loaded stays empty
EraB = era_cutflows['B_preVFP']
EraC = era_cutflows['C_preVFP']
EraD = era_cutflows['D_preVFP']
EraE = era_cutflows['E_preVFP']
EraF1 = era_cutflows['F_preVFP']
EraF2 = era_cutflows['F_postVFP']
EraG = era_cutflows['G_postVFP']
EraH = era_cutflows['H_postVFP']

In [None]:
# Element-wise sum of the per-era cutflow arrays
AllData = EraB + EraC + EraD + EraE + EraF1 + EraF2 + EraG + EraH
print("------- Unweighted Data Sum of Cutflows--------")
# The cut names are read from era B; entry `index` of AllData is printed
# next to the cut name found at the same position.
for index, (i, j) in enumerate(JetHT2016_unweighted['B_preVFP']['cutflow'].items()):
    print( '%20s : %10i' % (i,AllData[index]) )

In [None]:
# for dataset,output in Datasets.items():
#     print("-------" + dataset + " Cutflow--------")
#     for i,j in output['cutflow'].items():        
#         print( '%20s : %20s' % (i,j) )

## Make Save Directory

In [None]:
def mkdir_p(mypath):
    '''Create the directory `mypath` and any missing parents, like `mkdir -p`.

    A directory that already exists is not an error.

    Raises:
        OSError: if the directory cannot be created, e.g. because `mypath`
            already exists but is not a directory.
    '''
    from os import makedirs

    # exist_ok=True replaces the manual EEXIST / isdir check
    makedirs(mypath, exist_ok=True)

In [None]:
def DoesDirectoryExist(mypath): #extra precaution (Probably overkill...)
    '''Make sure `mypath` exists, calling `mkdir_p` only when nothing is there yet.

    Nothing is done when the path already exists, whatever kind of entry it is.
    '''
    from os import path

    if not path.exists(mypath):
        mkdir_p(mypath)

# Prepare to Loop through Analysis Categories and Histograms

In [None]:
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import warnings
import re # regular expressions
warnings.filterwarnings("ignore")  # silences every Python warning from here on

# ---- Reiterate categories ---- #
ttagcats = ["AT&Pt", "at", "pret", "0t", "1t", ">=1t", "2t", ">=0t"]
btagcats = ["0b", "1b", "2b"]
ycats = ['cen', 'fwd']

# One label per (t-tag, b-tag, rapidity) combination, in itertools.product order
list_of_cats = [''.join(combo) for combo in itertools.product(ttagcats, btagcats, ycats)]
# Integer category index -> label
label_dict = dict(enumerate(list_of_cats))
print(label_dict)

In [None]:
maindirectory = os.getcwd() # working directory at run time; root of the locally saved plot directories

# Luminosity, Cross Sections and Scale Factors

In [None]:
Nevts2016 = 625441538 # from dasgoclient
# Ratio of the full event count to the first cutflow entry of the summed data
Nevts2016_sf = Nevts2016/AllData[0]

Lum2016 = 35920./Nevts2016_sf # pb^-1 from https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
# Lum2017 = 41530.
# Lum2018 = 59740.
# Lum     = 137190.

t_BR = 0.6741
ttbar_BR = 0.4544 #PDG 2019
ttbar_xs = 831.76  #pb  Monte Carlo already includes xs in event weight (if not dividing by sumw2)!!
toptag_kf = 0.49

qcd_xs = 1370000000.0 #pb From https://cms-gen-dev.cern.ch/xsdb

# =========== SF =========== #
# Normalisation counts summed over both VFP periods.
# NOTE(review): TTbar is normalised with the 'all events' counter while QCD
# uses 'sumw' -- confirm that this difference is intended.
alltt_unwgt_evts = sum(
    TTbar_unweighted[vfp]['cutflow']['all events'] for vfp in ('preVFP', 'postVFP')
)
allqcd_unwgt_evts = sum(
    QCD_unweighted[vfp]['cutflow']['sumw'] for vfp in ('preVFP', 'postVFP')
)

# NOTE(review): `ttbar_xs` and `t_BR` do not enter the scale factors below.
ttbar2016_sf = Lum2016*ttbar_BR*toptag_kf/alltt_unwgt_evts
qcd2016_sf = Lum2016*qcd_xs/allqcd_unwgt_evts

# Ratio Plot Definition

In [None]:
def plotratio(numerator, denominator, ax=None, histtype='errorbar', marker='.', markersize=5., color='k', alpha=0.1):
    '''Draw the ratio `numerator / denominator.values()` with `hep.histplot`.

    The first axis of both histograms must have the same `size`; otherwise an
    Exception is raised before any division is attempted.

    `marker` and `markersize` are only forwarded for histtype 'errorbar';
    `alpha` (together with a fixed lw=5.) is only forwarded for 'fill'.
    Any other histtype is drawn with just `color`.

    Returns whatever `hep.histplot` returns.
    '''
    # Comparing the sizes directly gives the same verdict as comparing them
    # after subtracting the same constant from both.
    if numerator.axes[0].size != denominator.axes[0].size:
        raise Exception('Numerator and Denominator axes are different sizes; Cannot perform division.')

    ratio = numerator / denominator.values()

    # Extra drawing options that depend on the requested histtype
    if histtype == 'errorbar':
        extra_style = {'marker': marker, 'markersize': markersize}
    elif histtype == 'fill':
        extra_style = {'alpha': alpha, 'lw': 5.}
    else:
        extra_style = {}

    return hep.histplot(ratio, ax=ax, histtype=histtype, color=color, **extra_style)

# Data and Unweighted MC Plots
### NOTE: the SDmass histogram uses an axis called jetmass, so the code will get confused unless an exception is made for SDmass

In [None]:
# Draws, for every histogram in `list_of_hists_4vector` and every category in
# `label_dict`, a two-panel figure: data, total MC and ttbar MC on top and the
# Data/MC ratio underneath.  Saving the figures is currently commented out.
count = 0        # number of per-category event summaries printed so far (at most 6)
PlotType = 'log' # y-axis scale; also the last level of the save directory

# ---- List the Histograms Here ---- #
# list_of_hists_4vector = ['ttbarmass', 'jetpt', 'jeteta', 'jetphi', 'jety', 'jetdy', 'jetmass', 'SDmass', 'tau32']
list_of_hists_4vector = ['ttbarmass']

for ihist in list_of_hists_4vector:
    # -- split histograms into subdirectories -- #
    SaveDirectory = f'{maindirectory}/UnweightedAnalysisPlots/{yeardir}{btagdir}{oddir}{ihist}/{PlotType}/'
    DoesDirectoryExist(SaveDirectory) # no need to create the directory several times if it exists already
    for icat, ilabel in label_dict.items(): 
        # Font sizes; the same values are re-applied on every iteration
        plt.rcParams.update({
        'font.size': 14,
        'axes.titlesize': 18,
        'axes.labelsize': 18,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12
        })
        # Upper panel `ax` for the distributions, lower panel `rax` for the ratio
        fig, (ax, rax) = plt.subplots(
            nrows=2,
            ncols=1,
            figsize=(10,10),
            gridspec_kw={"height_ratios": (3, 1)},
            sharex=True
        )
        fig.subplots_adjust(hspace=.07)
        title = ihist + '  ' + ilabel
        #filename = ihist + '_' + ilabel + '_LinearScale.' + 'png'
        filename = ihist + '_' + ilabel + '.' + 'png' # only used by the commented-out savefig at the end

        
#    ==================== DATA HISTOGRAMS ====================
        # ---- initialize data histograms with first era ---- #
        JetHT2016_unwgt_str = 'UL16preVFP_JetHTB_Data'
        Data_hist = JetHT2016_unweighted['B_preVFP'][ihist][JetHT2016_unwgt_str, icat, :]
        
        # ---- Add all data together ---- #
        # NOTE(review): the in-place `+=` assumes that the slice above is a new
        # histogram, so the loaded era B output is not modified -- TODO confirm
        for vfp in ['preVFP', 'postVFP']:
            #---- Define Histograms from Coffea Outputs ----# 
            if vfp == 'preVFP':
                for Era in ['C', 'D', 'E', 'F']: #exclude B because histogram is initialized with B era
                    JetHT2016_unwgt_str = f'UL16{vfp}_JetHT{Era}_Data'
                    Data_hist += JetHT2016_unweighted[Era+'_'+vfp][ihist][JetHT2016_unwgt_str, icat, :]
            else:
                for Era in ['F', 'G', 'H']: # all postVFP eras
                    JetHT2016_unwgt_str = f'UL16{vfp}_JetHT{Era}_Data'
                    # -- For Observed Signal -- #
                    Data_hist += JetHT2016_unweighted[Era+'_'+vfp][ihist][JetHT2016_unwgt_str, icat, :]
                    
        # Scale the summed data by the event-count ratio Nevts2016_sf
        Data_hist *= Nevts2016_sf
#    ==================== NORMALIZE QCD ====================
        
        # Sum of the preVFP and postVFP QCD histograms for this category
        QCD_hist = QCD_unweighted['preVFP'][ihist]['UL16preVFP_QCD', icat, :]\
                 + QCD_unweighted['postVFP'][ihist]['UL16postVFP_QCD', icat, :]
        
        QCD_hist *= qcd2016_sf #scaled according to luminosity

#    ==================== NORMALIZE TTBAR ====================
        
        # Sum of the preVFP and postVFP TTbar histograms for this category
        TTbar_hist = TTbar_unweighted['preVFP'][ihist]['UL16preVFP_TTbar', icat, :]\
                   + TTbar_unweighted['postVFP'][ihist]['UL16postVFP_TTbar', icat, :]
        
        TTbar_hist *= ttbar2016_sf #scaled according to luminosity

#    ==================== NORMALIZE RSGLUON (disabled) ====================

        # RSG1000_hist = RSGluon1000_unweighted[ihist]['UL16postVFP_RSGluon1000', icat, :]
        # RSG1000_hist *= RSGluon1000_sf2016 #scaled according to luminosity  
        
#    ==================== NORMALIZE DM (disabled) ====================
        
        # DM1000_hist = DM1000_unweighted[ihist]['UL16postVFP_DM1000', icat, :]
        # DM1000_hist *= DM1000_sf2016 #scaled according to luminosity
            
#    ==================== MAKE PLOTS ====================

        # Total MC = ttbar + QCD; the copy keeps TTbar_hist itself unchanged
        MC_hist = TTbar_hist.copy()
        MC_hist += QCD_hist
        
        # ---- Extract both the data and MC events and from histograms ---- #
        NtotalMC = np.sum(MC_hist.view().value)
        NtotalData = np.sum(Data_hist.view().value)
        
        # ---- Normalize the total MC histogram directly to the data (for aesthetic purposes only!) ---- #
        # -------- Unweighted simulation of the background alone greatly overestimates ------- #
        # The ttbar histogram gets the same factor as the total MC; with no MC
        # events both are zeroed so that no division by zero takes place
        if NtotalMC > 0.:
            MC_hist *= (NtotalData/NtotalMC)
            TTbar_hist *= (NtotalData/NtotalMC)
        else:
            MC_hist *= 0.
            TTbar_hist *= 0.

        #---- Plot Data ----#
        #-----------------------------------------------------------------#
        Data_hist.plot1d(ax=ax, histtype='errorbar', marker='.', markersize=5., color='k')
        
        #---- Plot Total MC (simulated QCD + SM ttbar background)----#
        #-----------------------------------------------------------------#
        MC_hist.plot1d(ax=ax, histtype='fill', color='yellow')
        
        #---- Plot TTbar MC for comparison ---- #
        #-----------------------------------------------------------------#    
        TTbar_hist.plot1d(ax=ax, histtype='fill', color='red')
        
        #---- Plot RSG MC for comparison ---- #
        #-----------------------------------------------------------------#   
        # RSG1000_hist.plot1d(ax=ax, histtype='step', color='purple')
        
        
        #---- Plot DMM MC for comparison ----#
        #-----------------------------------------------------------------#
        # DM1000_hist.plot1d(ax=ax, histtype='step', color='black')
        
        # Category indices 36-41 are the six '2t' labels of label_dict
        if count < 6 and icat in range(36,42): # Print number of events for each category once
            NtotalTT = np.sum(TTbar_hist.view().value)
            print(f'\t\t{ilabel}\n===================================================')
            print('Observed Data Events   = ', '%10i'% NtotalData)
            print('Simulated TTbar Events = ', '%10i'% NtotalTT  )
            print()
            count += 1
            
        # Indices 18-29 ('0t' and '1t' labels) and 36-41 ('2t' labels) go into
        # the 'AnalysisCategories/' subdirectory
        if icat in range(18,30) or icat in range(36,42):
            filename = 'AnalysisCategories/' + ihist + '_' + ilabel + '.' + 'png'
            DoesDirectoryExist(SaveDirectory+'AnalysisCategories/') # no need to create the directory several times if it exists already
        
        plt.autoscale(enable=True, axis='y')
        ax.set_ylim(bottom=1.)
        ax.set_yscale(PlotType)
        ax.set_ylabel('Events')
        ax.set_xlabel(None)
        ax.set_title(title)
        # leg = ax.legend(labels=[r'Sim. Bkg', r'$t\bar{t}$ Sim.', r'RSKK Gluon $1TeV$ Sim.', r'DM Med. $1TeV$ Sim.', r'Data'], fontsize='xx-small')
        # NOTE(review): the label order differs from the plotting order above
        # (data is drawn first) -- confirm that each label lands on the right artist
        leg = ax.legend(labels=[r'Sim. Bkg', r'$t\bar{t}$ Sim.', r'Data'], fontsize='small')
        
        
        #---- Plot Ratio ----#
        plotratio(Data_hist, MC_hist, ax = rax, histtype = 'errorbar')
        rax.set_ylabel('Data/MC')
        rax.axhline(y=1, color='k', linestyle=':') # reference line at Data/MC = 1
        rax.set_ylim(0,2)
        
        # x-range per histogram; histograms not listed keep the default range
        if ihist == 'ttbarmass':
            rax.set_xlim(1000,5000)
        if ihist == 'jetpt':
            rax.set_xlim(400,2000)
        if ihist == 'jeteta':
            rax.set_xlim(-3,3)
        if ihist == 'tau32':
            rax.set_xlim(0,1.2)
        
        #---- Labeling ----#
        Lint = str(Lum2016*.001) # Integrated Luminosity
        # Only the first six characters of the luminosity string are shown
        lumi = plt.text(1.0, 1.06, "L = " + Lint[:6] + " fb$^{-1}$",
                fontsize='x-large',
                horizontalalignment='right',
                verticalalignment='top',
                transform=ax.transAxes
               )
        CMS = plt.text(-0.05, 1.06, 'CMS Preliminary',
                fontsize='x-large',
                horizontalalignment='left',
                verticalalignment='top',
                transform=ax.transAxes
               )
        coffee = plt.text(1.0, 0.87, u"☕",
                      fontsize=50,
                      horizontalalignment='left',
                      verticalalignment='bottom',
                      transform=ax.transAxes
                     )

#         plt.savefig(SaveDirectory+filename, bbox_inches="tight")
#         print(SaveDirectory+filename)