Displays unweighted plots from the corresponding Coffea outputs. The plots are categorized according to the histogram name (dependent variable) and the tag category defined in the `TTbarResProcessor`.
# NOTE: #
For a more realistic analysis that includes mistag rates, scale factors (SFs), etc., refer to the `TTbarResCoffea_BkgEstAnalysis` notebook.

In [None]:
# Stretch the notebook container to the full browser width so wide figures fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated in recent IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import copy
import hist
from coffea import util
import numpy as np
import itertools
import pandas as pd
import uproot
import mplhep as hep

In [None]:
# Path fragments used to build input/output locations. Each one ends with '/'
# so the pieces can be concatenated directly inside f-strings.
dir1 = 'CoffeaOutputsForCombine/Coffea_FirstRun/'

yeardir = '2016/'
btagdir = 'MediumBTag/'
btagType = 'DeepCSV/'
APVDir = dict(preVFP='APV/', postVFP='noAPV/')

# An empty `btagdir` switches to the old-discriminator naming; otherwise both
# the file-name suffix and the extra directory level stay empty.
od, oddir = ('_oldANdisc', 'Old2016_MediumBTag/') if not btagdir else ('', '')

# Load All Data Eras

In [None]:
# Project-local helper module used to load and combine the data outputs.
import LoadData as LD

# Unweighted JetHT data outputs for 2016; later combined per histogram and
# category through LD.AddEraHists.
# NOTE(review): the meaning of the empty first argument is defined inside
# LoadData -- confirm there before changing it.
JetHT2016_unweighted = LD.JetHT_Unweighted('', 2016)

# Load All MC

In [None]:
# Project-local helper module used to load and scale the simulated samples.
import LoadMC as LM

# Unweighted 2016 MC outputs. Further down, the TTbar container is indexed by
# keys such as '700_1000_preVFP' and the QCD container by 'preVFP'/'postVFP'.
# NOTE(review): the meaning of the empty first argument is defined inside
# LoadMC -- confirm there before changing it.
TTbar2016_unweighted = LM.TTbar_Unweighted('', 2016)
QCD2016_unweighted = LM.QCD_Unweighted('', 2016)
# Signal samples are currently disabled; re-enable together with the matching
# scale-factor and plotting lines further down.
# DM2016_unweighted = LM.DM_Unweighted('', 2016)
# RSGluon2016_unweighted = LM.RSGluon_Unweighted('', 2016)

# Prepare to Loop through Analysis Categories and Histograms

In [None]:
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import warnings
import re # regular expressions
import HelpfulPlottingDefs as hpd
# Silence every Python warning from this point on.
warnings.filterwarnings("ignore")

# ---- Analysis categories ---- #
# presumably these must mirror the categories defined in the processor -- verify there
ttagcats = ["AT&Pt", "at", "pret", "0t", "1t", ">=1t", "2t", ">=0t"]
btagcats = ["0b", "1b", "2b"]
ycats = ['cen', 'fwd']

# A category name is <t-tag><b-tag><rapidity>; the rapidity label varies
# fastest and the t-tag label slowest.
list_of_cats = [''.join(parts) for parts in itertools.product(ttagcats, btagcats, ycats)]

# Integer category index -> category name.
label_dict = dict(enumerate(list_of_cats))
print(label_dict)

In [None]:
# Directory the notebook is running from; the plot directories and the text
# summary path are built underneath it.
maindirectory = os.getcwd() # prepare to locally save images 

# Luminosity, Cross Sections and Scale Factors

In [None]:
# ---- Integrated luminosity (pb^-1) ---- #
# Source: https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
Lum2016 = 35920.
# Lum2017 = 41530.
# Lum2018 = 59740.
# Lum     = 137190.

# ---- Branching ratios, cross sections (pb) and tagging factors ---- #
t_BR = 0.6741
ttbar_BR = 0.4544  # PDG 2019
ttbar_xs1 = 831.76 * (0.09210)  # ttbar mass from 700 to 1000
ttbar_xs2 = 831.76 * (0.02474)  # ttbar mass from 1000 to Inf
toptag_sf = 0.9
toptag_kf = 1.0  # 0.7
qcd_xs = 1370000000.0  # from https://cms-gen-dev.cern.ch/xsdb

# Per-era luminosity (pb^-1).
# NOTE(review): these sum to 35928, not the 35920 stored in Lum2016 -- confirm
# which value is intended.
Lum = {
    'preVFP': 19800.,
    'postVFP': 16128.
}


# =========== SF =========== #
# Each scale factor is luminosity * cross section (times the squared top-tag SF
# and the k-factor for ttbar) divided by the sample's 'sumw' cutflow entry.
ttbar2016_sf = {}
DM2016_sf, RSGluon2016_sf = {}, {}
qcd2016_sf = {}

# ttbar mass window -> cross section for that window
ttbar_xs_by_window = {'700_1000': ttbar_xs1, '1000_Inf': ttbar_xs2}

for vfp, lumi_vfp in Lum.items():
    for window, window_xs in ttbar_xs_by_window.items():
        sample_key = f'{window}_{vfp}'
        ttbar2016_sf[sample_key] = lumi_vfp*window_xs*toptag_sf**2*toptag_kf/TTbar2016_unweighted[sample_key]['cutflow']['sumw']
    qcd2016_sf[vfp] = lumi_vfp*qcd_xs/QCD2016_unweighted[vfp]['cutflow']['sumw']
    # Signal samples are currently disabled:
    # for mass in range(1000, 5500, 500):
    #     DM2016_sf[str(mass)+'_'+vfp] = Lum[vfp]/DM2016_unweighted[str(mass)+'_'+vfp]['cutflow']['sumw']
    #     RSGluon2016_sf[str(mass)+'_'+vfp] = Lum[vfp]/RSGluon2016_unweighted[str(mass)+'_'+vfp]['cutflow']['all events']

# Data and Unweighted MC Plots
### NOTE: the `SDmass` histogram uses an axis named `jetmass`, so the code will get confused unless an exception is made for `SDmass`.

In [None]:
# y-axis scale handed to ax.set_yscale; also the last directory level of the
# location where plots are saved.
PlotType = 'log'
# Text file that receives the per-category event counts written by the plotting loop.
DataOutName = f'{maindirectory}/UnweightedAnalysisPlots/{yeardir}{btagdir}{oddir}{btagType}SignalRegionOutput.txt'
# Mode used to open DataOutName; the plotting loop switches it to 'a' after the first write.
filler = 'w'
# When True, each figure is written to disk with plt.savefig.
saveOn = False

In [None]:
# Draws, for every histogram and every analysis category, a two-panel figure:
# data vs. scaled simulation on top and the Data/MC ratio below. Event counts
# for a subset of categories are also written to DataOutName.
count = 0
# ---- List the Histograms Here ---- #
# list_of_hists_4vector = ['ttbarmass', 'jetpt', 'jeteta', 'jetphi', 'jety', 'jetdy', 'jetmass', 'SDmass', 'tau32']
list_of_hists_4vector = ['ttbarmass']

# Font sizes are identical for every figure, so configure them once up front
# instead of once per category.
plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12
})

# x-axis window for the histograms that need one; any other histogram keeps
# the range chosen by the plotting call.
xlim_by_hist = {
    'ttbarmass': (1000, 5000),
    'jetpt': (400, 2000),
    'jeteta': (-3, 3),
    'tau32': (0, 1.2),
}

for ihist in list_of_hists_4vector:
    # -- split histograms into subdirectories -- #
    SaveDirectory = f'{maindirectory}/UnweightedAnalysisPlots/{yeardir}{btagdir}{oddir}{btagType}{ihist}/{PlotType}/'
    # presumably creates the directory when it is missing -- confirm in HelpfulPlottingDefs
    hpd.DoesDirectoryExist(SaveDirectory)
    for icat, ilabel in label_dict.items():
        fig, (ax, rax) = plt.subplots(
            nrows=2,
            ncols=1,
            figsize=(10,10),
            gridspec_kw={"height_ratios": (3, 1)},
            sharex=True
        )
        fig.subplots_adjust(hspace=.07)
        title = ihist + '  ' + ilabel
        filename = ihist + '_' + ilabel + '.' + 'png'

        # ==================== Data histograms ==================== #
        Data_hist = LD.AddEraHists(JetHT2016_unweighted, 2016, ihist, icat)

        # ==================== Normalized QCD ==================== #
        # Sum of the two 2016 eras, each scaled by its own scale factor.
        QCD_hist = QCD2016_unweighted['preVFP'][ihist]['UL16preVFP_QCD', icat, :]*qcd2016_sf['preVFP']\
                 + QCD2016_unweighted['postVFP'][ihist]['UL16postVFP_QCD', icat, :]*qcd2016_sf['postVFP']

        # ==================== Normalized TTbar ==================== #
        TTbar_hist = LM.ScaledTTbar(TTbar2016_unweighted, 2016, ihist, icat, ttbar2016_sf) # SM TTbar MC in Signal Region

        # ==================== Normalized RSGluon (disabled) ==================== #
        # RSG1000_hist = RSGluon1000_unweighted[ihist]['UL16postVFP_RSGluon1000', icat, :]
        # RSG1000_hist *= RSGluon1000_sf2016 #scaled according to luminosity

        # ==================== Normalized DM (disabled) ==================== #
        # DM1000_hist = DM1000_unweighted[ihist]['UL16postVFP_DM1000', icat, :]
        # DM1000_hist *= DM1000_sf2016 #scaled according to luminosity

        # ==================== Make plots ==================== #
        # Total simulated background = TTbar + QCD. Work on a copy so that
        # TTbar_hist itself stays untouched for its own overlay.
        MC_hist = TTbar_hist.copy()
        MC_hist += QCD_hist

        # ---- Extract both the data and MC event totals from the histograms ---- #
        NtotalMC = np.sum(MC_hist.view().value)
        NtotalData = np.sum(Data_hist.view().value)

        # ---- Optional: normalize the total MC directly to the data (for aesthetic purposes only!) ---- #
        # -------- Unweighted simulation of the background alone greatly overestimates ------- #
#         if NtotalMC > 0.:
#             MC_hist *= (NtotalData/NtotalMC)
#             TTbar_hist *= (NtotalData/NtotalMC)
#         else:
#             MC_hist *= 0.
#             TTbar_hist *= 0.

        #---- Plot Data ----#
        Data_hist.plot1d(ax=ax, histtype='errorbar', marker='.', markersize=5., color='k')

        #---- Plot Total MC (simulated QCD + SM ttbar background) ----#
        MC_hist.plot1d(ax=ax, histtype='fill', color='yellow')

        #---- Plot TTbar MC for comparison ----#
        TTbar_hist.plot1d(ax=ax, histtype='fill', color='red')

        #---- Signal overlays (disabled) ----#
        # RSG1000_hist.plot1d(ax=ax, histtype='step', color='purple')
        # DM1000_hist.plot1d(ax=ax, histtype='step', color='black')

        # ---- Event-count summary ---- #
        # The first write uses the configured mode; every later one appends.
        if count > 0:
            filler = 'a'
        # Indices 36-41 are the '2t' categories (see the ordering of
        # list_of_cats); `count` caps the output at one entry per category.
        if count < 6 and icat in range(36,42):
            with open(DataOutName, filler) as f:
                NtotalTT = np.sum(TTbar_hist.view().value)
                print(f'\t\t{ilabel}\n===================================================', file=f)
                print('Observed Data Events   = ', '%10i'% NtotalData, file=f)
                print('Simulated TTbar Events = ', '%10i'% NtotalTT,   file=f)
                # The blank separator line belongs in the file, not on stdout.
                print(file=f)
                count += 1

        # Address the main panel explicitly rather than relying on pyplot's
        # implicit "current axes".
        ax.autoscale(enable=True, axis='y')
        ax.set_ylim(bottom=1.)
        ax.set_yscale(PlotType)
        ax.set_ylabel('Events')
        ax.set_xlabel(None)
        ax.set_title(title)
        # leg = ax.legend(labels=[r'Sim. Bkg', r'$t\bar{t}$ Sim.', r'RSKK Gluon $1TeV$ Sim.', r'DM Med. $1TeV$ Sim.', r'Data'], fontsize='xx-small')
        leg = ax.legend(labels=[r'Sim. Bkg', r'$t\bar{t}$ Sim.', r'Data'], fontsize='small')

        #---- Plot Ratio ----#
        hpd.plotratio(Data_hist, MC_hist, ax = rax, histtype = 'errorbar')
        rax.set_ylabel('Data/MC')
        rax.axhline(y=1, color='k', linestyle=':')
        rax.set_ylim(0,2)

        # The panels share their x-axis, so limiting the ratio panel limits both.
        if ihist in xlim_by_hist:
            rax.set_xlim(*xlim_by_hist[ihist])

        #---- Labeling ----#
        # All labels are positioned in the main panel's axes coordinates.
        Lint = str(Lum2016*.001) # Integrated Luminosity, pb^-1 -> fb^-1
        lumi = ax.text(1.0, 1.06, "L = " + Lint[:6] + " fb$^{-1}$",
                fontsize='x-large',
                horizontalalignment='right',
                verticalalignment='top',
                transform=ax.transAxes
               )
        CMS = ax.text(-0.05, 1.06, 'CMS Preliminary',
                fontsize='x-large',
                horizontalalignment='left',
                verticalalignment='top',
                transform=ax.transAxes
               )
        # U+2615 HOT BEVERAGE, written as an escape so the glyph survives any
        # re-encoding of the source file.
        coffee = ax.text(1.0, 0.87, "\u2615",
                      fontsize=50,
                      horizontalalignment='left',
                      verticalalignment='bottom',
                      transform=ax.transAxes
                     )
        if saveOn:
            # Indices 18-29 ('0t' and '1t') and 36-41 ('2t') are additionally
            # grouped into their own sub-directory.
            if icat in range(18,30) or icat in range(36,42):
                filename = 'AnalysisCategories/' + ihist + '_' + ilabel + '.' + 'png'
                hpd.DoesDirectoryExist(SaveDirectory+'AnalysisCategories/')
            plt.savefig(SaveDirectory+filename, bbox_inches="tight")
            print(SaveDirectory+filename)