# Post Processing

In [None]:
import gzip
import pickle

In [None]:
import matplotlib.pyplot as plt
import mplhep as hep
# Switch matplotlib to the CMS style sheet shipped with mplhep; this affects
# every figure created afterwards in this session.
plt.style.use(hep.style.CMS)
from coffea import hist
from plots.helpers import makePlot2, merge_histos

## Import Histograms

In [None]:
from yaml import Loader, Dumper
import yaml

# Parse ../data/samples.yaml into `samples`.
# NOTE(review): `Loader` is PyYAML's full loader and can construct arbitrary
# Python objects from tagged YAML. If the file only holds plain data, consider
# yaml.safe_load; only ever load trusted files this way.
with open('../data/samples.yaml', 'r') as f:
    samples = yaml.load(f, Loader = Loader)

In [None]:
# Show the top-level keys of the mapping loaded from the YAML file.
samples.keys()

In [None]:
# Load the gzip-compressed pickle at `path` into `output_flat`.
# gzip.open defaults to binary read mode, which is what pickle.load needs.
# NOTE(review): unpickling can execute arbitrary code; only open files that
# were produced by a trusted run.
path = '../analysis/backgrounds_pres.pkl.gz'
with gzip.open(path) as fin:
    output_flat= pickle.load(fin)

In [None]:
# Load the gzip-compressed pickle at `path` into `meta`.
# Note that `path` is reassigned here, so it no longer points at the
# backgrounds file after this cell has run.
# NOTE(review): unpickling can execute arbitrary code; only open files that
# were produced by a trusted run.
path = '../analysis/meta_pres.pkl.gz'
with gzip.open(path) as fin:
    meta= pickle.load(fin)

In [None]:
# List the keys stored under the 'cutflow' entry of the loaded output.
output_flat['cutflow'].keys()

In [None]:
# Inspect the 'cutflow' entry of one specific key (a ZJetsToNuNu HT-binned sample name).
output_flat['cutflow']['ZJetsToNuNu_HT-1200To2500_14TeV-madgraph_200PU']

In [None]:
# Display the object loaded from meta_pres.pkl.gz.
meta

In [None]:
import math
import pandas as pd
import numpy as np

In [None]:
def getCutFlowTable(output, processes=['tW_scattering', 'TTW', 'ttbar'], lines=['skim', 'twoJet', 'oneBTag'], significantFigures=3, absolute=True, signal=None, total=False):
    '''
    Build a formatted cut-flow table from a nested mapping of yields.

    `output[proc][line]` must hold the yield of process `proc` after the
    selection step `line`, and `output[proc][line + '_w2']` the matching sum
    of squared weights (its square root is quoted as the uncertainty).
    Lines and processes have to follow the naming used in `output`.

    Parameters
    ----------
    output : mapping
        Nested mapping as described above.
    processes : list of str
        Processes to show, one column each. Never modified.
    lines : list of str
        Selection steps to show, one row each, in cut-flow order. Never modified.
    significantFigures : int
        Number of significant figures kept for the yields. The uncertainty is
        rounded to the same number of decimals as its yield. Efficiencies and
        S/B are rounded to this many decimals.
    absolute : bool
        If True, return yields ("value +/- uncertainty" strings) plus the
        optional 'total' and 'S/B' columns. If False, return only the
        efficiency of each line relative to the previous line (no
        uncertainties); the first line, and any line whose predecessor has a
        non-positive yield, gets efficiency 1.
    signal : iterable of str or None
        Processes (all contained in `processes`) treated as signal. When
        given, an 'S/B' column is added, with every other entry of
        `processes` counted as background. S/B is set to 1 where the
        background sum is not positive.
    total : bool or iterable of str
        Falsy: no 'total' column. True: add a column summing all `processes`.
        Iterable of process names: add a column summing exactly those.

    Returns
    -------
    pandas.DataFrame
        One row per entry of `lines` (in that order), one column per process
        plus the optional extra columns.

    Raises
    ------
    KeyError
        If a process, line or '<line>_w2' entry is missing from `output`.
    ValueError
        If an entry of `signal` is not contained in `processes`.
    '''
    def _decimals(value):
        # Decimal places that keep `significantFigures` digits of `value`.
        # abs() keeps the minus sign of a negative yield from being counted
        # as a digit.
        return significantFigures - len(str(int(abs(value))))

    res = {}
    eff = {}
    for proc in processes:
        counts = output[proc]

        res[proc] = {}
        for line in lines:
            digits = _decimals(counts[line])
            res[proc][line] = "%s +/- %s" % (
                round(counts[line], digits),
                round(math.sqrt(counts[line + '_w2']), digits),
            )

        # for efficiencies. doesn't deal with uncertainties yet
        eff[proc] = {}
        for i, line in enumerate(lines):
            if i > 0 and counts[lines[i - 1]] > 0:
                eff[proc][line] = round(counts[line] / counts[lines[i - 1]], significantFigures)
            else:
                eff[proc][line] = 1.

    if total:
        # `total=True` means "sum everything shown"; otherwise `total` is the
        # explicit list of processes to add up.
        total_procs = processes if total is True else total
        res['total'] = {}
        for line in lines:
            summed = sum(output[proc][line] for proc in total_procs)
            res['total'][line] = "%s" % round(summed, _decimals(summed))

    # if a signal is specified, calculate S/B
    if signal is not None:
        # Work on a shallow copy so the caller's list (or the shared default
        # argument) is never modified. remove() raises ValueError for a
        # signal that is not part of `processes`.
        backgrounds = list(processes)
        for s in signal:
            backgrounds.remove(s)
        res['S/B'] = {}
        for line in lines:
            bkg_yield = sum(output[proc][line] for proc in backgrounds)
            if bkg_yield > 0:
                ratio = sum(output[s][line] for s in signal) / bkg_yield
            else:
                ratio = 1
            res['S/B'][line] = round(ratio, significantFigures)

    if not absolute:
        res = eff
    df = pd.DataFrame(res)
    df = df.reindex(lines) # restores the proper order
    return df

In [None]:
# Cut-flow table for a single W3JetsToLNu sample, restricted to the 'total' line.
getCutFlowTable(output_flat, processes=['W3JetsToLNu_TuneCUETP8M1_14TeV-madgraphMLM-pythia8_200PU'], lines=['total'])

## Setting Up Plots

In [None]:
# Binning definitions: hist.Bin(axis name, axis label, number of bins, low edge, high edge).
# Every label is a raw string so that Python does not interpret the LaTeX
# backslashes: in a normal string "\tau" starts with a TAB character, and
# "\eta", "\phi", "\Delta" rely on invalid escape sequences.
N_bins = hist.Bin('multiplicity', r'$N$', 6, -0.5, 5.5)
mass_bins = hist.Bin('mass', r'$M\ (GeV)$', 40, 0, 400)
# H_T reuses the axis name 'pt'; the plotting cell passes 'pt' as the axis
# name together with ht_bins, so the two must stay in sync.
ht_bins = hist.Bin('pt', r'$H_{T}\ (GeV)$', 60, 0, 3000)
pt_bins = hist.Bin('pt', r'$p_{T}\ (GeV)$', 80, 200, 1000)
eta_bins = hist.Bin("eta", r"$\eta$", 33, -4, 4)
phi_bins = hist.Bin("phi", r"$\phi$", 33, -4, 4)
# "\Delta R" with a space: "\DeltaR" is not a symbol mathtext knows.
deltaR_bins = hist.Bin("deltaR", r"$\Delta R$", 10, 0, 1)
# The four tau binnings share the axis name "tau" and differ only in upper edge.
tau1_bins = hist.Bin("tau", r"$\tau_1$", 10, 0, 0.7)
tau2_bins = hist.Bin("tau", r"$\tau_2$", 10, 0, 0.5)
tau3_bins = hist.Bin("tau", r"$\tau_3$", 10, 0, 0.4)
tau4_bins = hist.Bin("tau", r"$\tau_4$", 10, 0, 0.3)

# Signal samples, each key being a one-element tuple holding the sample name.
# The five names differ only in the number following "MH4_".
signals = [
    ('2HDMa_bb_sinp_0.35_tanb_1.0_mXd_10_MH3_1500_MH4_150_MH2_1500_MHC_1500',),
    ('2HDMa_bb_sinp_0.35_tanb_1.0_mXd_10_MH3_1500_MH4_250_MH2_1500_MHC_1500',),
    ('2HDMa_bb_sinp_0.35_tanb_1.0_mXd_10_MH3_1500_MH4_350_MH2_1500_MHC_1500',),
    ('2HDMa_bb_sinp_0.35_tanb_1.0_mXd_10_MH3_1500_MH4_500_MH2_1500_MHC_1500',),
    ('2HDMa_bb_sinp_0.35_tanb_1.0_mXd_10_MH3_1500_MH4_750_MH2_1500_MHC_1500',),
]

# Legend text per sample key: the backgrounds first, then one short label per
# signal in the same order as `signals`.
labels = {
    ('ZJetsToNuNu_HT',): r'$ZJets\to\nu\nu\ (binned\ by\ HT)$',
    ('WJetsToLNu_Njet',): r'$WJets\to L\nu\ (binned\ by\ N_{jets})$',
    #('TT_TuneCUETP8M2T4_14TeV-powheg-pythia8_200PU',): r'$t\bar{t}$',
}
labels.update(zip(signals, [
    '2HDMa_bb_1500_150_10',
    '2HDMa_bb_1500_250_10',
    '2HDMa_bb_1500_350_10',
    '2HDMa_bb_1500_500_10',
    '2HDMa_bb_1500_750_10',
]))

# Hex colours for the background keys only; the signals have no entry here.
colors = {
    ('ZJetsToNuNu_HT',): '#355C7D',
    ('WJetsToLNu_Njet',): '#FED23F',
    #('TT_TuneCUETP8M2T4_14TeV-powheg-pythia8_200PU',): '#EB7DB5',
}

In [None]:
# One row per plot: the four positional arguments that vary between the
# makePlot2 calls. NOTE(review): presumably (histogram key in output_flat,
# axis name, binning, x-axis label) -- confirm against plots.helpers.makePlot2.
plot_specs = [
    ('met',                'pt',           pt_bins,   r'$MET_{pt}\ (GeV)$'),
    ('lead_fatjet_pt',     'pt',           pt_bins,   r'$p_{T}\ (GeV)$'),
    ('lead_fatjet_eta',    'eta',          eta_bins,  r'$\eta$'),
    ('lead_fatjet_phi',    'phi',          phi_bins,  r'$\phi$'),
    ('lead_fatjet_sdmass', 'mass',         mass_bins, r'$mass\ (GeV)$'),
    ('lead_fatjet_tau1',   'tau',          tau1_bins, r'$\tau_1$'),
    ('lead_fatjet_tau2',   'tau',          tau2_bins, r'$\tau_2$'),
    ('lead_fatjet_tau3',   'tau',          tau3_bins, r'$\tau_3$'),
    ('lead_fatjet_tau4',   'tau',          tau4_bins, r'$\tau_4$'),
    ('nfatjet',            'multiplicity', N_bins,    r'$n_{fatjet}$'),
    ('ht',                 'pt',           ht_bins,   r'$H_{T}$'),
]

# Draw the plots in the listed order; labels, colors and signals are the same
# for every call.
for hist_name, axis_name, binning, x_label in plot_specs:
    makePlot2(output_flat, hist_name, axis_name, binning, x_label, labels, colors, signals=signals)