# Produces a table with the efficiency of the analysis cuts.
## Part 1 -> Efficiency of each cut at the final selection level.

In [75]:
import numpy as np
import pandas as pd
import ROOT as r

In [84]:
# Histogram names looked up in the ROOT file, one per cut. The integral of each
# histogram is used as the event count *before* the corresponding cut.
cutsZll = ['delta_y','n_bjets','lepiso','lep1_pt','lep2_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','inv_mass']
cutsTau = ['delta_y','n_bjets','lepiso','lep_pt','tau_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','reco_mass_','omega','rnn_score_1p','rnn_score_3p','eBDT']
# Histogram whose integral is used as the event count *after* all cuts.
finalSelection = 'Z_pt_reco_basic_cuts_ptl'
MCSample = '../MuMu/Plots/ABCD/SR/Signal_PoPy.root'
cuts = cutsZll

nEventsBeforeCut = {}
nEventsAfterCut = 0

# Open the file once (instead of once per cut) and make sure it is closed again.
file = r.TFile.Open(MCSample)
try:
    # Integral(0, -1): bin 0 is the underflow bin and, per the ROOT TH1 docs, an
    # upper bin smaller than the lower one extends the range through overflow.
    # The final-selection count does not depend on the cut, so read it once.
    nEventsAfterCut = file.Get(finalSelection).Integral(0, -1)
    for cut in cuts:
        nEventsBeforeCut[cut] = file.Get(cut).Integral(0, -1)
finally:
    file.Close()

# Efficiency of a cut = events passing the final selection / events before the cut.
efficiencies = {}
for cut, nBefore in nEventsBeforeCut.items():
    # An empty "before" histogram would otherwise abort with ZeroDivisionError.
    efficiencies[cut] = nEventsAfterCut / nBefore if nBefore else float('nan')

In [85]:
# One row per cut (the dict keys become the index), single 'Efficiency' column.
table = pd.DataFrame.from_dict(
    efficiencies,
    orient='index',
    columns=['Efficiency'],
)
table

Unnamed: 0,Efficiency
delta_y,0.986994
n_bjets,0.986431
lepiso,0.991702
lep1_pt,0.986111
lep2_pt,0.778162
ljet0_pt,0.999379
ljet1_pt,0.722386
pt_bal,0.963015
mass_jj,0.535699
n_jets_interval,0.896833


## Part 2 -> Table with | N Events | Efficiency | Total Efficiency | 

In [89]:
# Suffixes appended one at a time to the base histogram name; each resulting
# name ('met_basic_dphi', 'met_basic_dphi_drap', ...) is looked up in the file.
cutFlowZll = "dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl".split('_')
histoZll = 'met_basic'

histoString = histoZll
cutFlowRows = []

file = r.TFile.Open(MCSample)
try:
    # Event count before any cut of the cut flow is applied.
    nEventsBase = file.Get(histoZll).Integral(0, -1)

    # The count after one cut is the count before the next one, so every
    # histogram only has to be fetched and integrated once.
    nEventsAfter = nEventsBase
    for cut in cutFlowZll:
        nEvents = nEventsAfter

        histoString += '_' + cut
        nEventsAfter = file.Get(histoString).Integral(0, -1)

        # Guard the ratios so an empty histogram yields NaN instead of a crash.
        efficiency = nEventsAfter / nEvents if nEvents else float('nan')
        sumEfficiency = nEventsAfter / nEventsBase if nEventsBase else float('nan')
        cutFlowRows.append([cut, nEvents, efficiency, sumEfficiency])
finally:
    file.Close()

# Columns: cut name | events before the cut | efficiency of this cut alone |
# efficiency relative to the base histogram.
cummulativeEfficiency = pd.DataFrame(
    cutFlowRows,
    columns=['Cut', 'N Events', 'Efficiency', 'Sum Efficiency'],
)

In [87]:
# Display the cut-flow table (Cut | N Events | Efficiency | Sum Efficiency).
cummulativeEfficiency

Unnamed: 0,Cut,N Events,Efficiency,Sum Efficiency
0,dphi,12632.508067,1.0,1.0
1,drap,12632.508067,0.873335,0.873335
2,btag,11032.417442,0.942584,0.823192
3,iso,10398.983773,0.975314,0.802871
4,pt1,10142.275961,0.911424,0.731756
5,pt2,9243.912436,0.595478,0.435744
6,j1pt,5504.54299,0.932059,0.40614
7,j2pt,5130.561056,0.601881,0.244448
8,ptbal,3087.986446,0.942601,0.230417
9,mjj,2910.739396,0.509753,0.117456


## Part 4 
### Table giving the yields with uncertainties.

In [90]:
# Base directory of the ROOT files; region sub-folders and the sample file
# name are appended to it when the files are opened.
samplesPath = "/Users/diegomac/Documents/HEP/VBF-Analysis/MuEle/Plots/Z-peak/"
# Prefix shared by the four Z-boson sample names below.
channel = "Ztautau_"
# Histogram read from every sample file.
histogramName = "n_jets_interval"

# Every sample starts with a 0.0 placeholder value.
samples = dict.fromkeys(
    [
        "Data",
        "Signal_Sherpa",
        "Signal_PoPy",
        channel + "MG",
        channel + "MGNLO",
        channel + "Sherpa",
        channel + "SherpaNLO",
        "Higgs",
        "ttbar",
        "singletop",
        "VV",
        "Wjets",
        "Zjets",
        "MJ",
    ],
    0.0,
)

In [91]:
def _readGapYield(rootFilePath, histName, secondBinOnly=False):
    """Return ``(yield, statistical error)`` read from one histogram of a ROOT file.

    With ``secondBinOnly`` the content and error of bin 2 are returned.
    Otherwise the yield is ``Integral(1, -1)`` and the error is the quadrature
    sum of the errors of bins 1 and 2.

    The file is always closed again, also when reading the histogram fails.
    An exception raised by ``TFile.Open`` (e.g. ``OSError`` for a missing
    file) propagates to the caller.
    """
    rootFile = r.TFile.Open(rootFilePath, "READ")
    try:
        histogram = rootFile.Get(histName)
        if secondBinOnly:
            return histogram.GetBinContent(2), histogram.GetBinError(2)
        # NOTE(review): the integral runs from bin 1 through overflow, but only
        # the errors of bins 1 and 2 are combined -- confirm the histogram has
        # no populated bins beyond bin 2, otherwise the error is underestimated.
        error = np.sqrt(histogram.GetBinError(1)**2 + histogram.GetBinError(2)**2)
        return histogram.Integral(1, -1), error
    finally:
        rootFile.Close()


# The region used for the second contribution depends only on the path, so
# decide it once: "Tau"/"MuEle" paths read bin 2 from CR/, all others
# integrate the histogram from CRc/.
useSecondBinOnly = "Tau" in samplesPath or "MuEle" in samplesPath
secondRegion = "CR/" if useSecondBinOnly else "CRc/"

for sampleName in list(samples):
    print(sampleName)
    try:
        # Zero jets in the gap region
        zeroGapJets, zeroGapJetsE = _readGapYield(
            samplesPath+"SR/"+sampleName+".root", histogramName)
        print("0 Gap Jets = ",zeroGapJets)

        # One jet in the gap region
        oneGapJets, oneGapJetsE = _readGapYield(
            samplesPath+secondRegion+sampleName+".root", histogramName,
            secondBinOnly=useSecondBinOnly)
        print("1 Gap Jets = ",oneGapJets)
    except OSError:
        # The sample file could not be opened: mark the sample as unavailable.
        samples[sampleName] = {"Yield":"N/A","Stat":"N/A"}
        continue

    # Total yield and its statistical error (both contributions in quadrature).
    samples[sampleName] = {
        "Yield": round(zeroGapJets+oneGapJets,3),
        "Stat": round(np.sqrt(zeroGapJetsE**2+oneGapJetsE**2),3),
    }

# Create Latex format
# The raw string keeps "\pm" literal; in a normal string "\p" is an invalid
# escape sequence that newer Python versions warn about.
latex = {}
for sampleName, result in samples.items():
    latex[sampleName] = "$"+str(result["Yield"])+r" \pm "+str(result["Stat"])+"$"

Data
0 Gap Jets =  31.0
1 Gap Jets =  11.0
Signal_Sherpa
0 Gap Jets =  7.8194615840911865
1 Gap Jets =  0.9043526649475098
Signal_PoPy
0 Gap Jets =  9.043976128101349
1 Gap Jets =  0.8464422225952148
Ztautau_MG
0 Gap Jets =  53.53075981140137
1 Gap Jets =  22.12066650390625
Ztautau_MGNLO
Ztautau_Sherpa
0 Gap Jets =  31.8325138092041
1 Gap Jets =  16.289884567260742
Ztautau_SherpaNLO
Higgs
0 Gap Jets =  0.044603342190384865
1 Gap Jets =  0.008988275192677975
ttbar
0 Gap Jets =  3.3252075910568237
1 Gap Jets =  2.7190322875976562
singletop
0 Gap Jets =  0.3676448166370392
1 Gap Jets =  0.3771066963672638
VV
0 Gap Jets =  1.7162659168243408
1 Gap Jets =  1.1672616004943848
Wjets
0 Gap Jets =  0.0
1 Gap Jets =  0.0
Zjets
0 Gap Jets =  0.2501137852668762
1 Gap Jets =  0.0
MJ


Error in <TFile::TFile>: file /Users/diegomac/Documents/HEP/VBF-Analysis/MuEle/Plots/Z-peak/SR/Ztautau_MGNLO.root does not exist
Error in <TFile::TFile>: file /Users/diegomac/Documents/HEP/VBF-Analysis/MuEle/Plots/Z-peak/SR/Ztautau_SherpaNLO.root does not exist
Error in <TFile::TFile>: file /Users/diegomac/Documents/HEP/VBF-Analysis/MuEle/Plots/Z-peak/SR/MJ.root does not exist


In [92]:
# One row per sample; the inner dict keys supply the 'Yield' and 'Stat' columns.
table = pd.DataFrame.from_dict(
    samples,
    orient='index',
    columns=['Yield', "Stat"],
)
table

Unnamed: 0,Yield,Stat
Data,42.0,6.481
Signal_Sherpa,8.724,0.451
Signal_PoPy,9.89,0.32
Ztautau_MG,75.651,12.807
Ztautau_MGNLO,,
Ztautau_Sherpa,48.122,2.337
Ztautau_SherpaNLO,,
Higgs,0.054,0.019
ttbar,6.044,0.921
singletop,0.745,0.3


In [93]:
# One row per sample with its "$yield \pm stat$" string, written out as CSV.
latexTable = pd.DataFrame.from_dict(
    latex,
    orient='index',
    columns=["LatexFormat"],
)
latexTable.to_csv("test.csv")