# Produces a table with the efficiency of the analysis cuts.
## Part 1 -> Efficiency of each cut at the final selection level.

In [1]:
import numpy as np
import pandas as pd
import ROOT as r

In [7]:
# Names of the histograms read for each channel; each entry is looked up as a
# key in the ROOT file below.
# NOTE(review): presumably each of these histograms is filled with every cut
# applied except the one it is named after -- confirm against the producer.
cutsZll = ['delta_y','n_bjets','lepiso','lep1_pt','lep2_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','inv_mass']
cutsTau = ['delta_y','n_bjets','lepiso','lep_pt','tau_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','reco_mass_','omega','rnn_score_1p','rnn_score_3p','eBDT']
# Histogram whose integral is used as the "after all cuts" event count.
finalSelection = 'Z_pt_reco_basic_all'
MCSample = '~/Downloads/VBF_Zee_MG.root'
cuts = cutsZll


def _histogram_integral(root_file, name):
    """Return the integral of the histogram stored under *name* in *root_file*.

    The integral is taken with the bin range (0, -1), exactly as before; per
    the ROOT TH1::Integral convention this is expected to cover every bin,
    underflow and overflow included.

    Raises:
        KeyError: if *root_file* has no object called *name*.
    """
    histogram = root_file.Get(name)
    # PyROOT hands back a null object (falsy) when the key does not exist.
    if not histogram:
        raise KeyError("histogram '{}' not found in '{}'".format(name, root_file.GetName()))
    return histogram.Integral(0, -1)


# Open the sample once (instead of once per cut) and make sure it is closed
# again, even if one of the histograms is missing.
file = r.TFile.Open(MCSample)
if not file or file.IsZombie():
    raise OSError("could not open ROOT file '{}'".format(MCSample))
try:
    # The final-selection yield is the same for every cut, so read it once.
    nEventsAfterCut = _histogram_integral(file, finalSelection)
    nEventsBeforeCut = {cut: _histogram_integral(file, cut) for cut in cuts}
finally:
    file.Close()

# Efficiency of a cut = N(final selection) / N(histogram named after the cut).
# An empty "before" histogram yields NaN instead of a ZeroDivisionError so
# that the remaining cuts still make it into the table.
efficiencies = {
    cut: (nEventsAfterCut / nBefore if nBefore else float('nan'))
    for cut, nBefore in nEventsBeforeCut.items()
}

In [8]:
# One row per cut (the dictionary keys become the index) and a single
# 'Efficiency' column holding the corresponding dictionary values.
table = pd.DataFrame.from_dict(
    data=efficiencies,
    orient='index',
    columns=['Efficiency'],
)
# Bare expression so the notebook renders the table as the cell output.
table

Unnamed: 0,Efficiency
delta_y,0.97766
n_bjets,0.970418
lepiso,0.944187
lep1_pt,0.976974
lep2_pt,0.768259
ljet0_pt,0.999212
ljet1_pt,0.752858
pt_bal,0.935369
mass_jj,0.549122
n_jets_interval,0.740292


## Part 2 -> Table with | N Events | Efficiency | Total Efficiency | 

In [8]:
def find_cut_chain(root_file, histogram_name):
    """Return the longest key name in *root_file* that contains *histogram_name*.

    Every key returned by ``root_file.GetListOfKeys()`` is inspected; a key
    matches when *histogram_name* occurs anywhere in its name. When several
    matching names share the maximum length, the first one listed wins.

    Returns the string ``'not found'`` when no key matches (or when the only
    matches are empty names).
    """
    all_names = (entry.GetName() for entry in root_file.GetListOfKeys())
    candidates = (name for name in all_names if histogram_name in name)
    # max() keeps the first maximal element, i.e. the same tie-break as a
    # strict "longer than the current best" comparison.
    best = max(candidates, key=len, default='')
    return best or 'not found'

# Common prefix of the cut-flow histograms; the remainder of each histogram
# name is the '_'-separated list of cuts applied so far.
histoZll = 'mass_jj_'
cummulativeEfficiency = pd.DataFrame({'Cut':[],'N Events':[],'Efficiency (%)':[],'Cummulative Efficiency (%)':[]})
MCSample = '../VBFAnalysisPlots/MadgraphStudies/MG_Sherpa_PoPy_Efficiencies/Signal_PoPy_MGfiducial.root'
histoString = histoZll


def _events_in(root_file, name):
    """Return the integral (bin range 0, -1) of histogram *name* in *root_file*.

    Raises:
        KeyError: if *root_file* has no object called *name*.
    """
    histogram = root_file.Get(name)
    # PyROOT hands back a null object (falsy) when the key does not exist.
    if not histogram:
        raise KeyError("histogram '{}' not found in '{}'".format(name, root_file.GetName()))
    return histogram.Integral(0, -1)


def _percentage(passed, total):
    """Return 100 * passed / total rounded to one decimal, or NaN if total is 0."""
    if not total:
        return float('nan')
    return round(100*passed/total, 1)


# The file is intentionally left open at module level, as before, because
# later cells may still read from it.
file = r.TFile.Open(MCSample)
if not file or file.IsZombie():
    raise OSError("could not open ROOT file '{}'".format(MCSample))

# The longest histogram name carrying the prefix encodes the full cut chain.
longest_histogram_name = find_cut_chain(file, histoZll)
if longest_histogram_name == 'not found':
    # Fail here rather than later on a meaningless 'mass_jj_not found' lookup.
    raise KeyError("no histogram containing '{}' in '{}'".format(histoZll, MCSample))
# NOTE: splitting on '_' assumes that individual cut names contain no underscore.
cuts_vector = longest_histogram_name.replace(histoZll,'').split('_')

# First entry of the chain is the baseline every later yield is compared to.
before_cut_name = histoZll+cuts_vector[0]
nEventsBase = _events_in(file, before_cut_name)
cummulativeEfficiency.loc[-1] = ["basic",round(nEventsBase,2),100,100]

# Walk the chain one cut at a time. The yield before a cut is the yield after
# the previous one, so each histogram only has to be read once.
nEvents = nEventsBase
for index, cut in enumerate(cuts_vector[1:]):
    before_cut_name += '_'+cut
    nEventsAfter = _events_in(file, before_cut_name)

    cummulativeEfficiency.loc[index] = [
        cut,
        round(nEventsAfter,2),
        _percentage(nEventsAfter, nEvents),      # relative to the previous step
        _percentage(nEventsAfter, nEventsBase),  # relative to the baseline
    ]
    nEvents = nEventsAfter


In [9]:

cummulativeEfficiency

Unnamed: 0,Cut,N Events,Efficiency (%),Cummulative Efficiency (%)
-1,basic,7444.97,100.0,100.0
0,dphi,7444.97,100.0,100.0
1,drap,7066.17,94.9,94.9
2,btag,6724.73,95.2,90.3
3,iso,6054.39,90.0,81.3
4,pt1,5448.91,90.0,73.2
5,pt2,3377.16,62.0,45.4
6,j1pt,3168.05,93.8,42.6
7,j2pt,2126.84,67.1,28.6
8,ptbal,2030.47,95.5,27.3
