# Produces a table with the efficiency of the analysis cuts.
## Part 1 -> Efficiency of each cut at the final selection level.

In [6]:
import numpy as np
import pandas as pd
import ROOT as r

In [7]:
# Histogram names looked up in the ROOT file, one per analysis cut.
# `cutsZll` and `cutsTau` are two alternative lists; `cuts` selects the one in use.
cutsZll = ['delta_y','n_bjets','lepiso','lep1_pt','lep2_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','inv_mass']
cutsTau = ['delta_y','n_bjets','lepiso','lep_pt','tau_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','reco_mass_','omega','rnn_score_1p','rnn_score_3p','eBDT']
# Histogram whose integral is used as the "after all cuts" event count.
finalSelection = 'Z_pt_reco_basic_all'
MCSample = '~/Downloads/VBF_Zee_MG.root'
cuts = cutsZll

nEventsBeforeCut = {}
nEventsAfterCut = 0

# Open the sample once (instead of once per cut) and always release the handle.
file = r.TFile.Open(MCSample)
try:
    # Integral(0,-1): a last bin smaller than the first makes ROOT integrate up to
    # the overflow bin, so under- and overflow are included in the count.
    nEventsAfterCut = file.Get(finalSelection).Integral(0,-1)
    for cut in cuts:
        # NOTE(review): presumably each per-cut histogram is filled with every cut
        # applied except `cut` itself -- confirm against the histogram producer.
        nEventsBeforeCut[cut] = file.Get(cut).Integral(0,-1)
finally:
    file.Close()

# Efficiency of a cut = events at final selection / events in that cut's histogram.
# An empty histogram yields NaN instead of aborting the whole table.
efficiencies = {}
for cut, nBefore in nEventsBeforeCut.items():
    efficiencies[cut] = nEventsAfterCut/nBefore if nBefore else float('nan')

In [8]:
# Tabulate the per-cut efficiencies: one row per cut name, one 'Efficiency' column.
table = pd.DataFrame.from_dict(
    efficiencies,
    orient='index',
    columns=['Efficiency'],
)
table

Unnamed: 0,Efficiency
delta_y,0.97766
n_bjets,0.970418
lepiso,0.944187
lep1_pt,0.976974
lep2_pt,0.768259
ljet0_pt,0.999212
ljet1_pt,0.752858
pt_bal,0.935369
mass_jj,0.549122
n_jets_interval,0.740292


## Part 2 -> Table with | N Events | Efficiency | Total Efficiency | 

In [None]:
def find_cut_chain(root_file, histogram_name):
    """Return the longest key name in *root_file* that contains *histogram_name*.

    Every key returned by ``root_file.GetListOfKeys()`` is inspected through its
    ``GetName()`` method. The match is a plain substring test. When several
    matching names share the maximum length, the first one in key order wins.

    Returns the string ``'not found'`` when no non-empty name matches.
    """
    candidates = [
        name
        for name in (key.GetName() for key in root_file.GetListOfKeys())
        if histogram_name in name
    ]
    # max() keeps the first of equally long names, like a strict '>' comparison.
    best = max(candidates, key=len, default='')
    return best or 'not found'

def _percent(part, whole):
    """Return 100*part/whole rounded to one decimal, or NaN when whole is zero."""
    return round(100*part/whole,1) if whole else float('nan')


# Prefix shared by all histograms of the cut chain; the suffix encodes the cuts
# applied so far, separated by underscores.
histoZll = 'mass_jj_'
cummulativeEfficiency = pd.DataFrame({'Cut':[],'N Events':[],'Efficiency (%)':[],'Cummulative Efficiency (%)':[]})
MCSample = '~/Downloads/VBF_Zee_MG.root'
histoString = histoZll

index = 0
file = r.TFile.Open(MCSample)
try:
    # The longest histogram name carrying the prefix spells out the whole cut chain.
    chain_name = find_cut_chain(file,histoZll)
    if chain_name == 'not found':
        raise LookupError("no histogram containing '"+histoZll+"' in "+MCSample)
    cuts_vector = chain_name.replace(histoZll,'').split('_')

    # Baseline: histogram with only the first element of the chain applied.
    before_cut_name = histoZll+cuts_vector[0]
    # Integral(0,-1) includes the under- and overflow bins.
    nEventsBase = (file.Get(before_cut_name)).Integral(0,-1)
    cummulativeEfficiency.loc[-1] = ["basic",round(nEventsBase,2),100,100]

    # Walk the chain one cut at a time. The count after one cut is the count
    # before the next one, so each histogram is read only once.
    nEvents = nEventsBase
    for cut in cuts_vector[1:]:
        before_cut_name += '_'+cut
        nEventsAfter = file.Get(before_cut_name).Integral(0,-1)

        cummulativeEfficiency.loc[index] = [cut,round(nEventsAfter,2),_percent(nEventsAfter,nEvents),_percent(nEventsAfter,nEventsBase)]
        index+=1
        nEvents = nEventsAfter
finally:
    file.Close()


mass_jj_basic  =  6798.905951436493
mass_jj_basic_dphi
mass_jj_basic_dphi  =  6798.905951436493
mass_jj_basic_dphi_drap
mass_jj_basic_dphi_drap  =  6382.571680194582
mass_jj_basic_dphi_drap_btag
mass_jj_basic_dphi_drap_btag  =  5978.447620716644
mass_jj_basic_dphi_drap_btag_iso
mass_jj_basic_dphi_drap_btag_iso  =  5306.092652597581
mass_jj_basic_dphi_drap_btag_iso_pt1
mass_jj_basic_dphi_drap_btag_iso_pt1  =  4782.856447655475
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2  =  2827.7324431898305
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt  =  2662.2698309642146
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt  =  1867.6143055148423
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal  =  1739.6520747400937
mass_jj_basic_dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj
mass_jj_basic_dphi_drap_bta

In [29]:

# Display the cut-flow table: one row per cut with event count, per-cut and cumulative efficiency.
cummulativeEfficiency

Unnamed: 0,Cut,N Events,Efficiency (%),Cummulative Efficiency (%)
-1,basic,6798.91,100.0,100.0
0,dphi,6798.91,100.0,100.0
1,drap,6382.57,93.9,93.9
2,btag,5978.45,93.7,87.9
3,iso,5306.09,88.8,78.0
4,pt1,4782.86,90.1,70.3
5,pt2,2827.73,59.1,41.6
6,j1pt,2662.27,94.1,39.2
7,j2pt,1867.61,70.2,27.5
8,ptbal,1739.65,93.1,25.6


## Part 4 
### Table giving the yields with uncertainties FOR INCLUSIVE region.

In [None]:
# Base directory of the ROOT files; each sample is read from "<sampleName>.root" below it.
samplesPath = "/Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/"
channel = "Ztautau_"
# Sample name -> 0.0 placeholder; the yield loop later replaces each value with a
# {"Yield", "Stat"} dictionary.
samples = dict.fromkeys(
    [
        "Data", "Signal_Sherpa", "Signal_PoPy",
        channel + "MGRW", channel + "MGNLORW",
        channel + "SherpaRW", channel + "SherpaNLORW",
        "Higgs", "ttbar", "singletop", "VV",
        "Wjets", "Zjets", "MJ", "VV_EWK",
    ],
    0.0,
)
# Histogram read from each sample file.
histogramName = "n_jets_interval"

from ctypes import c_double

In [None]:
def _read_yield(rootPath, histName, firstBin=1, lastBin=-1):
    """Return (yield, statistical error) of a histogram over a bin range.

    Opens *rootPath* read-only, integrates histogram *histName* from *firstBin*
    to *lastBin* with ``IntegralAndError`` and closes the file again, also on
    failure. A *lastBin* smaller than *firstBin* makes ROOT integrate up to and
    including the overflow bin.

    Raises OSError when the file cannot be opened and LookupError when the
    histogram is not in the file.
    """
    rootFile = r.TFile.Open(rootPath,"READ")
    # Depending on the PyROOT version a failed Open raises OSError by itself or
    # hands back a null/zombie file; treat both the same way.
    if not rootFile or rootFile.IsZombie():
        raise OSError("could not open "+rootPath)
    try:
        histogram = rootFile.Get(histName)
        if not histogram:
            raise LookupError("histogram '"+histName+"' not found in "+rootPath)
        errorDouble = c_double(0.0)
        integral = histogram.IntegralAndError(firstBin,lastBin,errorDouble)
        return integral, errorDouble.value
    finally:
        rootFile.Close()


# Ztautau-like inputs are read with different histograms/bins than Zll ones.
isTauLike = "Tau" in samplesPath or "MuEle" in samplesPath

for sampleName in samples:
    print(sampleName)
    fileName = sampleName+".root"
    try:
        if "Inclusive" in samplesPath:
            # Single file per sample: sum every bin from 1 up to the overflow bin.
            total, totalE = _read_yield(samplesPath+fileName,histogramName)
            print("Total = ",total)

        else:
            # Zero jets in the gap region (for Zll) / For Ztautau in the SR look for events with centrality between 0 and 1 -> zero jets in the gap
            if isTauLike:
                zeroGapJets, zeroGapJetsE = _read_yield(samplesPath+"SR/"+fileName,"Z_centrality",1,100)
            else:
                zeroGapJets, zeroGapJetsE = _read_yield(samplesPath+"SR/"+fileName,histogramName)
            print("0 Gap Jets = ",zeroGapJets)

            # One jet in the gap region (for Zll) / For Ztautau in the CR look for events with one jet in the gap -> any centrality
            if isTauLike:
                # Only bin 2 of the histogram is used here.
                oneGapJets, oneGapJetsE = _read_yield(samplesPath+"CR/"+fileName,histogramName,2,2)
            else:
                oneGapJets, oneGapJetsE = _read_yield(samplesPath+"CRc/"+fileName,histogramName)
            print("1 Gap Jets = ",oneGapJets)

            # Yields add linearly, independent statistical errors in quadrature.
            total = zeroGapJets+oneGapJets
            totalE = np.sqrt(zeroGapJetsE**2+oneGapJetsE**2)

        samples[sampleName] = {"Yield":round(total,3),"Stat":round(totalE,3)}

    except (OSError, LookupError) as err:
        # Missing file or histogram: keep the sample in the table, flagged as unavailable.
        print("  not available:",err)
        samples[sampleName] = {"Yield":"N/A","Stat":"N/A"}

# Create Latex format
# Raw string so that the backslash of \pm is not parsed as an escape sequence.
latex = {}
for sampleName, entry in samples.items():
    latex[sampleName] = "$"+str(entry["Yield"])+r" \pm "+str(entry["Stat"])+"$"

In [None]:
# One row per sample, with its yield and statistical uncertainty as columns.
table = pd.DataFrame.from_dict(
    samples,
    orient='index',
    columns=['Yield', "Stat"],
)
table

In [None]:
# One-column table of the LaTeX-formatted strings, written to test.csv in the working directory.
latexTable = pd.DataFrame.from_dict(
    latex,
    orient='index',
    columns=["LatexFormat"],
)
latexTable.to_csv("test.csv")

## Part 5
### Table giving the yields with uncertainties FOR SIGNAL region.

In [None]:
# Directory of the ROOT files; each sample is read from "<sampleName>.root" inside it.
samplesPath = "/Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/TauhadTaulep/High-Mass/NewBDTMJSS/"
channel = "Ztautau_"
# Sample name -> 0.0 placeholder; the yield loop later replaces each value with a
# {"Yield", "Stat"} dictionary.
samples = dict.fromkeys(
    [
        "Data", "Signal_Sherpa", "Signal_PoPy",
        channel + "MGRW", channel + "SherpaRW",
        "Higgs", "ttbar", "singletop", "VV",
        "Wjets", "Zjets", "MJ", "VV_EWK", "W_EWK_Sherpa",
    ],
    0.0,
)
# Histogram read from each sample file.
histogramName = "n_bjets"

from ctypes import c_double

In [None]:
# For every sample, read the content and error of bin 1 of `histogramName`.
# Samples whose file or histogram is missing are kept in the table as "N/A".
for sampleName in samples:
    print(sampleName)
    samplePath = samplesPath+sampleName+".root"
    try:
        file = r.TFile.Open(samplePath,"READ")
        # Depending on the PyROOT version a failed Open raises OSError by itself
        # or hands back a null/zombie file; treat both the same way.
        if not file or file.IsZombie():
            raise OSError("could not open "+samplePath)
        try:
            histogram = file.Get(histogramName)
            if not histogram:
                raise LookupError("histogram '"+histogramName+"' not found in "+samplePath)
            # Zero jets in the gap region BIN
            zeroGapJets = histogram.GetBinContent(1)
            # The bin error is already a non-negative number; no sqrt(x**2) needed.
            zeroGapJetsE = histogram.GetBinError(1)
        finally:
            # Release the file even when the histogram lookup fails.
            file.Close()
    except (OSError, LookupError) as err:
        print("  not available:",err)
        samples[sampleName] = {"Yield":"N/A","Stat":"N/A"}
        continue

    print("0 Gap Jets = ",zeroGapJets)
    samples[sampleName] = {"Yield":round(zeroGapJets,1),"Stat":round(zeroGapJetsE,1)}



table  = pd.DataFrame.from_dict(samples,orient='index',columns=['Yield',"Stat"])
table

In [None]:
# Create Latex format
# Each sample becomes the string "$<Yield> \pm <Stat>$". The separator is a raw
# string so that the backslash of \pm is not parsed as an (invalid) escape sequence.
latex = {}
for sampleName, entry in samples.items():
    print(sampleName)
    latex[sampleName] = "$"+str(entry["Yield"])+r" \pm "+str(entry["Stat"])+"$"
# NOTE: the inclusive-region cell writes to the same "test.csv"; running both
# overwrites the earlier output.
latexTable  = pd.DataFrame.from_dict(latex,orient='index',columns=["LatexFormat"])
latexTable.to_csv("test.csv")