# Produces a table with the efficiency of the analysis cuts.
## Part 1 -> Efficiency of each cut at the final selection level.

In [1]:
import numpy as np
import pandas as pd
import ROOT as r

Welcome to JupyROOT 6.30/02


In [84]:
# Histogram names to evaluate, one per analysis cut, for the Zll and the tau channels.
cutsZll = ['delta_y','n_bjets','lepiso','lep1_pt','lep2_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','inv_mass']
cutsTau = ['delta_y','n_bjets','lepiso','lep_pt','tau_pt','ljet0_pt','ljet1_pt','pt_bal','mass_jj','n_jets_interval','Z_centrality','reco_mass_','omega','rnn_score_1p','rnn_score_3p','eBDT']
# Histogram whose integral is used as the "after cut" yield for every cut.
finalSelection = 'Z_pt_reco_basic_cuts_ptl'
MCSample = '../MuMu/Plots/ABCD/SR/Signal_PoPy.root'
cuts = cutsZll

nEventsBeforeCut = {}
nEventsAfterCut = 0

# Open the sample once instead of once per cut, and always release the handle.
file = r.TFile.Open(MCSample)
try:
    # Integral(0, -1): ROOT extends an upper bin that is below the lower bin
    # to the last bin, so underflow and overflow are both included.
    # The final-selection yield does not depend on the cut, so read it once.
    nEventsAfterCut = file.Get(finalSelection).Integral(0, -1)
    for cut in cuts:
        nEventsBeforeCut[cut] = file.Get(cut).Integral(0, -1)
finally:
    file.Close()

# Efficiency = final-selection yield / yield of the histogram named after the cut.
# An empty "before" histogram gives NaN instead of aborting the whole table.
efficiencies = {
    cut: (nEventsAfterCut / nBefore if nBefore else float('nan'))
    for cut, nBefore in nEventsBeforeCut.items()
}

In [85]:
# Tabulate the per-cut efficiencies: one row per cut name, a single 'Efficiency' column.
table  = pd.DataFrame.from_dict(efficiencies,orient='index',columns=['Efficiency'])
# Bare expression so the notebook renders the table as the cell output.
table

Unnamed: 0,Efficiency
delta_y,0.986994
n_bjets,0.986431
lepiso,0.991702
lep1_pt,0.986111
lep2_pt,0.778162
ljet0_pt,0.999379
ljet1_pt,0.722386
pt_bal,0.963015
mass_jj,0.535699
n_jets_interval,0.896833


## Part 2 -> Table with | N Events | Efficiency | Total Efficiency | 

In [89]:
# Ordered cut labels; histogram names are built by appending them one by one
# to the base name, e.g. 'met_basic' -> 'met_basic_dphi' -> 'met_basic_dphi_drap' ...
cutFlowZll = "dphi_drap_btag_iso_pt1_pt2_j1pt_j2pt_ptbal_mjj_nji_zcen_mass_ptl".split('_')
histoZll = 'met_basic'

histoString = histoZll
cutFlowRows = []

file = r.TFile.Open(MCSample)
try:
    # Integral(0, -1) includes the underflow and overflow bins.
    nEventsBase = file.Get(histoZll).Integral(0, -1)

    # The yield after one cut is the yield before the next one, so each
    # histogram only has to be read once.
    nEvents = nEventsBase
    for cut in cutFlowZll:
        histoString += '_' + cut
        nEventsAfter = file.Get(histoString).Integral(0, -1)

        # Guard the ratios: once a cut removes every event the efficiency is undefined.
        efficiency = nEventsAfter / nEvents if nEvents else float('nan')
        totalEfficiency = nEventsAfter / nEventsBase if nEventsBase else float('nan')

        # 'N Events' is the yield BEFORE the cut named in the same row.
        cutFlowRows.append([cut, nEvents, efficiency, totalEfficiency])
        nEvents = nEventsAfter
finally:
    file.Close()

# Build the table in one go rather than enlarging it row by row.
cummulativeEfficiency = pd.DataFrame(cutFlowRows, columns=['Cut', 'N Events', 'Efficiency', 'Sum Efficiency'])

In [87]:
# Bare expression so the notebook renders the cut-flow table as the cell output.
cummulativeEfficiency

Unnamed: 0,Cut,N Events,Efficiency,Sum Efficiency
0,dphi,12632.508067,1.0,1.0
1,drap,12632.508067,0.873335,0.873335
2,btag,11032.417442,0.942584,0.823192
3,iso,10398.983773,0.975314,0.802871
4,pt1,10142.275961,0.911424,0.731756
5,pt2,9243.912436,0.595478,0.435744
6,j1pt,5504.54299,0.932059,0.40614
7,j2pt,5130.561056,0.601881,0.244448
8,ptbal,3087.986446,0.942601,0.230417
9,mjj,2910.739396,0.509753,0.117456


## Part 4 
### Table giving the yields with uncertainties FOR INCLUSIVE region.

In [4]:
from ctypes import c_double

# Directory with one "<sample>.root" file per sample (region sub-folders are
# appended by the yield loop when the path is not an inclusive one).
samplesPath = "/Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/"
channel = "Ztautau_"
histogramName = "n_jets_interval"

# Every sample starts with a 0.0 placeholder; the yield loop replaces it
# with a {"Yield": ..., "Stat": ...} dictionary.
samples = dict.fromkeys(
    [
        "Data",
        "Signal_Sherpa",
        "Signal_PoPy",
        *(channel + suffix for suffix in ("MGRW", "MGNLORW", "SherpaRW", "SherpaNLORW")),
        "Higgs",
        "ttbar",
        "singletop",
        "VV",
        "Wjets",
        "Zjets",
        "MJ",
        "VV_EWK",
    ],
    0.0,
)

In [5]:
def _integralWithError(rootPath, histName, firstBin=1, lastBin=-1):
    """Return ``(integral, statistical error)`` of one histogram over a bin range.

    Both numbers come from a single ``IntegralAndError`` call, so the error
    always covers exactly the same bins as the yield. The file is closed
    even if reading the histogram fails.

    Args:
        rootPath: path of the ROOT file to open read-only.
        histName: name of the histogram inside the file.
        firstBin: first bin of the range.
        lastBin: last bin of the range; ROOT extends a value below
            ``firstBin`` (such as -1) to the last bin, overflow included.

    Raises:
        OSError: if the ROOT file cannot be opened.
    """
    rootFile = r.TFile.Open(rootPath, "READ")
    # Recent PyROOT raises OSError from TFile.Open itself; this check covers
    # releases that hand back a null or zombie file instead.
    if not rootFile or rootFile.IsZombie():
        raise OSError("could not open " + rootPath)
    try:
        histogram = rootFile.Get(histName)
        errorDouble = c_double(0.0)
        integral = histogram.IntegralAndError(firstBin, lastBin, errorDouble)
        return integral, errorDouble.value
    finally:
        rootFile.Close()


# The path decides which files and histograms are read; it does not change per sample.
isInclusive = "Inclusive" in samplesPath
isTauLike = "Tau" in samplesPath or "MuEle" in samplesPath

for sampleName in samples:
    print(sampleName)
    try:
        if isInclusive:
            # Single file per sample: integrate the whole histogram. The error
            # now spans the same bins as the yield (it used to take bins 1-2 only).
            total, totalE = _integralWithError(samplesPath + sampleName + ".root", histogramName)
            print("Total = ", total)
            samples[sampleName] = {"Yield": round(total, 3), "Stat": round(totalE, 3)}

        else:
            # Zero jets in the gap region (for Zll) / For Ztautau in the SR look for
            # events with centrality between 0 and 1 -> zero jets in the gap
            if isTauLike:
                zeroGapJets, zeroGapJetsE = _integralWithError(
                    samplesPath + "SR/" + sampleName + ".root", "Z_centrality", 1, 100)
            else:
                zeroGapJets, zeroGapJetsE = _integralWithError(
                    samplesPath + "SR/" + sampleName + ".root", histogramName)
            print("0 Gap Jets = ", zeroGapJets)

            # One jet in the gap region (for Zll) / For Ztautau in the CR look for
            # events with one jet in the gap -> any centrality
            if isTauLike:
                # Bin 2 only: same numbers as GetBinContent(2) / GetBinError(2).
                oneGapJets, oneGapJetsE = _integralWithError(
                    samplesPath + "CR/" + sampleName + ".root", histogramName, 2, 2)
            else:
                oneGapJets, oneGapJetsE = _integralWithError(
                    samplesPath + "CRc/" + sampleName + ".root", histogramName)
            print("1 Gap Jets = ", oneGapJets)

            # Yields add linearly, statistical errors in quadrature.
            samples[sampleName] = {"Yield": round(zeroGapJets + oneGapJets, 3),
                                   "Stat": round(np.sqrt(zeroGapJetsE**2 + oneGapJetsE**2), 3)}

    except OSError:
        # A missing file in any region marks the whole sample as unavailable.
        samples[sampleName] = {"Yield": "N/A", "Stat": "N/A"}

# Create Latex format
# Raw string: "\p" is not a valid escape sequence and triggers a SyntaxWarning otherwise.
latex = {
    sampleName: "$" + str(result["Yield"]) + r" \pm " + str(result["Stat"]) + "$"
    for sampleName, result in samples.items()
}

Data
0 Gap Jets =  322.0
Signal_Sherpa
0 Gap Jets =  92.83777888538316
Signal_PoPy
0 Gap Jets =  104.11233960068785
Ztautau_MGRW
0 Gap Jets =  187.04709400236607
Ztautau_MGNLORW
Ztautau_SherpaRW
0 Gap Jets =  181.8386713564396
Ztautau_SherpaNLORW
Higgs
0 Gap Jets =  10.272100184243754
ttbar
0 Gap Jets =  6.739538364112377
singletop
0 Gap Jets =  1.0357631966471672
VV
0 Gap Jets =  6.121451486455044
Wjets
0 Gap Jets =  0.0
Zjets
0 Gap Jets =  0.7981715723872185
MJ
0 Gap Jets =  6.710715472698212
VV_EWK
0 Gap Jets =  12.148259653011337


  latexString = "$"+str(samples[sampleName]["Yield"])+" \pm "+str(samples[sampleName]["Stat"])+"$"
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/CR/Data.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/CR/Signal_Sherpa.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/CR/Signal_PoPy.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/CR/Ztautau_MGRW.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/SR/Ztautau_MGNLORW.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/Z-peak/CR/Ztautau_SherpaRW.root does not exist
Error in <TFile::TFile>: file /Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/

In [21]:
# One row per sample with its 'Yield' and 'Stat' entries (the string "N/A"
# for samples whose ROOT file could not be opened).
table  = pd.DataFrame.from_dict(samples,orient='index',columns=['Yield',"Stat"])
# Bare expression so the notebook renders the table as the cell output.
table

Unnamed: 0,Yield,Stat
Data,416.0,20.396
Signal_Sherpa,92.208,1.518
Signal_PoPy,103.057,1.063
Ztautau_MGRW,478.468,32.682
Ztautau_MGNLORW,247.593,11.529
Ztautau_SherpaRW,343.377,14.618
Ztautau_SherpaNLORW,341.012,18.708
Higgs,11.922,0.214
ttbar,7.155,1.01
singletop,0.876,0.31


In [22]:
# One row per sample holding its "$yield \pm stat$" string in a 'LatexFormat' column.
latexTable  = pd.DataFrame.from_dict(latex,orient='index',columns=["LatexFormat"])
# Written to the current working directory; an existing test.csv is overwritten.
latexTable.to_csv("test.csv")

## Part 5
### Table giving the yields with uncertainties FOR SIGNAL region.

In [13]:
from ctypes import c_double

# Directory with one "<sample>.root" file per sample for this selection.
samplesPath = "/Users/user/Documents/HEP/VBF-Analysis/VBFAnalysisPlots/TauTau/TauhadTaulep/High-Mass/NewBDTTightTauOS/"
channel = "Ztautau_"
histogramName = "n_jets_interval"

# Every sample starts with a 0.0 placeholder; the yield loop replaces it
# with a {"Yield": ..., "Stat": ...} dictionary.
samples = dict.fromkeys(
    [
        "Data",
        "Signal_Sherpa",
        "Signal_PoPy",
        *(channel + suffix for suffix in ("MGRW", "SherpaRW")),
        "Higgs",
        "ttbar",
        "singletop",
        "VV",
        "Wjets",
        "Zjets",
        "MJ",
        "VV_EWK",
    ],
    0.0,
)

In [14]:
for sampleName in samples:
    print(sampleName)
    file = None
    try:
        # Zero jets in the gap region BIN
        file = r.TFile.Open(samplesPath + sampleName + ".root", "READ")
        histogram = file.Get(histogramName)
        zeroGapJets = histogram.GetBinContent(1)
        # The bin error is already non-negative, so no sqrt(x**2) round trip is needed.
        zeroGapJetsE = histogram.GetBinError(1)
        print("0 Gap Jets = ", zeroGapJets)
        samples[sampleName] = {"Yield": round(zeroGapJets, 3), "Stat": round(zeroGapJetsE, 3)}

    except Exception as error:
        # Still best-effort (any read failure becomes "N/A"), but the reason is
        # reported and KeyboardInterrupt / SystemExit are no longer swallowed.
        print("Could not read", sampleName, "->", error)
        samples[sampleName] = {"Yield": "N/A", "Stat": "N/A"}

    finally:
        # Release the handle even when reading the histogram failed.
        if file:
            file.Close()


# One row per sample with its 'Yield' and 'Stat' entries.
table = pd.DataFrame.from_dict(samples, orient='index', columns=['Yield', "Stat"])
# Bare expression so the notebook renders the table as the cell output.
table

Data
0 Gap Jets =  0.0
Signal_Sherpa
0 Gap Jets =  3.044281482696533
Signal_PoPy
0 Gap Jets =  3.7036261558532715
Ztautau_MGRW
0 Gap Jets =  1.8484238386154175
Ztautau_SherpaRW
0 Gap Jets =  0.6298002004623413
Higgs
0 Gap Jets =  0.9722560048103333
ttbar
0 Gap Jets =  1.3410611152648926
singletop
0 Gap Jets =  0.15813344717025757
VV
0 Gap Jets =  0.8411335945129395
Wjets
0 Gap Jets =  0.0
Zjets
0 Gap Jets =  0.0
MJ
0 Gap Jets =  0.9657964706420898
VV_EWK
0 Gap Jets =  2.8157694339752197


Unnamed: 0,Yield,Stat
Data,0.0,0.0
Signal_Sherpa,3.044,0.281
Signal_PoPy,3.704,0.205
Ztautau_MGRW,1.848,1.403
Ztautau_SherpaRW,0.63,0.531
Higgs,0.972,0.056
ttbar,1.341,0.459
singletop,0.158,0.158
VV,0.841,0.09
Wjets,0.0,0.0


In [6]:
# Create Latex format
latex = {}
for sampleName in samples.keys():
    print(sampleName)
    latexString = "$"+str(samples[sampleName]["Yield"])+" \pm "+str(samples[sampleName]["Stat"])+"$"
    latex[sampleName] = latexString 
latexTable  = pd.DataFrame.from_dict(latex,orient='index',columns=["LatexFormat"])
latexTable.to_csv("test.csv")

Data
Signal_Sherpa
Signal_PoPy
Ztautau_MGRW
Ztautau_SherpaRW
Higgs
ttbar
singletop
VV
Wjets
Zjets
MJ
VV_EWK


  latexString = "$"+str(samples[sampleName]["Yield"])+" \pm "+str(samples[sampleName]["Stat"])+"$"
