Import required modules

In [None]:
import ROOT # a modular scientific software toolkit
import math # general mathematical function
import numpy as np # the fundamental package for scientific computing with Python
from statsmodels.stats.proportion import proportion_confint # confidence interval for a binomial proportion
import matplotlib.pyplot as plt # a plotting library

In [None]:
# ROOT needs a canvas object in order to show plots in Jupyter, so create one up front.
c = ROOT.TCanvas()

Definitions of common types and constants used in the code below:
* __GenMatch__ - tau candidate gen matching
* __channels__ - channel names;
* __samples__ - list of samples;
* __sample_names__ - list of names of the samples for visualization purposes;
* __working_points__ - available working points of the tau anti-electron discriminator;
* __path__ - path to the input root files

In [None]:
class GenMatch:
    """Generator-level matching codes of a reconstructed tau candidate."""

    # Reco tau is matched to a prompt electron.
    Electron = 1
    # Reco tau is matched to a prompt muon.
    Muon = 2
    # Reco tau is matched to an electron coming from a tau decay.
    TauElectron = 3
    # Reco tau is matched to a muon coming from a tau decay.
    TauMuon = 4
    # Reco tau is matched to a tau that decayed into hadrons + neutrinos.
    Tau = 5
    # Reco tau is not matched to any lepton at the generator level
    # (i.e. it is a quark or gluon jet).
    NoMatch = 6

# Analysis channels; the channel name is also used to look up objects inside
# the input ROOT files.
channels = ['eTau', 'muTau']
# Short sample identifiers, used as the first part of the input file names.
samples = ['DY', 'HH']
# Human-readable (LaTeX) sample labels for plot legends, in the same order as
# `samples`. Raw strings keep every backslash literal: the previous mix of
# '\g' and '\\t' relied on '\g' being an invalid escape sequence, which newer
# Python versions warn about.
sample_names = [
    r'$Z/\gamma^*\to\ell\ell + jets$',
    r'$gg \to HH \to bb\tau\tau$',
]
# Working points of the tau anti-electron discriminator, used as the second
# part of the input file names.
# NOTE(review): 'None' presumably means "discriminator not applied" - confirm.
working_points = ['None', 'VLoose', 'Loose', 'Medium', 'Tight', 'VTight']
# Directory that contains the input ROOT files.
path = './'

In [None]:
def GetHistograms(path, sample, channel, wp):
    """Read the selection and gen-match histograms for one sample / channel / working point.

    The input file is produced by Production.py and is expected to be named
    ``{sample}_{wp}.root`` inside `path`.

    Args:
        path: directory that contains the input ROOT files.
        sample: sample name (first part of the file name).
        channel: channel name; selects the ``{channel}_stat`` directory and the
            ``{channel}`` tree inside the file.
        wp: working point name (second part of the file name).

    Returns:
        Tuple ``(sel_events, sel_tau, gen_match_hist)``. All three histograms
        are detached from the file, so they stay valid after it is closed.

    Raises:
        RuntimeError: if the file cannot be opened or an expected object is
            missing from it.
    """
    file_name = '{}/{}_{}.root'.format(path, sample, wp)
    # 'READ' is the documented read-only mode of TFile.
    root_file = ROOT.TFile(file_name, 'READ')
    if not root_file or root_file.IsZombie():
        raise RuntimeError('Unable to open file "{}".'.format(file_name))

    def get_object(name):
        # A missing key comes back as a null pointer, which tests as False.
        obj = root_file.Get(name)
        if not obj:
            raise RuntimeError('Object "{}" not found in "{}".'.format(name, file_name))
        return obj

    try:
        sel_events = get_object('{}_stat/Selection_events'.format(channel))
        sel_events.SetDirectory(0)
        sel_tau = get_object('{}_stat/Selection_SignalTaus'.format(channel))
        sel_tau.SetDirectory(0)

        # Six unit-width bins centred on the integer GenMatch codes 1..6.
        # 'goff' fills the histogram without drawing it, so no pad keeps a
        # reference to an object that is deleted when the file is closed.
        gen_match_hist_name = '{}_{}_{}_gen'.format(sample, channel, wp)
        tree = get_object(channel)
        tree.Draw('gen_match_2>>{}(6, 0.5, 6.5)'.format(gen_match_hist_name), '', 'goff')
        drawn_hist = ROOT.gROOT.FindObject(gen_match_hist_name)
        if not drawn_hist:
            raise RuntimeError('Histogram "{}" was not created.'.format(gen_match_hist_name))
        # Clone and detach, so that the returned histogram outlives the file.
        gen_match_hist = drawn_hist.Clone()
        gen_match_hist.SetDirectory(0)
    finally:
        # Always release the file handle; the function is called once per
        # sample / channel / working point combination.
        root_file.Close()
    return sel_events, sel_tau, gen_match_hist

Load and draw histograms for Drell-Yan, e-tau channel, VLoose against electron working point

In [None]:
... 

Calculate the efficiency of the anti-electron discriminator and the contamination by the electrons in the final selection

In [None]:
def ratio(n_passed, n_total):
    """Return the passed / total fraction and its 68% binomial confidence interval.

    The interval is computed by `proportion_confint` with ``method='beta'``.

    Returns:
        Tuple ``(fraction, lower bound, upper bound)``.
    """
    fraction = n_passed / n_total
    confidence_level = 0.68
    interval = proportion_confint(n_passed, n_total, alpha=(1 - confidence_level), method='beta')
    lower, upper = interval
    return fraction, lower, upper

In [None]:
def GetBinIndex(hist, bin_name):
    """Return the index of the x-axis bin whose label equals `bin_name`.

    Args:
        hist: histogram-like object providing ``GetXaxis()``.
        bin_name: bin label to look for.

    Returns:
        The index of the first bin with a matching label, in the range
        ``1..GetNbins()``.

    Raises:
        RuntimeError: if no bin carries the requested label.
    """
    axis = hist.GetXaxis()
    n_bins = axis.GetNbins()
    # Scan bins 1..n_bins inclusive, i.e. start at index 1 rather than 0.
    for bin_id in range(1, n_bins + 1):
        if axis.GetBinLabel(bin_id) == bin_name:
            return bin_id
    raise RuntimeError('Bin "{}" not found.'.format(bin_name))
    
def GetBinContent(hist, bin_name):
    """Look up the bin labelled `bin_name` in `hist` and return its content."""
    bin_id = GetBinIndex(hist, bin_name)
    return hist.GetBinContent(bin_id)

In [None]:
...
# Report both quantities in percent, together with their confidence intervals.
print("Anti-ele discriminator efficiency: {:.2f}% CI=({:.2f}, {:.2f})%".format(
    anti_ele_eff * 100, anti_ele_eff_low * 100, anti_ele_eff_up * 100))
print("Electron contamination: {:.2f}% CI=({:.2f}, {:.2f})%".format(
    cont * 100, cont_low * 100, cont_up * 100))

Load and plot the overall selection efficiency, the efficiency of the anti-electron cut, and the contamination by electrons in the final selection as a function of the working point.

In [None]:
# Result containers, indexed as result[channel][sample][working point index, column].
# Every array has one row per working point and three columns; the plotting
# cells read column 0 as the central value and columns 1 and 2 as the lower and
# upper bounds of the error bar.
n_wp = len(working_points)
selection_efficiency = {}
anti_ele_efficiency = {}
contamination = {}

for channel in channels:
    selection_efficiency[channel] = {}
    anti_ele_efficiency[channel] = {}
    contamination[channel] = {}
    for sample in samples:
        # Start from zero-filled arrays for this channel / sample pair.
        selection_efficiency[channel][sample] = np.zeros((n_wp, 3))
        anti_ele_efficiency[channel][sample] = np.zeros((n_wp, 3))
        contamination[channel][sample] = np.zeros((n_wp, 3))
        ...

In [None]:
# Selection efficiency as a function of the working point, one panel per channel.
# squeeze=False always returns a 2D array of axes, so the code also works when
# there is only a single channel (plt.subplots would otherwise return a bare Axes).
fig, ax_grid = plt.subplots(1, len(channels), figsize=(15, 7), squeeze=False)
ax_list = ax_grid[0]
for channel, ax in zip(channels, ax_list):
    for sample in samples:
        v = selection_efficiency[channel][sample]
        # Columns of v: central value, lower bound, upper bound. errorbar
        # expects the (positive) distances from the central value.
        ax.errorbar(working_points, v[:, 0], yerr=[v[:, 0] - v[:, 1], v[:, 2] - v[:, 0]])
    ax.tick_params(labelsize=14)
    ax.set_title("Selection efficiency ({})".format(channel), fontsize=20)
    # Legend entries are matched to the curves by plotting order, i.e. the order of `samples`.
    ax.legend(sample_names, fontsize=14, loc='upper right')

# Save before showing: depending on the backend, show() may release the figure.
fig.savefig('selection_efficiency.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Efficiency of the tau anti-electron cut as a function of the working point,
# one panel per channel.
# squeeze=False always returns a 2D array of axes, so the code also works when
# there is only a single channel (plt.subplots would otherwise return a bare Axes).
fig, ax_grid = plt.subplots(1, len(channels), figsize=(15, 7), squeeze=False)
ax_list = ax_grid[0]
for channel, ax in zip(channels, ax_list):
    for sample in samples:
        v = anti_ele_efficiency[channel][sample]
        # Columns of v: central value, lower bound, upper bound. errorbar
        # expects the (positive) distances from the central value.
        ax.errorbar(working_points, v[:, 0], yerr=[v[:, 0] - v[:, 1], v[:, 2] - v[:, 0]])
    ax.tick_params(labelsize=14)
    ax.set_title("Tau anti-electron cut efficiency ({})".format(channel), fontsize=20)
    # Legend entries are matched to the curves by plotting order, i.e. the order of `samples`.
    ax.legend(sample_names, fontsize=14, loc='upper right')

# Save before showing: depending on the backend, show() may release the figure.
# NOTE(review): the file name says "rejection" while the plot shows the
# efficiency - confirm which one is intended before renaming.
fig.savefig('anti_ele_rejection.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Contamination by electrons faking taus as a function of the working point,
# one panel per channel.
# squeeze=False always returns a 2D array of axes, so the code also works when
# there is only a single channel (plt.subplots would otherwise return a bare Axes).
fig, ax_grid = plt.subplots(1, len(channels), figsize=(15, 7), squeeze=False)
ax_list = ax_grid[0]
for channel, ax in zip(channels, ax_list):
    for sample in samples:
        v = contamination[channel][sample]
        # Columns of v: central value, lower bound, upper bound. errorbar
        # expects the (positive) distances from the central value.
        ax.errorbar(working_points, v[:, 0], yerr=[v[:, 0] - v[:, 1], v[:, 2] - v[:, 0]])
    ax.tick_params(labelsize=14)
    ax.set_title("Contamination by electrons faking tau ({})".format(channel), fontsize=16)
    # Legend entries are matched to the curves by plotting order, i.e. the order of `samples`.
    ax.legend(sample_names, fontsize=14, loc='upper right')

# Save before showing: depending on the backend, show() may release the figure.
fig.savefig('contamination.pdf', bbox_inches='tight')
plt.show()

Estimate relative significance improvement as a function of the working point. As an estimator use S/sqrt(B).

In [None]:
...