In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
import uproot
from pathlib import Path
import awkward as ak
import seaborn as sn
import atlas_mpl_style as ampl
ampl.use_atlas_style()  

In [2]:
# Make the project's custom helpers importable: they live in the "src" folder
# located in the parent of the current working directory.
module_path = str(Path.cwd().parents[0] / "src")

# Only append when missing, so re-running this cell does not add duplicate entries.
if module_path not in sys.path:
    sys.path.append(module_path)

# NOTE(review): the wildcard import hides where names come from. load_data,
# cut_pt_eta, cut_pt_eta_met and calculate_ST are used later but not defined in
# this notebook — presumably they come from imcal; confirm and consider
# importing them explicitly.
from imcal import *

# Efficiency of cuts

This notebook measures the fraction of events that survive the selection cuts, so that we can work out how many events must be simulated to be left with a given number of events after the cuts.

In [3]:
# Parameter grid of the black-hole (BH) samples: one sample per (n, M) pair.
N_DIMS = [2, 4, 6]
M_MIN = [10]

# Labels are ordered with n as the outer (slowest-varying) index.
BH_labels = [f"BH_n{n}_M{M}" for n in N_DIMS for M in M_MIN]
BH_data_paths = [
    f"/disk/atlas3/data_MC/delphes/{label}_10000events.root:Delphes"
    for label in BH_labels
]

# Sphaleron sample (a single file) and its label.
sph_data_paths = [
    "/disk/atlas3/data_MC/delphes/PP13-Sphaleron-THR9-FRZ15-NB0-NSUBPALL_10000events.root:Delphes"
]
sph_labels = ["SPH_9TeV"]

# Selection thresholds.
# min_pt / max_eta are handed to the per-object pT/eta cut helpers;
# ST_min / N_min are the event-level thresholds on ST and on the object count N.
# NOTE(review): units are not stated here (presumably GeV for min_pt and TeV
# for ST_min) — confirm against the cut helpers.
min_pt = 70
max_eta = 2.4
ST_min = 7
N_min = 5

# Number of samples of each kind.
n_BH_labels = len(BH_data_paths)
n_sph_labels = len(sph_data_paths)

# Event count forwarded to load_data for every branch.
N_EVENTS = 15000

# All sample labels, BH first and sphaleron last.
labels = BH_labels + sph_labels
print(labels)

['BH_n2_M10', 'BH_n4_M10', 'BH_n6_M10', 'SPH_9TeV']


In [4]:
# Load each sample, apply the object selection, and compute ST and N per event

def get_arrays(data_paths):
    """Compute ST and the selected-object count N for every sample.

    For each path in ``data_paths`` the Tower, Jet, MissingET, Electron, Muon
    and Photon branches are read with ``load_data`` (``N_EVENTS`` is forwarded
    as the event-count argument). The pT/eta selection is applied to jets,
    electrons, muons, photons and missing ET, after which ST is computed with
    ``calculate_ST`` and N is the sum of the per-event object counts returned
    by ``cut_pt_eta``.

    As a side effect the function prints, for every sample, the number of
    loaded events and the number of events with ``ST >= ST_min`` and
    ``N >= N_min``.

    Uses the notebook-level settings ``N_EVENTS``, ``min_pt``, ``max_eta``,
    ``ST_min`` and ``N_min``.

    Args:
        data_paths: iterable of sample paths understood by ``load_data``.

    Returns:
        A pair ``(ST_list, N_list)`` with one entry per input path, holding
        that sample's ST and N values *before* the ST/N cut is applied.
    """
    # Branches to read, in the order they are loaded.
    branch_fields = (
        ("Tower", ("Tower.ET", "Tower.Eta", "Tower.Phi", "Tower.Eem", "Tower.Ehad")),
        ("Jet", ("Jet.PT", "Jet.Eta")),
        ("MissingET", ("MissingET.MET", "MissingET.Eta")),
        ("Electron", ("Electron.PT", "Electron.Eta")),
        ("Muon", ("Muon.PT", "Muon.Eta")),
        ("Photon", ("Photon.PT", "Photon.Eta")),
    )

    ST_list = []
    N_list = []

    for data_path in data_paths:
        # Read all branches of this sample (a fresh field list per call).
        loaded = {
            branch: load_data(data_path, branch, list(fields), N_EVENTS)
            for branch, fields in branch_fields
        }
        clusters = loaded["Tower"]

        print(f"Sample: {data_path}")
        print(f"Number of events loaded: {len(clusters)}")

        # Object-level pT/eta selection; cut_pt_eta also returns per-event counts.
        jets, n_jets = cut_pt_eta(loaded["Jet"], min_pt, max_eta)
        electrons, n_electrons = cut_pt_eta(loaded["Electron"], min_pt, max_eta)
        muons, n_muons = cut_pt_eta(loaded["Muon"], min_pt, max_eta)
        photons, n_photons = cut_pt_eta(loaded["Photon"], min_pt, max_eta)
        met = cut_pt_eta_met(loaded["MissingET"], min_pt, max_eta)

        # Event-level quantities: ST and the total number of selected objects.
        ST = calculate_ST(jets, muons, electrons, photons, met)
        N = (
            np.array(n_jets)
            + np.array(n_electrons)
            + np.array(n_muons)
            + np.array(n_photons)
        )
        ST_list.append(ST)
        N_list.append(N)

        # Indices of events that satisfy both thresholds.
        passes_ST = np.nonzero(ST >= ST_min)
        passes_N = np.nonzero(N >= N_min)
        selected = np.intersect1d(passes_ST, passes_N)

        # The cut is applied only to report how many events survive it.
        print(f"Applying ST min cut: {ST_min} and N min cut: {N_min}")
        n_selected = len(clusters[selected])
        print(f"Number of events after cut: {n_selected}")

    return ST_list, N_list

# Per-sample ST and N arrays (one list entry per input file) for the black-hole
# samples and for the sphaleron sample; get_arrays also prints the event counts
# before and after the ST/N cut for each file.
BH_ST, BH_N = get_arrays(BH_data_paths)
sph_ST, sph_N = get_arrays(sph_data_paths)


Sample: /disk/atlas3/data_MC/delphes/BH_n2_M10_10000events.root:Delphes
Number of events loaded: 10000
Applying ST min cut: 7 and N min cut: 5
Number of events after cut: 5525
Sample: /disk/atlas3/data_MC/delphes/BH_n4_M10_10000events.root:Delphes
Number of events loaded: 12000
Applying ST min cut: 7 and N min cut: 5
Number of events after cut: 5855
Sample: /disk/atlas3/data_MC/delphes/BH_n6_M10_10000events.root:Delphes
Number of events loaded: 10000
Applying ST min cut: 7 and N min cut: 5
Number of events after cut: 4275
Sample: /disk/atlas3/data_MC/delphes/PP13-Sphaleron-THR9-FRZ15-NB0-NSUBPALL_10000events.root:Delphes
Number of events loaded: 10000
Applying ST min cut: 7 and N min cut: 5
Number of events after cut: 1817


In [5]:
# Map every sample label to a DataFrame with one row per event and the
# columns "N" and "ST"; black-hole samples first, then the sphaleron sample.
df_dict = {
    **{
        label: pd.DataFrame({"N": BH_N[idx], "ST": BH_ST[idx]})
        for idx, label in enumerate(BH_labels)
    },
    **{
        label: pd.DataFrame({"N": sph_N[idx], "ST": sph_ST[idx]})
        for idx, label in enumerate(sph_labels)
    },
}


In [6]:
def efficiency(dictionary, N_cut, ST_cut, labels, file):
    """Print and record the fraction of events passing the N and ST cuts.

    For every label the events with ``N >= N_cut`` and ``ST >= ST_cut`` are
    counted and divided by the total number of events of that sample. The
    result, rounded to two decimals, is printed and written to ``file``.

    Args:
        dictionary: mapping from sample label to a DataFrame with the
            columns ``"N"`` and ``"ST"`` (one row per event).
        N_cut: minimum value of ``N`` for an event to pass.
        ST_cut: minimum value of ``ST`` for an event to pass.
        labels: labels of the samples to evaluate, in output order.
        file: writable text file object that receives one header line plus
            one ``"<label>: <efficiency>"`` line per sample.

    Returns:
        None. A sample without events is reported with efficiency ``nan``
        rather than raising ``ZeroDivisionError``.
    """
    header = f"Efficiency for N >= {N_cut} and ST >= {ST_cut}:"
    print(header)
    file.write(header + "\n")

    for label in labels:
        df = dictionary[label]
        N_before = len(df)

        # Both thresholds in a single boolean mask instead of two filtered copies.
        passed = (df["N"] >= N_cut) & (df["ST"] >= ST_cut)
        N_after = int(passed.sum())

        # The efficiency of an empty sample is undefined; avoid dividing by zero.
        if N_before:
            fraction = np.round(N_after / N_before, 2)
        else:
            fraction = float("nan")

        print(f"{label}: {N_before}, {N_after}")
        print(f"{label}: {fraction}")
        file.write(f"{label}: {fraction}\n")

# Cut values to scan; every (N_cut, ST_cut) combination is evaluated.
N_cuts = [5]
ST_cuts = [7]

# The context manager closes the results file even if efficiency() raises,
# so a failure cannot leave the handle open or the output unflushed.
with open("../results/Efficiencies_paper.txt", "w") as file:
    for N_cut in N_cuts:
        for ST_cut in ST_cuts:
            efficiency(df_dict, N_cut, ST_cut, labels, file)

Efficiency for N >= 5 and ST >= 7:
BH_n2_M10: 10000, 5525
BH_n2_M10: 0.55
BH_n4_M10: 12000, 5855
BH_n4_M10: 0.49
BH_n6_M10: 10000, 4275
BH_n6_M10: 0.43
SPH_9TeV: 10000, 1817
SPH_9TeV: 0.18
