1. **Cumulative efficiency**:

   $\text{Eff}_{\text{cumulative}}(i) = \frac{N_{\text{after cut } i}}{N_{\text{total}}} \times 100$

2. **Per-cut efficiency**:

   $\text{Eff}_{\text{per cut}}(i) = \frac{N_{\text{after cut } i}}{N_{\text{after cut } (i-1)}} \times 100 \quad \text{(for } i > 0\text{)}$

3. **Statistical error for per-cut efficiency**:

   $\Delta \text{Eff}_{\text{per cut}}(i) = \text{Eff}_{\text{per cut}}(i) \times \sqrt{\frac{1}{N_{\text{after cut } i}} + \frac{1}{N_{\text{after cut } (i-1)}}}$

4. **Statistical error for cumulative efficiency**:

   $\Delta \text{Eff}_{\text{cumulative}}(i) = \text{Eff}_{\text{cumulative}}(i) \times \sqrt{\frac{1}{N_{\text{after cut } i}} + \frac{1}{N_{\text{total}}}}$

5. **$\sigma$ calculation based on per-cut efficiency difference**:

   $\sigma_{\text{per cut}}(i) = \frac{|\text{Eff}_{\text{ATLAS per cut}}(i) - \text{Eff}_{\text{MA5 per cut}}(i)|}{\sqrt{(\Delta \text{Eff}_{\text{ATLAS per cut}}(i))^2 + (\Delta \text{Eff}_{\text{MA5 per cut}}(i))^2}}$

6. **$\sigma$ calculation based on cumulative efficiency difference**:

   $\sigma_{\text{cumulative}}(i) = \frac{|\text{Eff}_{\text{ATLAS cumulative}}(i) - \text{Eff}_{\text{MA5 cumulative}}(i)|}{\sqrt{(\Delta \text{Eff}_{\text{ATLAS cumulative}}(i))^2 + (\Delta \text{Eff}_{\text{MA5 cumulative}}(i))^2}}$

In [1]:
import pandas as pd
import os
import glob
import numpy as np

# Directories that are searched for the EM files; one results table is
# produced per entry.
base_paths = [
    '/home/yoxara/MonoXSMS/Output_MC/SUSY_MA5/SUSY_MA5_run_01_450_443/Output/SAF/pptt1jmatch_run_01_450_443/atlas_exot_2018_06/Cutflows/',
]

# Names of the EM files looked up in every base path: EM0.saf ... EM12.saf
n_em_files = 13
em_file_names = [f'EM{i}.saf' for i in range(n_em_files)]

# Counter names to pick up from the SAF files, in the order the rows of the
# results table are built: first the selection shared by all regions ...
_preselection_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]

# ... then one MET window per EM file.
# NOTE(review): the last entry reads 'MET < 1200 GeV' although the preceding
# windows already end at 1200 GeV -- confirm that this is the exact counter
# name used in the SAF file before changing it.
_met_window_cuts = [
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

desired_cuts = _preselection_cuts + _met_window_cuts

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF file and return ``{cut name: nentries}`` for its counters.

    A counter is recognised by a line starting with ``<Counter>``.  The line
    right after the tag must hold the cut name between double quotes, and the
    line after that must contain the word ``nentries`` and start with the
    integer event count.

    Counter blocks that are cut off by the end of the file, or whose name
    line has no double quote, are skipped instead of raising ``IndexError``.
    Counters whose third line does not mention ``nentries`` are ignored.

    Args:
        em_file_path: Path of the SAF file to read.

    Returns:
        dict mapping each cut name to its integer ``nentries`` value.  If a
        name occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first token of an ``nentries`` line is not an
            integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue

        # The name line (i + 1) and the count line (i + 2) must both exist.
        # If they do not, no later tag can have them either.
        if i + 2 >= len(lines):
            break

        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted cut name on the expected line: not a usable counter.
            continue

        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

def _percent(numerator, denominator, undefined=0.0):
    """Return numerator / denominator in percent, or `undefined` if the denominator is not positive."""
    if denominator > 0:
        return (numerator / denominator) * 100
    return undefined


def _ratio_error(efficiency, n_pass, n_ref):
    """Return efficiency * sqrt(1/n_pass + 1/n_ref), or 0 if either count is not positive."""
    if n_pass > 0 and n_ref > 0:
        return efficiency * np.sqrt(1 / n_pass + 1 / n_ref)
    return 0.0


def _significance(value_a, value_b, error_a, error_b):
    """Return |value_a - value_b| / sqrt(error_a^2 + error_b^2), or 0 if the combined error is zero."""
    combined = error_a**2 + error_b**2
    if combined > 0:
        return abs(value_a - value_b) / np.sqrt(combined)
    return 0.0


# Official ATLAS values, one per table row (same order as the row labels below)
atlas_values = [39598, 37547, 35412, 33319, 23134, 18801,
                4488, 3789, 2857, 2111, 2618, 1352, 712, 393, 204, 122, 58, 42, 55]

# Row labels of the results table
cut_labels = [
    r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
    r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
    r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
    "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
]

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM files that exist under the current base path
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the counters of all files. When a cut appears in several files
    # the last file wins; cuts that appear in no file count as 0 events.
    merged_counters = {}
    for cuts in nentries_all_final.values():
        merged_counters.update(cuts)
    nentries_array_final_complete = [merged_counters.get(cut, 0) for cut in desired_cuts]

    # A cut name that matches no counter would otherwise silently show up as
    # a row with 0 events, so report it.
    missing_cuts = [cut for cut in desired_cuts if cut not in merged_counters]
    if missing_cuts:
        print(f"Cuts not found in any input file (counted as 0 events): {missing_cuts}")

    ma5_values = nentries_array_final_complete

    # Total values for ATLAS and MA5
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1  # Avoid division by zero

    # Cumulative efficiencies: events after cut i relative to the first row
    eff_atlas_cumulative = [_percent(n, total_atlas) for n in atlas_values]
    eff_ma5_cumulative = [_percent(n, total_ma5) for n in ma5_values]

    # Per-cut efficiencies: events after cut i relative to cut i - 1.
    # The first row has no predecessor and is set to 100 %.  A predecessor
    # with 0 events gives 0 instead of a ZeroDivisionError.
    eff_atlas = [100.0] + [
        _percent(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100.0] + [
        _percent(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors on the per-cut efficiencies (first row: 0)
    eff_ma5_delta_per_cut = [0.0] + [
        _ratio_error(eff, current, previous)
        for eff, current, previous in zip(eff_ma5_per_cut[1:], ma5_values[1:], ma5_values)
    ]
    eff_atlas_errors = [0.0] + [
        _ratio_error(eff, current, previous)
        for eff, current, previous in zip(eff_atlas[1:], atlas_values[1:], atlas_values)
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas, eff_ma5_per_cut, eff_atlas_errors, eff_ma5_delta_per_cut)
    ]

    # Statistical errors on the cumulative efficiencies (first row: 0)
    eff_atlas_cumulative_errors = [0.0] + [
        _ratio_error(eff, n, total_atlas)
        for eff, n in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0.0] + [
        _ratio_error(eff, n, total_ma5)
        for eff, n in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas_cumulative, eff_ma5_cumulative,
               eff_atlas_cumulative_errors, eff_ma5_cumulative_errors)
    ]

    # Relative difference (in percent) between the MA5 and ATLAS cumulative
    # efficiencies
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # Only the cumulative quantities are shown; the per-cut lists computed
    # above (eff_atlas, eff_ma5_per_cut, eff_atlas_errors,
    # eff_ma5_delta_per_cut, sigma_values) can be added as further columns.
    DF = {
        "Cuts": cut_labels,
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    # NOTE(review): `display` is not imported in this cell; presumably the
    # IPython/Jupyter builtin -- confirm before running as a plain script.
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/SUSY_MA5/SUSY_MA5_run_01_450_443/Output/SAF/pptt1jmatch_run_01_450_443/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),39598,5227,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,37547,4987,94.82,95.408,0.683,1.889,0.293,0.62
2,Njets <= 4,35412,4629,89.429,88.559,0.654,1.787,0.457,0.972
3,"min Delta Phi(jets, ETmiss) cut",33319,4215,84.143,80.639,0.626,1.669,1.966,4.165
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,23134,2793,58.422,53.434,0.483,1.252,3.716,8.538
5,ETmiss>200 GeV,18801,2319,47.48,44.366,0.421,1.107,2.63,6.558
6,EM0,4488,545,11.334,10.427,0.179,0.469,1.807,8.005
7,EM1,3789,491,9.569,9.394,0.163,0.443,0.371,1.83
8,EM2,2857,329,7.215,6.294,0.14,0.358,2.397,12.762
9,EM3,2111,225,5.331,4.305,0.119,0.293,3.245,19.255
