In [3]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse; one comparison table is produced per entry.
base_paths = [
    '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# Cutflow file names to look for in each directory: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names to look up in the SAF files, in table order:
# six selection cuts, twelve bounded MET windows, then one final entry.
# NOTE(review): the final entry reads 'MET < 1200 GeV' but is paired with the
# "EM12" row of the table built below -- confirm it matches the counter name
# actually written in the SAF files.
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
] + [
    f'{low} < MET < {high} GeV'
    for low, high in zip(
        (200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100),
        (250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200),
    )
] + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and map each cut name to its entry count.

    Every line starting with ``<Counter>`` opens a counter. The next line is
    expected to hold the cut name between double quotes, and the line after
    that the counts, tagged with the word ``nentries``. The first
    whitespace-separated token of that line is stored as an ``int``.

    Counters that are incomplete (file ends before the count line), have no
    quoted name, or whose third line is not tagged ``nentries`` are skipped,
    so a truncated file yields the counters read so far instead of raising
    ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        ``{cut name: entry count}``; a name that occurs more than once keeps
        the value of its last occurrence.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A counter needs two more lines (name, counts); skip a truncated one.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            continue  # no double quote on the name line
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM cutflow files that actually exist under this base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One MA5 count per desired cut, in desired_cuts order. A cut present in
    # several files takes the value of the last file processed; a cut found in
    # no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Report cuts that were never found so a zero row is not mistaken for a
    # genuine zero yield (typically a cut-name mismatch).
    missing_cuts = [
        cut for cut in desired_cuts
        if not any(cut in cuts for cuts in nentries_all_final.values())
    ]
    if missing_cuts:
        print(f"Warning: cuts not found in any SAF file (counted as 0): {missing_cuts}")

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). `or 1` keeps the MA5 denominator non-zero
    # when the first cut is missing or empty.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: each row relative to the first row, in percent.
    eff_atlas_cumulative = [(value / total_atlas) * 100 for value in atlas_values]
    eff_ma5_cumulative = [(value / total_ma5) * 100 for value in ma5_values]

    # Per-cut efficiencies: each row relative to the previous row, in percent.
    # The first row is 100 by definition; a zero previous count gives 0 instead
    # of raising ZeroDivisionError.
    # NOTE(review): for the EM rows the previous entry is another MET window
    # rather than a preceding selection, so these ratios may not be meaningful
    # as cut efficiencies -- confirm before enabling the per-cut columns.
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}),
    # set to 0 for the first row and wherever a count is zero.
    # NOTE(review): this propagates numerator and denominator as independent
    # counts; a binomial error may be more appropriate -- confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut pull: |difference| over the quadrature sum of both errors
    # (0 where both errors vanish).
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total),
    # set to 0 for the first row and wherever the count is zero.
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative pull, same definition as the per-cut one.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut columns are computed above but currently left out of the table.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,3324,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,3184,95.195,95.788,1.344,2.375,0.217,0.623
2,Njets <= 4,9455,3088,91.957,92.9,1.31,2.322,0.354,1.026
3,"min Delta Phi(jets, ETmiss) cut",9104,2969,88.543,89.32,1.274,2.255,0.3,0.878
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,2017,64.598,60.68,1.017,1.713,1.967,6.066
5,ETmiss>200 GeV,5317,1700,51.712,51.143,0.874,1.525,0.324,1.099
6,EM0,1346,389,13.091,11.703,0.379,0.627,1.894,10.603
7,EM1,1045,352,10.163,10.59,0.33,0.594,0.628,4.194
8,EM2,771,240,7.499,7.22,0.28,0.483,0.499,3.712
9,EM3,552,184,5.369,5.535,0.235,0.419,0.347,3.109


In [4]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse; one comparison table is produced per entry.
base_paths = [
    '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8_2/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# Cutflow file names to look for in each directory: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names to look up in the SAF files, in table order:
# six selection cuts, twelve bounded MET windows, then one final entry.
# NOTE(review): the final entry reads 'MET < 1200 GeV' but is paired with the
# "EM12" row of the table built below -- confirm it matches the counter name
# actually written in the SAF files.
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
] + [
    f'{low} < MET < {high} GeV'
    for low, high in zip(
        (200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100),
        (250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200),
    )
] + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and map each cut name to its entry count.

    Every line starting with ``<Counter>`` opens a counter. The next line is
    expected to hold the cut name between double quotes, and the line after
    that the counts, tagged with the word ``nentries``. The first
    whitespace-separated token of that line is stored as an ``int``.

    Counters that are incomplete (file ends before the count line), have no
    quoted name, or whose third line is not tagged ``nentries`` are skipped,
    so a truncated file yields the counters read so far instead of raising
    ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        ``{cut name: entry count}``; a name that occurs more than once keeps
        the value of its last occurrence.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A counter needs two more lines (name, counts); skip a truncated one.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            continue  # no double quote on the name line
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM cutflow files that actually exist under this base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One MA5 count per desired cut, in desired_cuts order. A cut present in
    # several files takes the value of the last file processed; a cut found in
    # no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Report cuts that were never found so a zero row is not mistaken for a
    # genuine zero yield (typically a cut-name mismatch).
    missing_cuts = [
        cut for cut in desired_cuts
        if not any(cut in cuts for cuts in nentries_all_final.values())
    ]
    if missing_cuts:
        print(f"Warning: cuts not found in any SAF file (counted as 0): {missing_cuts}")

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). `or 1` keeps the MA5 denominator non-zero
    # when the first cut is missing or empty.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: each row relative to the first row, in percent.
    eff_atlas_cumulative = [(value / total_atlas) * 100 for value in atlas_values]
    eff_ma5_cumulative = [(value / total_ma5) * 100 for value in ma5_values]

    # Per-cut efficiencies: each row relative to the previous row, in percent.
    # The first row is 100 by definition; a zero previous count gives 0 instead
    # of raising ZeroDivisionError.
    # NOTE(review): for the EM rows the previous entry is another MET window
    # rather than a preceding selection, so these ratios may not be meaningful
    # as cut efficiencies -- confirm before enabling the per-cut columns.
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}),
    # set to 0 for the first row and wherever a count is zero.
    # NOTE(review): this propagates numerator and denominator as independent
    # counts; a binomial error may be more appropriate -- confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut pull: |difference| over the quadrature sum of both errors
    # (0 where both errors vanish).
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total),
    # set to 0 for the first row and wherever the count is zero.
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative pull, same definition as the per-cut one.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut columns are computed above but currently left out of the table.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8_2/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,3257,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,3123,95.195,95.886,1.344,2.401,0.251,0.725
2,Njets <= 4,9455,3013,91.957,92.508,1.31,2.338,0.206,0.6
3,"min Delta Phi(jets, ETmiss) cut",9104,2884,88.543,88.548,1.274,2.264,0.002,0.005
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1916,64.598,58.827,1.017,1.694,2.921,8.934
5,ETmiss>200 GeV,5317,1608,51.712,49.371,0.874,1.505,1.346,4.527
6,EM0,1346,339,13.091,10.408,0.379,0.594,3.806,20.491
7,EM1,1045,332,10.163,10.193,0.33,0.587,0.045,0.296
8,EM2,771,254,7.499,7.799,0.28,0.508,0.517,4.001
9,EM3,552,160,5.369,4.912,0.235,0.398,0.988,8.496


In [1]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse; one comparison table is produced per entry.
base_paths = [
    '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# Cutflow file names to look for in each directory: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names to look up in the SAF files, in table order:
# six selection cuts, twelve bounded MET windows, then one final entry.
# NOTE(review): the final entry reads 'MET < 1200 GeV' but is paired with the
# "EM12" row of the table built below -- confirm it matches the counter name
# actually written in the SAF files.
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
] + [
    f'{low} < MET < {high} GeV'
    for low, high in zip(
        (200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100),
        (250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200),
    )
] + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and map each cut name to its entry count.

    Every line starting with ``<Counter>`` opens a counter. The next line is
    expected to hold the cut name between double quotes, and the line after
    that the counts, tagged with the word ``nentries``. The first
    whitespace-separated token of that line is stored as an ``int``.

    Counters that are incomplete (file ends before the count line), have no
    quoted name, or whose third line is not tagged ``nentries`` are skipped,
    so a truncated file yields the counters read so far instead of raising
    ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        ``{cut name: entry count}``; a name that occurs more than once keeps
        the value of its last occurrence.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A counter needs two more lines (name, counts); skip a truncated one.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            continue  # no double quote on the name line
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM cutflow files that actually exist under this base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One MA5 count per desired cut, in desired_cuts order. A cut present in
    # several files takes the value of the last file processed; a cut found in
    # no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Report cuts that were never found so a zero row is not mistaken for a
    # genuine zero yield (typically a cut-name mismatch).
    missing_cuts = [
        cut for cut in desired_cuts
        if not any(cut in cuts for cuts in nentries_all_final.values())
    ]
    if missing_cuts:
        print(f"Warning: cuts not found in any SAF file (counted as 0): {missing_cuts}")

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). `or 1` keeps the MA5 denominator non-zero
    # when the first cut is missing or empty.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: each row relative to the first row, in percent.
    eff_atlas_cumulative = [(value / total_atlas) * 100 for value in atlas_values]
    eff_ma5_cumulative = [(value / total_ma5) * 100 for value in ma5_values]

    # Per-cut efficiencies: each row relative to the previous row, in percent.
    # The first row is 100 by definition; a zero previous count gives 0 instead
    # of raising ZeroDivisionError.
    # NOTE(review): for the EM rows the previous entry is another MET window
    # rather than a preceding selection, so these ratios may not be meaningful
    # as cut efficiencies -- confirm before enabling the per-cut columns.
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}),
    # set to 0 for the first row and wherever a count is zero.
    # NOTE(review): this propagates numerator and denominator as independent
    # counts; a binomial error may be more appropriate -- confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut pull: |difference| over the quadrature sum of both errors
    # (0 where both errors vanish).
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total),
    # set to 0 for the first row and wherever the count is zero.
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative pull, same definition as the per-cut one.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut columns are computed above but currently left out of the table.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,3305,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,3139,95.195,94.977,1.344,2.367,0.08,0.229
2,Njets <= 4,9455,3033,91.957,91.77,1.31,2.308,0.07,0.203
3,"min Delta Phi(jets, ETmiss) cut",9104,2900,88.543,87.746,1.274,2.233,0.31,0.9
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1973,64.598,59.697,1.017,1.698,2.476,7.587
5,ETmiss>200 GeV,5317,1682,51.712,50.893,0.874,1.524,0.466,1.584
6,EM0,1346,381,13.091,11.528,0.379,0.624,2.141,11.939
7,EM1,1045,333,10.163,10.076,0.33,0.579,0.132,0.863
8,EM2,771,282,7.499,8.533,0.28,0.529,1.727,13.789
9,EM3,552,186,5.369,5.628,0.235,0.424,0.535,4.829


In [2]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse; one comparison table is produced per entry.
base_paths = [
    '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# Cutflow file names to look for in each directory: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names to look up in the SAF files, in table order:
# six selection cuts, twelve bounded MET windows, then one final entry.
# NOTE(review): the final entry reads 'MET < 1200 GeV' but is paired with the
# "EM12" row of the table built below -- confirm it matches the counter name
# actually written in the SAF files.
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
] + [
    f'{low} < MET < {high} GeV'
    for low, high in zip(
        (200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100),
        (250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200),
    )
] + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and map each cut name to its entry count.

    Every line starting with ``<Counter>`` opens a counter. The next line is
    expected to hold the cut name between double quotes, and the line after
    that the counts, tagged with the word ``nentries``. The first
    whitespace-separated token of that line is stored as an ``int``.

    Counters that are incomplete (file ends before the count line), have no
    quoted name, or whose third line is not tagged ``nentries`` are skipped,
    so a truncated file yields the counters read so far instead of raising
    ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        ``{cut name: entry count}``; a name that occurs more than once keeps
        the value of its last occurrence.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A counter needs two more lines (name, counts); skip a truncated one.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            continue  # no double quote on the name line
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM cutflow files that actually exist under this base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One MA5 count per desired cut, in desired_cuts order. A cut present in
    # several files takes the value of the last file processed; a cut found in
    # no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Report cuts that were never found so a zero row is not mistaken for a
    # genuine zero yield (typically a cut-name mismatch).
    missing_cuts = [
        cut for cut in desired_cuts
        if not any(cut in cuts for cuts in nentries_all_final.values())
    ]
    if missing_cuts:
        print(f"Warning: cuts not found in any SAF file (counted as 0): {missing_cuts}")

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). `or 1` keeps the MA5 denominator non-zero
    # when the first cut is missing or empty.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: each row relative to the first row, in percent.
    eff_atlas_cumulative = [(value / total_atlas) * 100 for value in atlas_values]
    eff_ma5_cumulative = [(value / total_ma5) * 100 for value in ma5_values]

    # Per-cut efficiencies: each row relative to the previous row, in percent.
    # The first row is 100 by definition; a zero previous count gives 0 instead
    # of raising ZeroDivisionError.
    # NOTE(review): for the EM rows the previous entry is another MET window
    # rather than a preceding selection, so these ratios may not be meaningful
    # as cut efficiencies -- confirm before enabling the per-cut columns.
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}),
    # set to 0 for the first row and wherever a count is zero.
    # NOTE(review): this propagates numerator and denominator as independent
    # counts; a binomial error may be more appropriate -- confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut pull: |difference| over the quadrature sum of both errors
    # (0 where both errors vanish).
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total),
    # set to 0 for the first row and wherever the count is zero.
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative pull, same definition as the per-cut one.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut columns are computed above but currently left out of the table.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,1077,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,998,95.195,92.665,1.344,4.071,0.59,2.658
2,Njets <= 4,9455,931,91.957,86.444,1.31,3.868,1.35,5.995
3,"min Delta Phi(jets, ETmiss) cut",9104,867,88.543,80.501,1.274,3.673,2.068,9.082
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,566,64.598,52.553,1.017,2.728,4.137,18.646
5,ETmiss>200 GeV,5317,516,51.712,47.911,0.874,2.565,1.403,7.35
6,EM0,1346,57,13.091,5.292,0.379,0.719,9.589,59.571
7,EM1,1045,95,10.163,8.821,0.33,0.944,1.342,13.21
8,EM2,771,80,7.499,7.428,0.28,0.861,0.078,0.94
9,EM3,552,71,5.369,6.592,0.235,0.808,1.455,22.795


p p > xd xd~ / h w+ w- z a
p p > xd xd~ j / h w+ w- z a  
scale = 3


In [6]:
import pandas as pd
import os
import glob
import numpy as np

# Define a list of base paths to the EM files
# Cutflow directories to process; each entry produces its own comparison table
base_paths = ['/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/']

# One SAF cutflow file per region index: EM0.saf ... EM12.saf
em_file_names = list(f'EM{i}.saf' for i in range(13))

# Counter labels to look up in the SAF files, in table-row order.
# NOTE(review): the last label reads 'MET < 1200 GeV' although the preceding
# bin already ends at 1200 GeV -- confirm it matches the counter name that is
# actually written in the SAF files.
desired_cuts = [
    # Common selection
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    # MET bins
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read the entry count of every ``<Counter>`` block in a SAF cutflow file.

    For each line starting with ``<Counter>``, the next line is expected to
    carry the cut name in double quotes and the line after that the entry
    count, marked by the word ``nentries``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name -> integer entry count. If a name occurs more than
        once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.

    Notes
    -----
    Counters that are cut off by the end of the file, or whose name line has
    no double-quoted label, are skipped instead of raising ``IndexError``.
    Counters without an ``nentries`` line are skipped as well.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A complete counter needs two more lines: name, then nentries.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted label on the name line -> malformed counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            # Only the first whitespace-separated token is read; any further
            # columns on this line are ignored.
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Keep only the EM files that exist under this base path. The file names
    # contain no wildcards, so glob simply acts as an existence filter.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the per-file counters into one list ordered like desired_cuts.
    # A cut present in several files keeps the value of the last file read;
    # a cut present in no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    found_cuts = set()
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]
                found_cuts.add(cut)

    # Make missing counters visible instead of letting them pass as "0 events"
    missing_cuts = [cut for cut in desired_cuts if cut not in found_cuts]
    if missing_cuts:
        print(f"Warning: cuts not found in any EM file (counted as 0): {missing_cuts}")

    # Official ATLAS values, one per table row
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). The MA5 total falls back to 1 so that an
    # empty cutflow yields 0 % everywhere instead of a division by zero.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row, in percent
    eff_atlas_cumulative = [(n / total_atlas) * 100 for n in atlas_values]
    eff_ma5_cumulative = [
        (n / total_ma5) * 100 if total_ma5 > 0 else 0 for n in ma5_values
    ]

    # Per-cut efficiencies: every row relative to the previous row, in percent.
    # The first row is 100 % by definition. A zero previous count gives 0
    # instead of raising ZeroDivisionError (this happens whenever a cut is
    # missing from the SAF files or no event survives).
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n + 1/n_prev),
    # set to 0 for the first row and whenever one of the counts is zero
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut difference in units of the combined error (0 when that error is 0)
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n + 1/n_total)
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative difference in units of the combined error (0 when that error is 0)
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; the commented-out entries are the per-cut quantities
    # computed above and can be switched on when needed
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,2122,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,2034,95.195,95.853,1.344,2.974,0.201,0.691
2,Njets <= 4,9455,1973,91.957,92.978,1.31,2.908,0.32,1.111
3,"min Delta Phi(jets, ETmiss) cut",9104,1866,88.543,87.936,1.274,2.791,0.198,0.686
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1273,64.598,59.991,1.017,2.127,1.955,7.133
5,ETmiss>200 GeV,5317,1078,51.712,50.801,0.874,1.9,0.435,1.761
6,EM0,1346,241,13.091,11.357,0.379,0.772,2.015,13.243
7,EM1,1045,206,10.163,9.708,0.33,0.708,0.583,4.482
8,EM2,771,182,7.499,8.577,0.28,0.662,1.499,14.38
9,EM3,552,123,5.369,5.796,0.235,0.538,0.729,7.969


In [8]:
import pandas as pd
import os
import glob
import numpy as np

# Define a list of base paths to the EM files
# Cutflow directories to process; each entry produces its own comparison table
base_paths = ['/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/']

# One SAF cutflow file per region index: EM0.saf ... EM12.saf
em_file_names = list(f'EM{i}.saf' for i in range(13))

# Counter labels to look up in the SAF files, in table-row order.
# NOTE(review): the last label reads 'MET < 1200 GeV' although the preceding
# bin already ends at 1200 GeV -- confirm it matches the counter name that is
# actually written in the SAF files.
desired_cuts = [
    # Common selection
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    # MET bins
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read the entry count of every ``<Counter>`` block in a SAF cutflow file.

    For each line starting with ``<Counter>``, the next line is expected to
    carry the cut name in double quotes and the line after that the entry
    count, marked by the word ``nentries``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name -> integer entry count. If a name occurs more than
        once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.

    Notes
    -----
    Counters that are cut off by the end of the file, or whose name line has
    no double-quoted label, are skipped instead of raising ``IndexError``.
    Counters without an ``nentries`` line are skipped as well.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A complete counter needs two more lines: name, then nentries.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted label on the name line -> malformed counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            # Only the first whitespace-separated token is read; any further
            # columns on this line are ignored.
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Keep only the EM files that exist under this base path. The file names
    # contain no wildcards, so glob simply acts as an existence filter.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the per-file counters into one list ordered like desired_cuts.
    # A cut present in several files keeps the value of the last file read;
    # a cut present in no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    found_cuts = set()
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]
                found_cuts.add(cut)

    # Make missing counters visible instead of letting them pass as "0 events"
    missing_cuts = [cut for cut in desired_cuts if cut not in found_cuts]
    if missing_cuts:
        print(f"Warning: cuts not found in any EM file (counted as 0): {missing_cuts}")

    # Official ATLAS values, one per table row
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). The MA5 total falls back to 1 so that an
    # empty cutflow yields 0 % everywhere instead of a division by zero.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row, in percent
    eff_atlas_cumulative = [(n / total_atlas) * 100 for n in atlas_values]
    eff_ma5_cumulative = [
        (n / total_ma5) * 100 if total_ma5 > 0 else 0 for n in ma5_values
    ]

    # Per-cut efficiencies: every row relative to the previous row, in percent.
    # The first row is 100 % by definition. A zero previous count gives 0
    # instead of raising ZeroDivisionError (this happens whenever a cut is
    # missing from the SAF files or no event survives).
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n + 1/n_prev),
    # set to 0 for the first row and whenever one of the counts is zero
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut difference in units of the combined error (0 when that error is 0)
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n + 1/n_total)
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative difference in units of the combined error (0 when that error is 0)
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; the commented-out entries are the per-cut quantities
    # computed above and can be switched on when needed
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,2122,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,2034,95.195,95.853,1.344,2.974,0.201,0.691
2,Njets <= 4,9455,1973,91.957,92.978,1.31,2.908,0.32,1.111
3,"min Delta Phi(jets, ETmiss) cut",9104,1866,88.543,87.936,1.274,2.791,0.198,0.686
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1273,64.598,59.991,1.017,2.127,1.955,7.133
5,ETmiss>200 GeV,5317,1078,51.712,50.801,0.874,1.9,0.435,1.761
6,EM0,1346,241,13.091,11.357,0.379,0.772,2.015,13.243
7,EM1,1045,206,10.163,9.708,0.33,0.708,0.583,4.482
8,EM2,771,182,7.499,8.577,0.28,0.662,1.499,14.38
9,EM3,552,123,5.369,5.796,0.235,0.538,0.729,7.969


In [None]:
import pandas as pd
import os
import glob
import numpy as np

# Define a list of base paths to the EM files
# Cutflow directories to process; each entry produces its own comparison table
base_paths = ['/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8_2/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/']

# One SAF cutflow file per region index: EM0.saf ... EM12.saf
em_file_names = list(f'EM{i}.saf' for i in range(13))

# Counter labels to look up in the SAF files, in table-row order.
# NOTE(review): the last label reads 'MET < 1200 GeV' although the preceding
# bin already ends at 1200 GeV -- confirm it matches the counter name that is
# actually written in the SAF files.
desired_cuts = [
    # Common selection
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    # MET bins
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read the entry count of every ``<Counter>`` block in a SAF cutflow file.

    For each line starting with ``<Counter>``, the next line is expected to
    carry the cut name in double quotes and the line after that the entry
    count, marked by the word ``nentries``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name -> integer entry count. If a name occurs more than
        once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.

    Notes
    -----
    Counters that are cut off by the end of the file, or whose name line has
    no double-quoted label, are skipped instead of raising ``IndexError``.
    Counters without an ``nentries`` line are skipped as well.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A complete counter needs two more lines: name, then nentries.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted label on the name line -> malformed counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            # Only the first whitespace-separated token is read; any further
            # columns on this line are ignored.
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Keep only the EM files that exist under this base path. The file names
    # contain no wildcards, so glob simply acts as an existence filter.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the per-file counters into one list ordered like desired_cuts.
    # A cut present in several files keeps the value of the last file read;
    # a cut present in no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    found_cuts = set()
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]
                found_cuts.add(cut)

    # Make missing counters visible instead of letting them pass as "0 events"
    missing_cuts = [cut for cut in desired_cuts if cut not in found_cuts]
    if missing_cuts:
        print(f"Warning: cuts not found in any EM file (counted as 0): {missing_cuts}")

    # Official ATLAS values, one per table row
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). The MA5 total falls back to 1 so that an
    # empty cutflow yields 0 % everywhere instead of a division by zero.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row, in percent
    eff_atlas_cumulative = [(n / total_atlas) * 100 for n in atlas_values]
    eff_ma5_cumulative = [
        (n / total_ma5) * 100 if total_ma5 > 0 else 0 for n in ma5_values
    ]

    # Per-cut efficiencies: every row relative to the previous row, in percent.
    # The first row is 100 % by definition. A zero previous count gives 0
    # instead of raising ZeroDivisionError (this happens whenever a cut is
    # missing from the SAF files or no event survives).
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n + 1/n_prev),
    # set to 0 for the first row and whenever one of the counts is zero
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut difference in units of the combined error (0 when that error is 0)
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n + 1/n_total)
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative difference in units of the combined error (0 when that error is 0)
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; the commented-out entries are the per-cut quantities
    # computed above and can be switched on when needed
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



In [10]:
import pandas as pd
import os
import glob
import numpy as np

# Define a list of base paths to the EM files
# Cutflow directories to process; each entry produces its own comparison table
base_paths = ['/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/']

# One SAF cutflow file per region index: EM0.saf ... EM12.saf
em_file_names = list(f'EM{i}.saf' for i in range(13))

# Counter labels to look up in the SAF files, in table-row order.
# NOTE(review): the last label reads 'MET < 1200 GeV' although the preceding
# bin already ends at 1200 GeV -- confirm it matches the counter name that is
# actually written in the SAF files.
desired_cuts = [
    # Common selection
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    # MET bins
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read the entry count of every ``<Counter>`` block in a SAF cutflow file.

    For each line starting with ``<Counter>``, the next line is expected to
    carry the cut name in double quotes and the line after that the entry
    count, marked by the word ``nentries``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name -> integer entry count. If a name occurs more than
        once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.

    Notes
    -----
    Counters that are cut off by the end of the file, or whose name line has
    no double-quoted label, are skipped instead of raising ``IndexError``.
    Counters without an ``nentries`` line are skipped as well.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A complete counter needs two more lines: name, then nentries.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted label on the name line -> malformed counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            # Only the first whitespace-separated token is read; any further
            # columns on this line are ignored.
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Keep only the EM files that exist under this base path. The file names
    # contain no wildcards, so glob simply acts as an existence filter.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the per-file counters into one list ordered like desired_cuts.
    # A cut present in several files keeps the value of the last file read;
    # a cut present in no file stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    found_cuts = set()
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]
                found_cuts.add(cut)

    # Make missing counters visible instead of letting them pass as "0 events"
    missing_cuts = [cut for cut in desired_cuts if cut not in found_cuts]
    if missing_cuts:
        print(f"Warning: cuts not found in any EM file (counted as 0): {missing_cuts}")

    # Official ATLAS values, one per table row
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference totals (first row). The MA5 total falls back to 1 so that an
    # empty cutflow yields 0 % everywhere instead of a division by zero.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row, in percent
    eff_atlas_cumulative = [(n / total_atlas) * 100 for n in atlas_values]
    eff_ma5_cumulative = [
        (n / total_ma5) * 100 if total_ma5 > 0 else 0 for n in ma5_values
    ]

    # Per-cut efficiencies: every row relative to the previous row, in percent.
    # The first row is 100 % by definition. A zero previous count gives 0
    # instead of raising ZeroDivisionError (this happens whenever a cut is
    # missing from the SAF files or no event survives).
    eff_atlas = [100] + [
        (atlas_values[i] / atlas_values[i - 1]) * 100 if atlas_values[i - 1] > 0 else 0
        for i in range(1, len(atlas_values))
    ]
    eff_ma5_per_cut = [100] + [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else 0
        for i in range(1, len(ma5_values))
    ]

    # Errors on the per-cut efficiencies: eff * sqrt(1/n + 1/n_prev),
    # set to 0 for the first row and whenever one of the counts is zero
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else 0
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else 0
        for i in range(len(atlas_values))
    ]

    # Per-cut difference in units of the combined error (0 when that error is 0)
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2 > 0 else 0
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n + 1/n_total)
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if i > 0 and atlas_values[i] > 0 else 0
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if i > 0 and ma5_values[i] > 0 else 0
        for i in range(len(ma5_values))
    ]

    # Cumulative difference in units of the combined error (0 when that error is 0)
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2 > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference of the cumulative efficiencies, in percent of ATLAS
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i]))*100 if eff_atlas_cumulative[i] > 0 else 0
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; the commented-out entries are the per-cut quantities
    # computed above and can be switched on when needed
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,2152,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,2049,95.195,95.214,1.344,2.939,0.006,0.019
2,Njets <= 4,9455,1996,91.957,92.751,1.31,2.882,0.251,0.864
3,"min Delta Phi(jets, ETmiss) cut",9104,1900,88.543,88.29,1.274,2.779,0.083,0.286
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1265,64.598,58.783,1.017,2.083,2.509,9.003
5,ETmiss>200 GeV,5317,1067,51.712,49.582,0.874,1.856,1.038,4.119
6,EM0,1346,244,13.091,11.338,0.379,0.766,2.05,13.388
7,EM1,1045,205,10.163,9.526,0.33,0.696,0.827,6.271
8,EM2,771,173,7.499,8.039,0.28,0.635,0.779,7.208
9,EM3,552,105,5.369,4.879,0.235,0.488,0.904,9.116
