In [None]:
import pandas as pd
import os
import glob
import numpy as np

# Directories to scan; each one is searched for the EM*.saf files listed below
base_paths = [
    '/home/yoxara/workstation/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# File names EM0.saf ... EM12.saf
em_file_names = ['EM{}.saf'.format(region) for region in range(13)]

# Cut names looked up in the SAF files: the common selection first, then the
# MET windows, then the final open-ended entry.
_common_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
_met_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_window_cuts = [
    f'{low} < MET < {high} GeV'
    for low, high in zip(_met_edges, _met_edges[1:])
]
# NOTE(review): the last name reads 'MET < 1200 GeV' although it follows the
# 1100-1200 window -- confirm the spelling against the SAF files.
desired_cuts = _common_cuts + _met_window_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Parse a SAF cutflow file into a ``{cut name: nentries}`` dictionary.

    A counter is recognised as a line starting with ``<Counter>``, followed
    by a line carrying the double-quoted cut name, followed by a line that
    contains the word ``nentries`` and whose first whitespace-separated token
    is the entry count.

    Counters cut short by the end of the file, or whose name line carries no
    double quote, are skipped instead of raising ``IndexError``.

    Args:
        em_file_path: Path of the SAF file to read.

    Returns:
        dict mapping each cut name to its integer entry count. If a name
        occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first token of an ``nentries`` line is not an
            integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # The name line and the count line must both exist after the marker.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted cut name on the following line: not a usable counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

def _percent_ratio(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0 if the denominator is 0."""
    return (numerator / denominator) * 100 if denominator else 0


def _ratio_error(efficiency, numerator, denominator):
    """Return ``efficiency * sqrt(1/numerator + 1/denominator)``.

    Yields 0 when either count is not positive, so that empty selections do
    not divide by zero.
    """
    if numerator > 0 and denominator > 0:
        return efficiency * np.sqrt(1 / numerator + 1 / denominator)
    return 0


def _significance(value_a, error_a, value_b, error_b):
    """Return ``|value_a - value_b|`` divided by the errors added in quadrature (0 if both errors are 0)."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Official ATLAS values, one per row of the table built below.
# They do not depend on the base path, so they are defined once.
atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM files that actually exist under the current base path
    em_files_paths = [
        path
        for file_name in em_file_names
        for path in glob.glob(os.path.join(base_path, file_name))
    ]

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the counters of all files (for a cut present in several files the
    # last file wins); cuts found in no file are counted as 0.
    merged_counts = {}
    for cuts in nentries_all_final.values():
        merged_counts.update(cuts)
    nentries_array_final_complete = [merged_counts.get(cut, 0) for cut in desired_cuts]

    ma5_values = nentries_array_final_complete

    # Reference totals are the first row; fall back to 1 to avoid division by zero
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row
    eff_atlas_cumulative = [_percent_ratio(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent_ratio(count, total_ma5) for count in ma5_values]

    # Efficiencies per cut: every row relative to the previous row (first row is 100%).
    # A previous row of 0 (cut missing from the SAF files) gives 0 instead of
    # raising ZeroDivisionError.
    eff_atlas = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors of the per-cut efficiencies (first row has no error)
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff_ma5_per_cut[i], ma5_values[i], ma5_values[i - 1])
        for i in range(1, len(ma5_values))
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff_atlas[i], atlas_values[i], atlas_values[i - 1])
        for i in range(1, len(atlas_values))
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas, eff_atlas_errors, eff_ma5_per_cut, eff_ma5_delta_per_cut
        )
    ]

    # Statistical errors of the cumulative efficiencies (first row has no error)
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas_cumulative, eff_atlas_cumulative_errors,
            eff_ma5_cumulative, eff_ma5_cumulative_errors
        )
    ]

    # Relative differences of the cumulative efficiencies, in percent
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut quantities (eff_atlas, eff_atlas_errors, eff_ma5_per_cut,
    # eff_ma5_delta_per_cut, sigma_values) are computed above but not tabulated.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,6912,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,6599,95.195,95.472,1.344,1.643,0.13,0.29
2,Njets <= 4,9455,6241,91.957,90.292,1.31,1.577,0.812,1.81
3,"min Delta Phi(jets, ETmiss) cut",9104,5783,88.543,83.666,1.274,1.491,2.487,5.508
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,3822,64.598,55.295,1.017,1.115,6.166,14.402
5,ETmiss>200 GeV,5317,3224,51.712,46.644,0.874,0.995,3.828,9.801
6,EM0,1346,698,13.091,10.098,0.379,0.401,5.42,22.859
7,EM1,1045,653,10.163,9.447,0.33,0.387,1.408,7.045
8,EM2,771,490,7.499,7.089,0.28,0.331,0.944,5.46
9,EM3,552,336,5.369,4.861,0.235,0.272,1.414,9.453


scale = 3

In [None]:
import pandas as pd
import os
import glob
import numpy as np

# Directories to scan; each one is searched for the EM*.saf files listed below
base_paths = [
    '/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# File names EM0.saf ... EM12.saf
em_file_names = ['EM{}.saf'.format(region) for region in range(13)]

# Cut names looked up in the SAF files: the common selection first, then the
# MET windows, then the final open-ended entry.
_common_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
_met_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_window_cuts = [
    f'{low} < MET < {high} GeV'
    for low, high in zip(_met_edges, _met_edges[1:])
]
# NOTE(review): the last name reads 'MET < 1200 GeV' although it follows the
# 1100-1200 window -- confirm the spelling against the SAF files.
desired_cuts = _common_cuts + _met_window_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Parse a SAF cutflow file into a ``{cut name: nentries}`` dictionary.

    A counter is recognised as a line starting with ``<Counter>``, followed
    by a line carrying the double-quoted cut name, followed by a line that
    contains the word ``nentries`` and whose first whitespace-separated token
    is the entry count.

    Counters cut short by the end of the file, or whose name line carries no
    double quote, are skipped instead of raising ``IndexError``.

    Args:
        em_file_path: Path of the SAF file to read.

    Returns:
        dict mapping each cut name to its integer entry count. If a name
        occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first token of an ``nentries`` line is not an
            integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # The name line and the count line must both exist after the marker.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted cut name on the following line: not a usable counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

def _percent_ratio(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0 if the denominator is 0."""
    return (numerator / denominator) * 100 if denominator else 0


def _ratio_error(efficiency, numerator, denominator):
    """Return ``efficiency * sqrt(1/numerator + 1/denominator)``.

    Yields 0 when either count is not positive, so that empty selections do
    not divide by zero.
    """
    if numerator > 0 and denominator > 0:
        return efficiency * np.sqrt(1 / numerator + 1 / denominator)
    return 0


def _significance(value_a, error_a, value_b, error_b):
    """Return ``|value_a - value_b|`` divided by the errors added in quadrature (0 if both errors are 0)."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Official ATLAS values, one per row of the table built below.
# They do not depend on the base path, so they are defined once.
atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM files that actually exist under the current base path
    em_files_paths = [
        path
        for file_name in em_file_names
        for path in glob.glob(os.path.join(base_path, file_name))
    ]

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the counters of all files (for a cut present in several files the
    # last file wins); cuts found in no file are counted as 0.
    merged_counts = {}
    for cuts in nentries_all_final.values():
        merged_counts.update(cuts)
    nentries_array_final_complete = [merged_counts.get(cut, 0) for cut in desired_cuts]

    ma5_values = nentries_array_final_complete

    # Reference totals are the first row; fall back to 1 to avoid division by zero
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row
    eff_atlas_cumulative = [_percent_ratio(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent_ratio(count, total_ma5) for count in ma5_values]

    # Efficiencies per cut: every row relative to the previous row (first row is 100%).
    # A previous row of 0 (cut missing from the SAF files) gives 0 instead of
    # raising ZeroDivisionError.
    eff_atlas = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors of the per-cut efficiencies (first row has no error)
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff_ma5_per_cut[i], ma5_values[i], ma5_values[i - 1])
        for i in range(1, len(ma5_values))
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff_atlas[i], atlas_values[i], atlas_values[i - 1])
        for i in range(1, len(atlas_values))
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas, eff_atlas_errors, eff_ma5_per_cut, eff_ma5_delta_per_cut
        )
    ]

    # Statistical errors of the cumulative efficiencies (first row has no error)
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas_cumulative, eff_atlas_cumulative_errors,
            eff_ma5_cumulative, eff_ma5_cumulative_errors
        )
    ]

    # Relative differences of the cumulative efficiencies, in percent
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut quantities (eff_atlas, eff_atlas_errors, eff_ma5_per_cut,
    # eff_ma5_delta_per_cut, sigma_values) are computed above but not tabulated.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,6601,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,6275,95.195,95.061,1.344,1.676,0.062,0.141
2,Njets <= 4,9455,5967,91.957,90.395,1.31,1.615,0.751,1.698
3,"min Delta Phi(jets, ETmiss) cut",9104,5608,88.543,84.957,1.274,1.543,1.792,4.05
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,3678,64.598,55.719,1.017,1.146,5.794,13.746
5,ETmiss>200 GeV,5317,3039,51.712,46.038,0.874,1.009,4.25,10.971
6,EM0,1346,691,13.091,10.468,0.379,0.419,4.642,20.035
7,EM1,1045,579,10.163,8.771,0.33,0.38,2.765,13.696
8,EM2,771,476,7.499,7.211,0.28,0.342,0.65,3.834
9,EM3,552,366,5.369,5.545,0.235,0.298,0.464,3.278


    f.write("generate p p > xd xd~ / h w+ w- z a\n")
    f.write("add process  p p > xd xd~ j / h w+ w- z a\n")
 scale = 3


In [2]:
import pandas as pd
import os
import glob
import numpy as np

# Directories to scan; each one is searched for the EM*.saf files listed below
base_paths = [
    '/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# File names EM0.saf ... EM12.saf
em_file_names = ['EM{}.saf'.format(region) for region in range(13)]

# Cut names looked up in the SAF files: the common selection first, then the
# MET windows, then the final open-ended entry.
_common_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
_met_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_window_cuts = [
    f'{low} < MET < {high} GeV'
    for low, high in zip(_met_edges, _met_edges[1:])
]
# NOTE(review): the last name reads 'MET < 1200 GeV' although it follows the
# 1100-1200 window -- confirm the spelling against the SAF files.
desired_cuts = _common_cuts + _met_window_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Parse a SAF cutflow file into a ``{cut name: nentries}`` dictionary.

    A counter is recognised as a line starting with ``<Counter>``, followed
    by a line carrying the double-quoted cut name, followed by a line that
    contains the word ``nentries`` and whose first whitespace-separated token
    is the entry count.

    Counters cut short by the end of the file, or whose name line carries no
    double quote, are skipped instead of raising ``IndexError``.

    Args:
        em_file_path: Path of the SAF file to read.

    Returns:
        dict mapping each cut name to its integer entry count. If a name
        occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first token of an ``nentries`` line is not an
            integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # The name line and the count line must both exist after the marker.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted cut name on the following line: not a usable counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

def _percent_ratio(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0 if the denominator is 0."""
    return (numerator / denominator) * 100 if denominator else 0


def _ratio_error(efficiency, numerator, denominator):
    """Return ``efficiency * sqrt(1/numerator + 1/denominator)``.

    Yields 0 when either count is not positive, so that empty selections do
    not divide by zero.
    """
    if numerator > 0 and denominator > 0:
        return efficiency * np.sqrt(1 / numerator + 1 / denominator)
    return 0


def _significance(value_a, error_a, value_b, error_b):
    """Return ``|value_a - value_b|`` divided by the errors added in quadrature (0 if both errors are 0)."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Official ATLAS values, one per row of the table built below.
# They do not depend on the base path, so they are defined once.
atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM files that actually exist under the current base path
    em_files_paths = [
        path
        for file_name in em_file_names
        for path in glob.glob(os.path.join(base_path, file_name))
    ]

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the counters of all files (for a cut present in several files the
    # last file wins); cuts found in no file are counted as 0.
    merged_counts = {}
    for cuts in nentries_all_final.values():
        merged_counts.update(cuts)
    nentries_array_final_complete = [merged_counts.get(cut, 0) for cut in desired_cuts]

    ma5_values = nentries_array_final_complete

    # Reference totals are the first row; fall back to 1 to avoid division by zero
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row
    eff_atlas_cumulative = [_percent_ratio(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent_ratio(count, total_ma5) for count in ma5_values]

    # Efficiencies per cut: every row relative to the previous row (first row is 100%).
    # A previous row of 0 (cut missing from the SAF files) gives 0 instead of
    # raising ZeroDivisionError.
    eff_atlas = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors of the per-cut efficiencies (first row has no error)
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff_ma5_per_cut[i], ma5_values[i], ma5_values[i - 1])
        for i in range(1, len(ma5_values))
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff_atlas[i], atlas_values[i], atlas_values[i - 1])
        for i in range(1, len(atlas_values))
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas, eff_atlas_errors, eff_ma5_per_cut, eff_ma5_delta_per_cut
        )
    ]

    # Statistical errors of the cumulative efficiencies (first row has no error)
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas_cumulative, eff_atlas_cumulative_errors,
            eff_ma5_cumulative, eff_ma5_cumulative_errors
        )
    ]

    # Relative differences of the cumulative efficiencies, in percent
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut quantities (eff_atlas, eff_atlas_errors, eff_ma5_per_cut,
    # eff_ma5_delta_per_cut, sigma_values) are computed above but not tabulated.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)



DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,1551,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,1500,95.195,96.712,1.344,3.502,0.404,1.593
2,Njets <= 4,9455,1446,91.957,93.23,1.31,3.408,0.349,1.385
3,"min Delta Phi(jets, ETmiss) cut",9104,1398,88.543,90.135,1.274,3.324,0.447,1.798
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,934,64.598,60.219,1.017,2.494,1.626,6.779
5,ETmiss>200 GeV,5317,778,51.712,50.161,0.874,2.204,0.654,2.998
6,EM0,1346,188,13.091,12.121,0.379,0.936,0.96,7.407
7,EM1,1045,163,10.163,10.509,0.33,0.865,0.374,3.404
8,EM2,771,107,7.499,6.899,0.28,0.69,0.806,7.998
9,EM3,552,94,5.369,6.061,0.235,0.644,1.01,12.89


    f.write("generate p p > xd xd~ j / h w+ w- z a\n")
 scale = 3

In [3]:
import pandas as pd
import os
import glob
import numpy as np

# Directories to scan; each one is searched for the EM*.saf files listed below
base_paths = [
    '/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# File names EM0.saf ... EM12.saf
em_file_names = ['EM{}.saf'.format(region) for region in range(13)]

# Cut names looked up in the SAF files: the common selection first, then the
# MET windows, then the final open-ended entry.
_common_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
_met_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_window_cuts = [
    f'{low} < MET < {high} GeV'
    for low, high in zip(_met_edges, _met_edges[1:])
]
# NOTE(review): the last name reads 'MET < 1200 GeV' although it follows the
# 1100-1200 window -- confirm the spelling against the SAF files.
desired_cuts = _common_cuts + _met_window_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Parse a SAF cutflow file into a ``{cut name: nentries}`` dictionary.

    A counter is recognised as a line starting with ``<Counter>``, followed
    by a line carrying the double-quoted cut name, followed by a line that
    contains the word ``nentries`` and whose first whitespace-separated token
    is the entry count.

    Counters cut short by the end of the file, or whose name line carries no
    double quote, are skipped instead of raising ``IndexError``.

    Args:
        em_file_path: Path of the SAF file to read.

    Returns:
        dict mapping each cut name to its integer entry count. If a name
        occurs more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first token of an ``nentries`` line is not an
            integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # The name line and the count line must both exist after the marker.
        if i + 2 >= len(lines):
            break
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No quoted cut name on the following line: not a usable counter.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

def _percent_ratio(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0 if the denominator is 0."""
    return (numerator / denominator) * 100 if denominator else 0


def _ratio_error(efficiency, numerator, denominator):
    """Return ``efficiency * sqrt(1/numerator + 1/denominator)``.

    Yields 0 when either count is not positive, so that empty selections do
    not divide by zero.
    """
    if numerator > 0 and denominator > 0:
        return efficiency * np.sqrt(1 / numerator + 1 / denominator)
    return 0


def _significance(value_a, error_a, value_b, error_b):
    """Return ``|value_a - value_b|`` divided by the errors added in quadrature (0 if both errors are 0)."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Official ATLAS values, one per row of the table built below.
# They do not depend on the base path, so they are defined once.
atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect the EM files that actually exist under the current base path
    em_files_paths = [
        path
        for file_name in em_file_names
        for path in glob.glob(os.path.join(base_path, file_name))
    ]

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # Merge the counters of all files (for a cut present in several files the
    # last file wins); cuts found in no file are counted as 0.
    merged_counts = {}
    for cuts in nentries_all_final.values():
        merged_counts.update(cuts)
    nentries_array_final_complete = [merged_counts.get(cut, 0) for cut in desired_cuts]

    ma5_values = nentries_array_final_complete

    # Reference totals are the first row; fall back to 1 to avoid division by zero
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0] or 1

    # Cumulative efficiencies: every row relative to the first row
    eff_atlas_cumulative = [_percent_ratio(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent_ratio(count, total_ma5) for count in ma5_values]

    # Efficiencies per cut: every row relative to the previous row (first row is 100%).
    # A previous row of 0 (cut missing from the SAF files) gives 0 instead of
    # raising ZeroDivisionError.
    eff_atlas = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent_ratio(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors of the per-cut efficiencies (first row has no error)
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff_ma5_per_cut[i], ma5_values[i], ma5_values[i - 1])
        for i in range(1, len(ma5_values))
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff_atlas[i], atlas_values[i], atlas_values[i - 1])
        for i in range(1, len(atlas_values))
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas, eff_atlas_errors, eff_ma5_per_cut, eff_ma5_delta_per_cut
        )
    ]

    # Statistical errors of the cumulative efficiencies (first row has no error)
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, atlas_err, ma5_eff, ma5_err)
        for atlas_eff, atlas_err, ma5_eff, ma5_err in zip(
            eff_atlas_cumulative, eff_atlas_cumulative_errors,
            eff_ma5_cumulative, eff_ma5_cumulative_errors
        )
    ]

    # Relative differences of the cumulative efficiencies, in percent
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The per-cut quantities (eff_atlas, eff_atlas_errors, eff_ma5_per_cut,
    # eff_ma5_delta_per_cut, sigma_values) are computed above but not tabulated.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,4984,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,4744,95.195,95.185,1.344,1.931,0.005,0.011
2,Njets <= 4,9455,4522,91.957,90.73,1.31,1.863,0.538,1.334
3,"min Delta Phi(jets, ETmiss) cut",9104,4232,88.543,84.912,1.274,1.775,1.662,4.101
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,2781,64.598,55.799,1.017,1.321,5.279,13.622
5,ETmiss>200 GeV,5317,2304,51.712,46.228,0.874,1.165,3.767,10.605
6,EM0,1346,565,13.091,11.336,0.379,0.503,2.784,13.403
7,EM1,1045,464,10.163,9.31,0.33,0.452,1.526,8.399
8,EM2,771,359,7.499,7.203,0.28,0.394,0.612,3.941
9,EM3,552,265,5.369,5.317,0.235,0.335,0.126,0.961


    f.write("generate p p > xd xd~ / h w+ w- z a\n")
    f.write("add process  p p > xd xd~ j / h w+ w- z a\n")
    f.write("set run_card dynamical_scale_choice 0\n")
    f.write("set run_card scalefact 1.0\n")
    f.write("set run_card ptj 150\n")

In [4]:
import pandas as pd
import os
import glob
import numpy as np

# Directories holding the cutflow files; every entry is expanded with glob
# further down, so shell-style wildcards are allowed.
base_paths = [
    #'',
    '/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# One cutflow file per region: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names looked up in the SAF files, listed in the row order of the
# final table (preselection cuts first, then one MET window per EM file).
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Return ``{cut name: nentries}`` for every counter found in a SAF file.

    A counter is recognised by a line starting with ``<Counter>``. The line
    right after it must carry the cut name between double quotes, and the
    line after that must contain the word ``nentries``; its first
    whitespace-separated token is read as the integer entry count.

    Counters that are cut off by the end of the file, or whose name line has
    no quoted text, are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps each cut name to its integer ``nentries`` value. When a name
        occurs more than once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue

        # Slicing never raises, so a file truncated right after the tag just
        # yields fewer than the two lines we need.
        following = lines[i + 1:i + 3]
        if len(following) < 2:
            continue
        name_line, count_line = following

        # The cut name is whatever sits between the first pair of quotes.
        name_parts = name_line.strip().split('"')
        if len(name_parts) < 2:
            continue

        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Small numeric helpers shared by the efficiency / error / sigma columns below.
def _percent(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0.0 if the denominator is empty."""
    return (numerator / denominator) * 100 if denominator > 0 else 0.0


def _ratio_error(efficiency, passed, reference):
    """Return ``efficiency * sqrt(1/passed + 1/reference)``, or 0 if either count is empty.

    This adds the relative 1/sqrt(N) uncertainties of both counts in quadrature.
    """
    if passed > 0 and reference > 0:
        return efficiency * np.sqrt(1 / passed + 1 / reference)
    return 0


def _significance(value_a, value_b, error_a, error_b):
    """Return ``|a - b| / sqrt(error_a**2 + error_b**2)``, or 0 when both errors vanish."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect every EM*.saf file matching the (possibly wildcarded) base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    # A wildcard base path can match several directories. Their counters are
    # merged into a single table below, so make that visible to the user.
    matched_dirs = {os.path.dirname(path) for path in em_files_paths}
    if len(matched_dirs) > 1:
        print(
            f"Warning: base path {base_path} matches {len(matched_dirs)} directories; "
            "counters from different directories overwrite each other."
        )

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One slot per desired cut; a cut that appears in several files keeps the
    # value from the last file read, and a cut found nowhere stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference counts: the first row of each cutflow.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0]
    if total_ma5 == 0:
        print(
            f"Warning: no entries found for '{desired_cuts[0]}' under {base_path}; "
            "MA5 efficiencies are reported as 0."
        )

    # Cumulative efficiencies: every row relative to the first row.
    eff_atlas_cumulative = [_percent(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent(count, total_ma5) for count in ma5_values]

    # Per-cut efficiencies: every row relative to the previous row (first row = 100%).
    # An empty previous row gives 0 instead of raising ZeroDivisionError.
    # NOTE(review): rows from EM0 onward look like separate MET windows, so the
    # ratio between adjacent rows there is bin-to-bin rather than a cut
    # efficiency -- confirm before re-enabling the per-cut columns.
    eff_atlas = [100] + [
        _percent(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors on the per-cut efficiencies (first row has none).
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_ma5_per_cut[1:], ma5_values, ma5_values[1:])
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_atlas[1:], atlas_values, atlas_values[1:])
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas, eff_ma5_per_cut, eff_atlas_errors, eff_ma5_delta_per_cut)
    ]

    # Statistical errors on the cumulative efficiencies (first row has none).
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas_cumulative, eff_ma5_cumulative,
               eff_atlas_cumulative_errors, eff_ma5_cumulative_errors)
    ]

    # Relative difference (in percent) of the MA5 cumulative efficiency with
    # respect to the ATLAS one.
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The commented-out entries are optional per-cut columns.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,1556,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,1486,95.195,95.501,1.344,3.464,0.082,0.321
2,Njets <= 4,9455,1434,91.957,92.159,1.31,3.374,0.056,0.22
3,"min Delta Phi(jets, ETmiss) cut",9104,1377,88.543,88.496,1.274,3.274,0.013,0.053
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,931,64.598,59.833,1.017,2.479,1.778,7.377
5,ETmiss>200 GeV,5317,796,51.712,51.157,0.874,2.229,0.232,1.073
6,EM0,1346,176,13.091,11.311,0.379,0.9,1.823,13.596
7,EM1,1045,150,10.163,9.64,0.33,0.824,0.589,5.149
8,EM2,771,136,7.499,8.74,0.28,0.782,1.496,16.561
9,EM3,552,79,5.369,5.077,0.235,0.586,0.462,5.429


    f.write("generate p p > xd xd~  / h w+ w- z a\n")
    #f.write("add process  p p > xd xd~ j / h w+ w- z a\n")
    scale = 0, 
    pt > 150

In [5]:
import pandas as pd
import os
import glob
import numpy as np

# Directories holding the cutflow files; every entry is expanded with glob
# further down, so shell-style wildcards are allowed.
base_paths = [
    #'',
    '/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# One cutflow file per region: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names looked up in the SAF files, listed in the row order of the
# final table (preselection cuts first, then one MET window per EM file).
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Return ``{cut name: nentries}`` for every counter found in a SAF file.

    A counter is recognised by a line starting with ``<Counter>``. The line
    right after it must carry the cut name between double quotes, and the
    line after that must contain the word ``nentries``; its first
    whitespace-separated token is read as the integer entry count.

    Counters that are cut off by the end of the file, or whose name line has
    no quoted text, are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps each cut name to its integer ``nentries`` value. When a name
        occurs more than once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue

        # Slicing never raises, so a file truncated right after the tag just
        # yields fewer than the two lines we need.
        following = lines[i + 1:i + 3]
        if len(following) < 2:
            continue
        name_line, count_line = following

        # The cut name is whatever sits between the first pair of quotes.
        name_parts = name_line.strip().split('"')
        if len(name_parts) < 2:
            continue

        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Small numeric helpers shared by the efficiency / error / sigma columns below.
def _percent(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0.0 if the denominator is empty."""
    return (numerator / denominator) * 100 if denominator > 0 else 0.0


def _ratio_error(efficiency, passed, reference):
    """Return ``efficiency * sqrt(1/passed + 1/reference)``, or 0 if either count is empty.

    This adds the relative 1/sqrt(N) uncertainties of both counts in quadrature.
    """
    if passed > 0 and reference > 0:
        return efficiency * np.sqrt(1 / passed + 1 / reference)
    return 0


def _significance(value_a, value_b, error_a, error_b):
    """Return ``|a - b| / sqrt(error_a**2 + error_b**2)``, or 0 when both errors vanish."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect every EM*.saf file matching the (possibly wildcarded) base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    # A wildcard base path can match several directories. Their counters are
    # merged into a single table below, so make that visible to the user.
    matched_dirs = {os.path.dirname(path) for path in em_files_paths}
    if len(matched_dirs) > 1:
        print(
            f"Warning: base path {base_path} matches {len(matched_dirs)} directories; "
            "counters from different directories overwrite each other."
        )

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One slot per desired cut; a cut that appears in several files keeps the
    # value from the last file read, and a cut found nowhere stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference counts: the first row of each cutflow.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0]
    if total_ma5 == 0:
        print(
            f"Warning: no entries found for '{desired_cuts[0]}' under {base_path}; "
            "MA5 efficiencies are reported as 0."
        )

    # Cumulative efficiencies: every row relative to the first row.
    eff_atlas_cumulative = [_percent(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent(count, total_ma5) for count in ma5_values]

    # Per-cut efficiencies: every row relative to the previous row (first row = 100%).
    # An empty previous row gives 0 instead of raising ZeroDivisionError.
    # NOTE(review): rows from EM0 onward look like separate MET windows, so the
    # ratio between adjacent rows there is bin-to-bin rather than a cut
    # efficiency -- confirm before re-enabling the per-cut columns.
    eff_atlas = [100] + [
        _percent(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors on the per-cut efficiencies (first row has none).
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_ma5_per_cut[1:], ma5_values, ma5_values[1:])
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_atlas[1:], atlas_values, atlas_values[1:])
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas, eff_ma5_per_cut, eff_atlas_errors, eff_ma5_delta_per_cut)
    ]

    # Statistical errors on the cumulative efficiencies (first row has none).
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas_cumulative, eff_ma5_cumulative,
               eff_atlas_cumulative_errors, eff_ma5_cumulative_errors)
    ]

    # Relative difference (in percent) of the MA5 cumulative efficiency with
    # respect to the ATLAS one.
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The commented-out entries are optional per-cut columns.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,1551,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,1500,95.195,96.712,1.344,3.502,0.404,1.593
2,Njets <= 4,9455,1446,91.957,93.23,1.31,3.408,0.349,1.385
3,"min Delta Phi(jets, ETmiss) cut",9104,1398,88.543,90.135,1.274,3.324,0.447,1.798
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,934,64.598,60.219,1.017,2.494,1.626,6.779
5,ETmiss>200 GeV,5317,778,51.712,50.161,0.874,2.204,0.654,2.998
6,EM0,1346,188,13.091,12.121,0.379,0.936,0.96,7.407
7,EM1,1045,163,10.163,10.509,0.33,0.865,0.374,3.404
8,EM2,771,107,7.499,6.899,0.28,0.69,0.806,7.998
9,EM3,552,94,5.369,6.061,0.235,0.644,1.01,12.89


    f.write("generate p p > xd xd~ j / h w+ w- z a\n")
    f.write("set run_card dynamical_scale_choice 0\n")
    f.write("set run_card scalefact 1.0\n")
    f.write("set run_card ptj 150\n")

In [1]:
import pandas as pd
import os
import glob
import numpy as np

# Directories holding the cutflow files; every entry is expanded with glob
# further down, so the wildcards in the active entry are intentional.
base_paths = [
    #'',
    #'/home/yoxara/MonoXSMS/Output_MC/Eff_MA5_LO_Tab8/_run_01_my1_2000_mxd_1/Output/SAF/spin1_run_01_my1_2000_mxd_1/atlas_exot_2018_06/Cutflows/'
    '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/*/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
]

# One cutflow file per region: EM0.saf ... EM12.saf
em_file_names = ['EM' + str(index) + '.saf' for index in range(13)]

# Counter names looked up in the SAF files, listed in the row order of the
# final table (preselection cuts first, then one MET window per EM file).
desired_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
    '200 < MET < 250 GeV',
    '250 < MET < 300 GeV',
    '300 < MET < 350 GeV',
    '350 < MET < 400 GeV',
    '400 < MET < 500 GeV',
    '500 < MET < 600 GeV',
    '600 < MET < 700 GeV',
    '700 < MET < 800 GeV',
    '800 < MET < 900 GeV',
    '900 < MET < 1000 GeV',
    '1000 < MET < 1100 GeV',
    '1100 < MET < 1200 GeV',
    'MET < 1200 GeV',
]

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Return ``{cut name: nentries}`` for every counter found in a SAF file.

    A counter is recognised by a line starting with ``<Counter>``. The line
    right after it must carry the cut name between double quotes, and the
    line after that must contain the word ``nentries``; its first
    whitespace-separated token is read as the integer entry count.

    Counters that are cut off by the end of the file, or whose name line has
    no quoted text, are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps each cut name to its integer ``nentries`` value. When a name
        occurs more than once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue

        # Slicing never raises, so a file truncated right after the tag just
        # yields fewer than the two lines we need.
        following = lines[i + 1:i + 3]
        if len(following) < 2:
            continue
        name_line, count_line = following

        # The cut name is whatever sits between the first pair of quotes.
        name_parts = name_line.strip().split('"')
        if len(name_parts) < 2:
            continue

        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Small numeric helpers shared by the efficiency / error / sigma columns below.
def _percent(numerator, denominator):
    """Return ``numerator / denominator`` in percent, or 0.0 if the denominator is empty."""
    return (numerator / denominator) * 100 if denominator > 0 else 0.0


def _ratio_error(efficiency, passed, reference):
    """Return ``efficiency * sqrt(1/passed + 1/reference)``, or 0 if either count is empty.

    This adds the relative 1/sqrt(N) uncertainties of both counts in quadrature.
    """
    if passed > 0 and reference > 0:
        return efficiency * np.sqrt(1 / passed + 1 / reference)
    return 0


def _significance(value_a, value_b, error_a, error_b):
    """Return ``|a - b| / sqrt(error_a**2 + error_b**2)``, or 0 when both errors vanish."""
    variance = error_a**2 + error_b**2
    return abs(value_a - value_b) / np.sqrt(variance) if variance > 0 else 0


# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Collect every EM*.saf file matching the (possibly wildcarded) base path.
    em_files_paths = []
    for file_name in em_file_names:
        em_files_paths.extend(glob.glob(os.path.join(base_path, file_name)))

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    # A wildcard base path can match several directories. Their counters are
    # merged into a single table below, so make that visible to the user.
    matched_dirs = {os.path.dirname(path) for path in em_files_paths}
    if len(matched_dirs) > 1:
        print(
            f"Warning: base path {base_path} matches {len(matched_dirs)} directories; "
            "counters from different directories overwrite each other."
        )

    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One slot per desired cut; a cut that appears in several files keeps the
    # value from the last file read, and a cut found nowhere stays at 0.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Official ATLAS values
    atlas_values = [10282, 9788, 9455, 9104, 6642, 5317,
                    1346, 1045, 771, 552, 684, 371, 212, 126, 79, 48, 29, 19, 35]

    ma5_values = nentries_array_final_complete

    # Reference counts: the first row of each cutflow.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0]
    if total_ma5 == 0:
        print(
            f"Warning: no entries found for '{desired_cuts[0]}' under {base_path}; "
            "MA5 efficiencies are reported as 0."
        )

    # Cumulative efficiencies: every row relative to the first row.
    eff_atlas_cumulative = [_percent(count, total_atlas) for count in atlas_values]
    eff_ma5_cumulative = [_percent(count, total_ma5) for count in ma5_values]

    # Per-cut efficiencies: every row relative to the previous row (first row = 100%).
    # An empty previous row gives 0 instead of raising ZeroDivisionError.
    # NOTE(review): rows from EM0 onward look like separate MET windows, so the
    # ratio between adjacent rows there is bin-to-bin rather than a cut
    # efficiency -- confirm before re-enabling the per-cut columns.
    eff_atlas = [100] + [
        _percent(current, previous)
        for previous, current in zip(atlas_values, atlas_values[1:])
    ]
    eff_ma5_per_cut = [100] + [
        _percent(current, previous)
        for previous, current in zip(ma5_values, ma5_values[1:])
    ]

    # Statistical errors on the per-cut efficiencies (first row has none).
    eff_ma5_delta_per_cut = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_ma5_per_cut[1:], ma5_values, ma5_values[1:])
    ]
    eff_atlas_errors = [0] + [
        _ratio_error(eff, current, previous)
        for eff, previous, current in zip(eff_atlas[1:], atlas_values, atlas_values[1:])
    ]

    # Sigma based on the difference of efficiencies per cut
    sigma_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas, eff_ma5_per_cut, eff_atlas_errors, eff_ma5_delta_per_cut)
    ]

    # Statistical errors on the cumulative efficiencies (first row has none).
    eff_atlas_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_atlas)
        for eff, count in zip(eff_atlas_cumulative[1:], atlas_values[1:])
    ]
    eff_ma5_cumulative_errors = [0] + [
        _ratio_error(eff, count, total_ma5)
        for eff, count in zip(eff_ma5_cumulative[1:], ma5_values[1:])
    ]

    # Sigma based on the cumulative efficiencies
    sigma_cumulative_values = [
        _significance(atlas_eff, ma5_eff, atlas_err, ma5_err)
        for atlas_eff, ma5_eff, atlas_err, ma5_err
        in zip(eff_atlas_cumulative, eff_ma5_cumulative,
               eff_atlas_cumulative_errors, eff_ma5_cumulative_errors)
    ]

    # Relative difference (in percent) of the MA5 cumulative efficiency with
    # respect to the ATLAS one.
    relative_differences = [
        abs(1 - (ma5_eff / atlas_eff)) * 100 if atlas_eff > 0 else 0
        for atlas_eff, ma5_eff in zip(eff_atlas_cumulative, eff_ma5_cumulative)
    ]

    # Create DataFrame with the efficiencies, sigma, and relative differences.
    # The commented-out entries are optional per-cut columns.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        #"ATLAS Efficiencies per Cut [%]": eff_atlas,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        #"ATLAS Efficiency Errors [%]": eff_atlas_errors,
        #"MA5 Efficiencies per Cut [%]": eff_ma5_per_cut,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        #"MA5 Delta per Cut [%]": eff_ma5_delta_per_cut,
        #"Sigma (Difference of Efficiencies)": sigma_values,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    df = pd.DataFrame(DF).round(3)

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Tab8/*/Output/SAF/*/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),10282,3305,100.0,100.0,0.0,0.0,0.0,0.0
1,Lepton veto,9788,3139,95.195,94.977,1.344,2.367,0.08,0.229
2,Njets <= 4,9455,3033,91.957,91.77,1.31,2.308,0.07,0.203
3,"min Delta Phi(jets, ETmiss) cut",9104,2900,88.543,87.746,1.274,2.233,0.31,0.9
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,6642,1973,64.598,59.697,1.017,1.698,2.476,7.587
5,ETmiss>200 GeV,5317,1682,51.712,50.893,0.874,1.524,0.466,1.584
6,EM0,1346,381,13.091,11.528,0.379,0.624,2.141,11.939
7,EM1,1045,333,10.163,10.076,0.33,0.579,0.132,0.863
8,EM2,771,282,7.499,8.533,0.28,0.529,1.727,13.789
9,EM3,552,186,5.369,5.628,0.235,0.424,0.535,4.829
