In [2]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse. Each entry is joined with the EM file names
# below and passed through glob, so wildcards are allowed. The MonoXSMS entries
# are disabled and kept only for reference.
base_paths = [
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_01_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_02_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_03_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_04_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
   '/home/yoxara/Output_MC/Eff_MA5_LO_Spin0/_run_01_my0_350_mxd_1/Output/SAF/pseudos_run_01_my0_350_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# One cutflow file per region: EM0.saf, EM1.saf, ..., EM12.saf
em_file_names = ['EM%d.saf' % region for region in range(13)]

# Cut names to look up in the SAF files, in the order they appear in the table.
# First the common selection cuts ...
_selection_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
# ... then one "<low> < MET < <high> GeV" label per pair of adjacent MET edges ...
_met_bin_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_bin_cuts = [
    '%d < MET < %d GeV' % (low, high)
    for low, high in zip(_met_bin_edges, _met_bin_edges[1:])
]
# ... and finally the last region.
# NOTE(review): this label reads 'MET < 1200 GeV' although it follows the
# 1100-1200 bin; confirm it matches the counter name written in EM12.saf.
desired_cuts = _selection_cuts + _met_bin_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and return the entry count of every cut in it.

    A counter is recognised as three consecutive lines:
      1. a line starting with ``<Counter>``,
      2. a line holding the cut name between double quotes,
      3. a line containing the word ``nentries`` whose first
         whitespace-separated token is the count.

    Counters that do not match this layout (file truncated right after the
    header, name line without quotes, third line without ``nentries``) are
    skipped instead of aborting the whole parse.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name (str) to its count (int). If the same name occurs more
        than once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A header within the last two lines has no name/count lines after it;
        # indexing lines[i + 2] there would raise IndexError.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No double quote on the name line, so there is no name to extract.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Resolve the EM*.saf files under this base path. glob is used so that a
    # base path containing wildcards expands to every matching directory.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    # {file path: {cut name: nentries}}
    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One count per entry of desired_cuts; stays 0 when no file provides the cut.
    # If several files contain the same cut name, the last file read wins.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Official ATLAS values (one per entry of desired_cuts, same order)
    atlas_values = [199254, 187094, 176978, 168962, 90366, 60133,
                    25162, 15549, 8648, 4717, 4034, 1303, 444, 156, 67, 28, 12, 7, 6]

    ma5_values = nentries_array_final_complete

    # Reference totals: the count after the first cut.
    # total_ma5 is deliberately NOT replaced by 1 when it is 0: with such a
    # fallback the `total_ma5 > 0` guards below could never fire and a missing
    # first cut would produce "efficiencies" equal to count * 100. With the raw
    # value, those rows become None (shown as 'N/A') instead.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0]

    # Cumulative efficiencies [%]: count after each cut relative to the total.
    eff_atlas_cumulative = [
        (atlas_values[i] / total_atlas) * 100 for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative = [
        (ma5_values[i] / total_ma5) * 100 if total_ma5 > 0 and ma5_values[i] > 0 else None
        for i in range(len(ma5_values))
    ]

    # Step efficiencies [%]: count of each row relative to the previous row.
    # The first row has no predecessor and is set to 100.
    # NOTE(review): for the MET-bin rows this is the ratio of two adjacent bins
    # rather than the efficiency of a cut - confirm this is intended. These
    # per-row quantities are not part of the table built below.
    eff_atlas = [
        (atlas_values[i] / atlas_values[i - 1]) * 100
        for i in range(1, len(atlas_values))
    ]
    eff_atlas.insert(0, 100)  # Insert 100% for the first cut (initial state)

    eff_ma5_per_cut = [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else None
        for i in range(1, len(ma5_values))
    ]
    eff_ma5_per_cut.insert(0, 100)  # Insert 100% for the first cut (initial state)

    # Errors on the step efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}).
    # NOTE(review): this propagates the two counts as if they were independent;
    # a binomial error may be more appropriate - TODO confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else None
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else None
        for i in range(len(atlas_values))
    ]

    # Significance of the ATLAS/MA5 difference in step efficiency:
    # |difference| divided by the two errors added in quadrature.
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i] is not None and eff_ma5_delta_per_cut[i] is not None else None
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total).
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if atlas_values[i] > 0 else None
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if ma5_values[i] > 0 and total_ma5 > 0 else None
        for i in range(len(ma5_values))
    ]

    # Significance of the ATLAS/MA5 difference in cumulative efficiency.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i] is not None and eff_ma5_cumulative_errors[i] is not None else None
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference [%] of the MA5 cumulative efficiency w.r.t. ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i])) * 100 if eff_atlas_cumulative[i] > 0 and eff_ma5_cumulative[i] is not None else None
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; every list has one entry per desired cut.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    # Round for display and show unavailable values as 'N/A'.
    df = pd.DataFrame(DF).round(3).replace({None: 'N/A'})

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/Output_MC/Eff_MA5_LO_Spin0/_run_01_my0_350_mxd_1/Output/SAF/pseudos_run_01_my0_350_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),199254,140088,100.0,100.0,0.317,0.378,0.0,0.0
1,Lepton veto,187094,136453,93.897,97.405,0.302,0.37,7.336,3.736
2,Njets <= 4,176978,131155,88.82,93.623,0.29,0.36,10.393,5.408
3,"min Delta Phi(jets, ETmiss) cut",168962,122672,84.797,87.568,0.28,0.342,6.26,3.267
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,90366,68671,45.352,49.02,0.182,0.228,12.563,8.087
5,ETmiss>200 GeV,60133,47175,30.179,33.675,0.14,0.179,15.354,11.585
6,EM0,25162,18928,12.628,13.512,0.084,0.105,6.569,6.996
7,EM1,15549,12554,7.804,8.962,0.065,0.083,10.945,14.838
8,EM2,8648,7228,4.34,5.16,0.048,0.062,10.452,18.88
9,EM3,4717,3871,2.367,2.763,0.035,0.045,6.952,16.725


In [None]:
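# NOTE(review): the lines below look like MadGraph5_aMC@NLO generation settings
# kept as a reminder; they are not Python, so they are commented out to keep
# "Restart & Run All" from failing on this cell. Consider moving them to a markdown cell.
# generate p p > xd xd~ j/  h w+ w- z a [noborn=QCD]
# SCALE = 0
# PTJ 100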


In [1]:
import pandas as pd
import os
import glob
import numpy as np

# Cutflow directories to analyse. Each entry is joined with the EM file names
# below and passed through glob, so wildcards are allowed. The MonoXSMS entries
# are disabled and kept only for reference.
base_paths = [
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_01_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_02_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_03_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
  #  '/home/yoxara/MonoXSMS/Output_MC/_run_04_mMed_350_mDM_1/Output/SAF/*/atlas_exot_2018_06/Cutflows/',
   '/home/yoxara/workstation/Output_MC/Eff_MA5_LO_Spin0/_run_01_my0_350_mxd_1/Output/SAF/pseudos_run_01_my0_350_mxd_1/atlas_exot_2018_06/Cutflows/',
]

# One cutflow file per region: EM0.saf, EM1.saf, ..., EM12.saf
em_file_names = ['EM%d.saf' % region for region in range(13)]

# Cut names to look up in the SAF files, in the order they appear in the table.
# First the common selection cuts ...
_selection_cuts = [
    'MET > 150 GeV',
    'Lepton Veto',
    'Njets <= 4',
    'dphijmet > 0.4(0.6)',
    'Leading jet pT > 150 GeV',
    'MET > 200 GeV',
]
# ... then one "<low> < MET < <high> GeV" label per pair of adjacent MET edges ...
_met_bin_edges = [200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
_met_bin_cuts = [
    '%d < MET < %d GeV' % (low, high)
    for low, high in zip(_met_bin_edges, _met_bin_edges[1:])
]
# ... and finally the last region.
# NOTE(review): this label reads 'MET < 1200 GeV' although it follows the
# 1100-1200 bin; confirm it matches the counter name written in EM12.saf.
desired_cuts = _selection_cuts + _met_bin_cuts + ['MET < 1200 GeV']

# Function to extract relevant cuts and their nentries from a SAF file
def extract_all_cuts_nentries(em_file_path):
    """Read a SAF cutflow file and return the entry count of every cut in it.

    A counter is recognised as three consecutive lines:
      1. a line starting with ``<Counter>``,
      2. a line holding the cut name between double quotes,
      3. a line containing the word ``nentries`` whose first
         whitespace-separated token is the count.

    Counters that do not match this layout (file truncated right after the
    header, name line without quotes, third line without ``nentries``) are
    skipped instead of aborting the whole parse.

    Parameters
    ----------
    em_file_path : str
        Path of the SAF file to read.

    Returns
    -------
    dict
        Maps cut name (str) to its count (int). If the same name occurs more
        than once, the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If the first token of an ``nentries`` line is not an integer.
    """
    with open(em_file_path, 'r') as file:
        lines = file.readlines()

    nentries = {}
    for i, line in enumerate(lines):
        if not line.startswith('<Counter>'):
            continue
        # A header within the last two lines has no name/count lines after it;
        # indexing lines[i + 2] there would raise IndexError.
        if i + 2 >= len(lines):
            continue
        name_parts = lines[i + 1].strip().split('"')
        if len(name_parts) < 2:
            # No double quote on the name line, so there is no name to extract.
            continue
        count_line = lines[i + 2]
        if 'nentries' in count_line:
            nentries[name_parts[1]] = int(count_line.split()[0])
    return nentries

# Iterate over each base path and create a DataFrame for each
for base_path in base_paths:
    # Resolve the EM*.saf files under this base path. glob is used so that a
    # base path containing wildcards expands to every matching directory.
    em_files_paths = []
    for file_name in em_file_names:
        matched_files = glob.glob(os.path.join(base_path, file_name))
        em_files_paths.extend(matched_files)

    # Notify if no files were found for the current base path
    if not em_files_paths:
        print(f"No input files found for base path: {base_path}")
        continue

    # {file path: {cut name: nentries}}
    nentries_all_final = {em_file: extract_all_cuts_nentries(em_file) for em_file in em_files_paths}

    # One count per entry of desired_cuts; stays 0 when no file provides the cut.
    # If several files contain the same cut name, the last file read wins.
    nentries_array_final_complete = [0] * len(desired_cuts)
    for cuts in nentries_all_final.values():
        for i, cut in enumerate(desired_cuts):
            if cut in cuts:
                nentries_array_final_complete[i] = cuts[cut]

    # Official ATLAS values (one per entry of desired_cuts, same order)
    atlas_values = [199254, 187094, 176978, 168962, 90366, 60133,
                    25162, 15549, 8648, 4717, 4034, 1303, 444, 156, 67, 28, 12, 7, 6]

    ma5_values = nentries_array_final_complete

    # Reference totals: the count after the first cut.
    # total_ma5 is deliberately NOT replaced by 1 when it is 0: with such a
    # fallback the `total_ma5 > 0` guards below could never fire and a missing
    # first cut would produce "efficiencies" equal to count * 100. With the raw
    # value, those rows become None (shown as 'N/A') instead.
    total_atlas, total_ma5 = atlas_values[0], ma5_values[0]

    # Cumulative efficiencies [%]: count after each cut relative to the total.
    eff_atlas_cumulative = [
        (atlas_values[i] / total_atlas) * 100 for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative = [
        (ma5_values[i] / total_ma5) * 100 if total_ma5 > 0 and ma5_values[i] > 0 else None
        for i in range(len(ma5_values))
    ]

    # Step efficiencies [%]: count of each row relative to the previous row.
    # The first row has no predecessor and is set to 100.
    # NOTE(review): for the MET-bin rows this is the ratio of two adjacent bins
    # rather than the efficiency of a cut - confirm this is intended. These
    # per-row quantities are not part of the table built below.
    eff_atlas = [
        (atlas_values[i] / atlas_values[i - 1]) * 100
        for i in range(1, len(atlas_values))
    ]
    eff_atlas.insert(0, 100)  # Insert 100% for the first cut (initial state)

    eff_ma5_per_cut = [
        (ma5_values[i] / ma5_values[i - 1]) * 100 if ma5_values[i - 1] > 0 else None
        for i in range(1, len(ma5_values))
    ]
    eff_ma5_per_cut.insert(0, 100)  # Insert 100% for the first cut (initial state)

    # Errors on the step efficiencies: eff * sqrt(1/n_i + 1/n_{i-1}).
    # NOTE(review): this propagates the two counts as if they were independent;
    # a binomial error may be more appropriate - TODO confirm.
    eff_ma5_delta_per_cut = [
        (eff_ma5_per_cut[i] * np.sqrt(1 / ma5_values[i] + 1 / ma5_values[i - 1]))
        if i > 0 and ma5_values[i] > 0 and ma5_values[i - 1] > 0 else None
        for i in range(len(ma5_values))
    ]
    eff_atlas_errors = [
        (eff_atlas[i] * np.sqrt(1 / atlas_values[i] + 1 / atlas_values[i - 1]))
        if i > 0 and atlas_values[i] > 0 and atlas_values[i - 1] > 0 else None
        for i in range(len(atlas_values))
    ]

    # Significance of the ATLAS/MA5 difference in step efficiency:
    # |difference| divided by the two errors added in quadrature.
    sigma_values = [
        abs(eff_atlas[i] - eff_ma5_per_cut[i]) / np.sqrt(eff_atlas_errors[i]**2 + eff_ma5_delta_per_cut[i]**2)
        if eff_atlas_errors[i] is not None and eff_ma5_delta_per_cut[i] is not None else None
        for i in range(len(eff_atlas))
    ]

    # Errors on the cumulative efficiencies: eff * sqrt(1/n_i + 1/n_total).
    eff_atlas_cumulative_errors = [
        (eff_atlas_cumulative[i] * np.sqrt(1 / atlas_values[i] + 1 / total_atlas))
        if atlas_values[i] > 0 else None
        for i in range(len(atlas_values))
    ]
    eff_ma5_cumulative_errors = [
        (eff_ma5_cumulative[i] * np.sqrt(1 / ma5_values[i] + 1 / total_ma5))
        if ma5_values[i] > 0 and total_ma5 > 0 else None
        for i in range(len(ma5_values))
    ]

    # Significance of the ATLAS/MA5 difference in cumulative efficiency.
    sigma_cumulative_values = [
        abs(eff_atlas_cumulative[i] - eff_ma5_cumulative[i]) / np.sqrt(eff_atlas_cumulative_errors[i]**2 + eff_ma5_cumulative_errors[i]**2)
        if eff_atlas_cumulative_errors[i] is not None and eff_ma5_cumulative_errors[i] is not None else None
        for i in range(len(eff_atlas_cumulative))
    ]

    # Relative difference [%] of the MA5 cumulative efficiency w.r.t. ATLAS.
    relative_differences = [
        abs(1 - (eff_ma5_cumulative[i] / eff_atlas_cumulative[i])) * 100 if eff_atlas_cumulative[i] > 0 and eff_ma5_cumulative[i] is not None else None
        for i in range(len(eff_atlas_cumulative))
    ]

    # Table columns; every list has one entry per desired cut.
    DF = {
        "Cuts": [
            r"Total events (ETmiss>150 GeV)", "Lepton veto", r"Njets <= 4",
            r"min Delta Phi(jets, ETmiss) cut", r"Lead.Jet pT>150GeV & Jet |eta|<2.4",
            r"ETmiss>200 GeV", "EM0", "EM1", "EM2", "EM3", "EM4", "EM5", "EM6",
            "EM7", "EM8", "EM9", "EM10", "EM11", "EM12"
        ],
        "ATLAS Events": atlas_values,
        "MA5 Events": ma5_values,
        "ATLAS Cumulative Efficiencies [%]": eff_atlas_cumulative,
        "MA5 Cumulative Efficiencies [%]": eff_ma5_cumulative,
        "ATLAS Cumulative Errors [%]": eff_atlas_cumulative_errors,
        "MA5 Cumulative Errors [%]": eff_ma5_cumulative_errors,
        "Cumulative Sigma": sigma_cumulative_values,
        "Relative Differences": relative_differences
    }

    # Round for display and show unavailable values as 'N/A'.
    df = pd.DataFrame(DF).round(3).replace({None: 'N/A'})

    # Display the DataFrame
    print(f"DataFrame for base path: {base_path}")
    display(df)


DataFrame for base path: /home/yoxara/workstation/Output_MC/Eff_MA5_LO_Spin0/_run_01_my0_350_mxd_1/Output/SAF/pseudos_run_01_my0_350_mxd_1/atlas_exot_2018_06/Cutflows/


Unnamed: 0,Cuts,ATLAS Events,MA5 Events,ATLAS Cumulative Efficiencies [%],MA5 Cumulative Efficiencies [%],ATLAS Cumulative Errors [%],MA5 Cumulative Errors [%],Cumulative Sigma,Relative Differences
0,Total events (ETmiss>150 GeV),199254,145137,100.0,100.0,0.317,0.371,0.0,0.0
1,Lepton veto,187094,140533,93.897,96.828,0.302,0.362,6.21,3.121
2,Njets <= 4,176978,131999,88.82,90.948,0.29,0.346,4.713,2.395
3,"min Delta Phi(jets, ETmiss) cut",168962,123619,84.797,85.174,0.28,0.33,0.87,0.444
4,Lead.Jet pT>150GeV & Jet |eta|<2.4,90366,74735,45.352,51.493,0.182,0.232,20.839,13.54
5,ETmiss>200 GeV,60133,53878,30.179,37.122,0.14,0.187,29.662,23.006
6,EM0,25162,19416,12.628,13.378,0.084,0.102,5.652,5.936
7,EM1,15549,13768,7.804,9.486,0.065,0.085,15.774,21.562
8,EM2,8648,8547,4.34,5.889,0.048,0.066,19.108,35.683
9,EM3,4717,5213,2.367,3.592,0.035,0.051,19.916,51.723
