In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

In [3]:
# Directory layout for the analysis. Everything hangs off the user's HOME
# (falling back to the default path below when HOME is not set).
PROJECT_NAME = "H-to-WW-NanoAOD-analysis"
HOME_DIR = Path(os.getenv("HOME", "/home/cms-jovyan"))

PROJECT_DIR = HOME_DIR.joinpath(PROJECT_NAME)
AUX_DIR = PROJECT_DIR.joinpath("Auxillary_files")   # input tables are read from here
output_dir = AUX_DIR.joinpath("Efficiencies")       # processed tables are written here

# Echo the resolved locations, one per line, so the reader can check them.
print(
    f"HOME_DIR:         {HOME_DIR}",
    f"PROJECT_DIR:     {PROJECT_DIR}",
    f"AUX_DIR:         {AUX_DIR}",
    f"output:     {output_dir}",
    sep="\n",
)

HOME_DIR:         /home/cms-jovyan
PROJECT_DIR:     /home/cms-jovyan/H-to-WW-NanoAOD-analysis
AUX_DIR:         /home/cms-jovyan/H-to-WW-NanoAOD-analysis/Auxillary_files
output:     /home/cms-jovyan/H-to-WW-NanoAOD-analysis/Auxillary_files/Efficiencies


## Electron Efficiency

In [4]:
# Electron efficiency table: fold the +eta / -eta bins together, average the
# data and MC efficiencies per (|eta|, pt) bin, and write the resulting
# Data/MC scale factors with their uncertainties as a fixed-width text table.
input_path = AUX_DIR / "egammaEffi_TightHWW_2016.txt"
output_path = output_dir / "egammaEffi_TightHWW_2016.txt"

# Column names handed to read_csv via `names`; the input is parsed as a
# header-less, whitespace-separated table in which '#' starts a comment.
cols = [
    "eta_low", "eta_high", "pt_low", "pt_high",
    "effData", "statErrData", "systErrData",
    "effMC", "statErrMC", "systErrMC",
    "effDataAltBkg", "effDataAltSig", "effMCAltMC", "effMCTagSel"
]


def average_val(x):
    """Return the arithmetic mean of the values in one group."""
    return np.mean(x)


def combine_stat_error(x):
    """Statistical error on the unweighted mean of a group.

    The individual errors are added in quadrature and divided by the number
    of entries: sqrt(sum(err_i**2)) / N.
    """
    return np.sqrt(np.sum(np.square(x))) / len(x)


def combine_syst_error(x):
    """Systematic error of a group: the plain average of the member errors."""
    return np.mean(x)


if not input_path.exists():
    # Missing input is reported and the cell is skipped (nothing is written).
    print(f"Error: File {input_path} not found.")
else:
    # sep=r"\s+" is the documented replacement for delim_whitespace=True,
    # which pandas has deprecated (it emits a FutureWarning).
    df = pd.read_csv(input_path, sep=r"\s+", comment='#', names=cols)

    # |eta| bin edges, computed column-wise instead of one Python call per row:
    # the smaller absolute edge becomes the low edge, the larger the high edge.
    abs_eta_edges = df[['eta_low', 'eta_high']].abs()
    df['abs_eta_low'] = abs_eta_edges.min(axis=1)
    df['abs_eta_high'] = abs_eta_edges.max(axis=1)

    # Rows sharing the same (|eta|, pt) bin are combined into one.
    grouped = df.groupby(['abs_eta_low', 'abs_eta_high', 'pt_low', 'pt_high'])

    result = grouped.agg({
        'effData': average_val,
        'statErrData': combine_stat_error,
        'systErrData': combine_syst_error,
        'effMC': average_val,
        'statErrMC': combine_stat_error,
        'systErrMC': combine_syst_error
    }).reset_index()

    # Scale factor (Data / MC).
    result['ScaleFactor'] = result['effData'] / result['effMC']

    # Relative errors of numerator and denominator added in quadrature,
    # separately for the statistical and the systematic component.
    result['SF_StatErr'] = result['ScaleFactor'] * np.sqrt(
        (result['statErrData'] / result['effData'])**2 +
        (result['statErrMC'] / result['effMC'])**2
    )

    result['SF_SystErr'] = result['ScaleFactor'] * np.sqrt(
        (result['systErrData'] / result['effData'])**2 +
        (result['systErrMC'] / result['effMC'])**2
    )

    result['SF_TotalErr'] = np.sqrt(result['SF_StatErr']**2 + result['SF_SystErr']**2)

    # Sort for a stable, readable table and expose the folded edges under the
    # plain eta_low / eta_high names (no in-place mutation).
    result = (
        result
        .sort_values(by=['abs_eta_low', 'pt_low'])
        .rename(columns={'abs_eta_low': 'eta_low', 'abs_eta_high': 'eta_high'})
    )

    header = (f"{'eta_low':>10} {'eta_high':>10} {'pt_low':>10} {'pt_high':>10} "
              f"{'effData':>10} {'effMC':>10} "
              f"{'ScaleFactor':>12} {'SF_StatErr':>12} {'SF_SystErr':>12} {'SF_TotalErr':>12}\n")

    # One fixed-width line per bin; fields are looked up by column name.
    row_fmt = (
        "{eta_low:>10.3f} {eta_high:>10.3f} "
        "{pt_low:>10.3f} {pt_high:>10.3f} "
        "{effData:>10.4f} {effMC:>10.4f} "
        "{ScaleFactor:>12.4f} {SF_StatErr:>12.4f} "
        "{SF_SystErr:>12.4f} {SF_TotalErr:>12.4f}\n"
    )

    # Make sure the destination directory exists before opening the file;
    # open(..., "w") does not create missing parent directories.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, "w") as f:
        f.write(header)
        for record in result.to_dict("records"):
            f.write(row_fmt.format(**record))

    print(f"Processed file saved to: {output_path}")

Processed file saved to: /home/cms-jovyan/H-to-WW-NanoAOD-analysis/Auxillary_files/Efficiencies/egammaEffi_TightHWW_2016.txt


  df = pd.read_csv(input_path, delim_whitespace=True, comment='#', names=cols)


## Muon Efficiency

In [6]:

# Muon isolation efficiency table: fold the +eta / -eta bins together, average
# the data and MC efficiencies per (|eta|, pt) bin, and write efficiencies,
# uncertainties and Data/MC scale factors as a fixed-width text table.
input_path = AUX_DIR / "Muon_ISO_Eff.txt"
output_path = output_dir / "Muon_ISO_Eff.txt"

if not input_path.exists():
    # Raise instead of calling exit(): an exception stops this cell right here,
    # whereas exit() inside a notebook targets the kernel session rather than
    # the cell.
    raise FileNotFoundError(f"Error: File {input_path} not found.")

# sep=r"\s+" is the documented replacement for delim_whitespace=True, which
# pandas has deprecated (it emits a FutureWarning). The first line of the file
# is used as the header; the columns referenced below must be present in it.
df = pd.read_csv(input_path, sep=r"\s+")

# |eta| bin edges, computed column-wise instead of one Python call per row:
# the smaller absolute edge becomes the low edge, the larger the high edge.
abs_eta_edges = df[['eta_low', 'eta_high']].abs()
df['abs_eta_low'] = abs_eta_edges.min(axis=1)
df['abs_eta_high'] = abs_eta_edges.max(axis=1)


def calc_stat_error(errors):
    """Statistical error on the unweighted mean of a group.

    The individual errors are added in quadrature and divided by the number
    of entries: sqrt(sum(err_i**2)) / N. For a group of two this equals the
    previous hard-coded factor of 0.5, but it stays correct for any group size.
    """
    return np.sqrt(np.sum(np.square(errors))) / len(errors)


def calc_syst_error(errors):
    """Systematic error of a group: the plain average of the member errors."""
    return np.mean(errors)


# Rows sharing the same (|eta|, pt) bin are combined into one.
grouped = df.groupby(['abs_eta_low', 'abs_eta_high', 'pt_low', 'pt_high'])

result = grouped.agg({
    'effData': 'mean',
    'statData': calc_stat_error,
    'systData': calc_syst_error,
    'effMC': 'mean',
    'statMC': calc_stat_error,
    'systMC': calc_syst_error
}).reset_index()

# Scale factor (Data / MC).
result['SF'] = result['effData'] / result['effMC']

# Relative errors of numerator and denominator added in quadrature,
# separately for the statistical and the systematic component.
result['SF_Stat'] = result['SF'] * np.sqrt(
    (result['statData'] / result['effData'])**2 +
    (result['statMC'] / result['effMC'])**2
)

result['SF_Syst'] = result['SF'] * np.sqrt(
    (result['systData'] / result['effData'])**2 +
    (result['systMC'] / result['effMC'])**2
)

result['SF_Total'] = np.sqrt(result['SF_Stat']**2 + result['SF_Syst']**2)

# Sort for a stable, readable table and expose the folded edges under the
# plain eta_low / eta_high names (no in-place mutation).
result = (
    result
    .sort_values(by=['abs_eta_low', 'pt_low'])
    .rename(columns={'abs_eta_low': 'eta_low', 'abs_eta_high': 'eta_high'})
)

# Columns written to the output file, in order.
output_cols = [
    'eta_low', 'eta_high', 'pt_low', 'pt_high',
    'effData', 'statData', 'systData',
    'effMC', 'statMC', 'systMC',
    'SF', 'SF_Stat', 'SF_Syst', 'SF_Total'
]

# (field width, decimals) per column; columns not listed use (10, 4).
col_layout = {
    'eta_low': (8, 3), 'eta_high': (8, 3),
    'pt_low': (8, 1), 'pt_high': (8, 1),
}
default_layout = (10, 4)

# Build the right-aligned header line and the matching per-row format string
# from output_cols so the two can never drift apart.
header_cells = []
row_cells = []
for col in output_cols:
    width, decimals = col_layout.get(col, default_layout)
    header_cells.append(f"{col:>{width}}")
    row_cells.append(f"{{{col}:>{width}.{decimals}f}}")
header = " ".join(header_cells) + "\n"
row_fmt = " ".join(row_cells) + "\n"

# Make sure the destination directory exists before opening the file;
# open(..., "w") does not create missing parent directories.
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, "w") as f:
    # Header
    f.write(header)

    # Data rows
    for record in result.to_dict("records"):
        f.write(row_fmt.format(**record))

print("-" * 50)
print(f"Success! Processed {len(df)} bins into {len(result)} symmetrized bins.")
print(f"Output saved to: {output_path}")
print("-" * 50)

--------------------------------------------------
Success! Processed 112 bins into 56 symmetrized bins.
Output saved to: /home/cms-jovyan/H-to-WW-NanoAOD-analysis/Auxillary_files/Efficiencies/Muon_ISO_Eff.txt
--------------------------------------------------


  df = pd.read_csv(input_path, delim_whitespace=True)
