###  Module & Utility Imports

In [1]:
import os
import re
import pandas as pd
import numpy as np

from Utilities.EvaluationMain import *
from Utilities.Utilities import ReadYaml, SerializeObjects, DeserializeObjects, LoadModelConfigs, LoadParams
from Models.Caller64 import *
from Utilities.Visualization import VisReconGivenZ_FCA, HeatMapFreqZ_FCA, VisReconGivenFC_ZA, VisReconExtractZ_FC

### Load Model Configurations and Evaluation Tables (Accuracy & MI)

In [2]:
def load_evaluation_tables(directory, acc_keyword, acc_pattern, mi_keyword, mi_pattern):
    """
    Load and combine evaluation tables from a directory, selected by filename substrings.

    A file is picked up as an accuracy table when its name contains both
    `acc_keyword` and `acc_pattern`, and as an MI table when its name contains
    both `mi_keyword` and `mi_pattern`. Matching files are read with
    `pd.read_csv` and stacked row-wise; each file keeps its own row index.

    Parameters:
        directory (str): Path to the directory containing CSV table files.
        acc_keyword (str): Keyword to identify accuracy tables.
        acc_pattern (str): Additional substring that accuracy table filenames must contain.
        mi_keyword (str): Keyword to identify MI (Mutual Information) tables.
        mi_pattern (str): Additional substring that MI table filenames must contain.

    Returns:
        acc_df (DataFrame): Concatenated accuracy tables. An 'RMSE' column
            (square root of 'MSEdenorm') is added only when 'MSEdenorm' exists.
        mi_df (DataFrame): Concatenated MI tables.
        Either frame is an empty DataFrame when no filename matches.
    """
    # Sort the listing: os.listdir returns names in arbitrary order, which would
    # otherwise make the row order of the combined tables platform-dependent.
    table_list = sorted(os.listdir(directory))

    def _load_matching(keyword, pattern):
        # Read every matching CSV, then concatenate once instead of growing a
        # DataFrame inside the loop (avoids repeated copying and concatenation
        # with an empty placeholder frame).
        frames = [
            pd.read_csv(os.path.join(directory, name))
            for name in table_list
            if keyword in name and pattern in name
        ]
        return pd.concat(frames, axis=0) if frames else pd.DataFrame()

    acc_df = _load_matching(acc_keyword, acc_pattern)
    # Compute RMSE if the 'MSEdenorm' column is available
    if 'MSEdenorm' in acc_df.columns:
        acc_df['RMSE'] = np.sqrt(acc_df['MSEdenorm'])

    mi_df = _load_matching(mi_keyword, mi_pattern)

    return acc_df, mi_df


def load_config_models(config_directory, include_keyword='Config', exclude_keyword='Eval', key='Models'):
    """
    Collect the model names declared in each configuration file of a directory.

    Parameters:
        config_directory (str): Directory that holds the configuration files.
        include_keyword (str): A filename must contain this substring to be read.
        exclude_keyword (str): A filename containing this substring is skipped.
        key (str): Top-level entry of the parsed configuration whose keys are the model names.

    Returns:
        model_dict (dict): Maps each filename, truncated at its first '.', to the
                           list of keys found under `key` (empty list if `key` is absent).
    """
    model_dict = {}
    for file_name in os.listdir(config_directory):
        # Guard clause: skip anything that is not a wanted configuration file.
        if include_keyword not in file_name or exclude_keyword in file_name:
            continue
        settings = ReadYaml(os.path.join(config_directory, file_name))
        stem = file_name.split('.')[0]
        model_dict[stem] = list(settings.get(key, {}).keys())
    return model_dict





# Main evaluation tables: files in eval_directory whose names contain
# 'Acc' / 'MI' together with the substring 'Nj1_FC'.
eval_directory = './EvalResults/Tables/'
AcctableSet, MItableSet = load_evaluation_tables(
    eval_directory,
    acc_keyword='Acc',
    acc_pattern='Nj1_FC',
    mi_keyword='MI',
    mi_pattern='Nj1_FC')


# Benchmark evaluation tables: same keywords, but filenames must contain 'NjAll'.
bench_directory = './Benchmarks/EvalResults/Tables/'
BenchAcctableSet, BenchMItableSet = load_evaluation_tables(
    bench_directory,
    acc_keyword='Acc',
    acc_pattern='NjAll',
    mi_keyword='MI',
    mi_pattern='NjAll')

# Map the benchmark metric labels (conditioned on X) onto the labels used for
# the main models (conditioned on \acute{Z}) so both table sets share metric names.
metrics_map = {
    '(i) $I(V;\\acute{\\Theta} \\mid X)$': '(ii) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$',
    '(ii) $I(S;\\acute{\\Theta} \\mid X)$': '(iii) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$'}

# Relabel the benchmark 'Metrics' column in place. NOTE: this overwrites the
# original labels; no separate 'UnifiedMetric' column is created.
BenchMItableSet['Metrics'] = BenchMItableSet['Metrics'].replace(metrics_map)

# Extract the unique metric types present in the main MI tables
# (np.unique returns them sorted).
MetricTypes = np.unique(MItableSet['MetricType']).tolist()

### Functions to Construct Analysis Table and Perform ISCORE-Based Parameter Selection for Main Models


In [3]:
def load_config_models(config_directory, include_keyword='Config', exclude_keyword='Eval', key='Models'):
    """
    Collect the model names declared in each configuration file of a directory.

    NOTE: a function with this same name and the same logic is already defined
    in an earlier cell of this file; this definition replaces it when the cell runs.

    Parameters:
        config_directory (str): Directory that holds the configuration files.
        include_keyword (str): A filename must contain this substring to be read.
        exclude_keyword (str): A filename containing this substring is skipped.
        key (str): Top-level entry of the parsed configuration whose keys are the model names.

    Returns:
        dict: Maps each filename, truncated at its first '.', to the list of keys
              found under `key` (empty list if `key` is absent).
    """
    model_dict = {}
    for file_name in os.listdir(config_directory):
        # Guard clause: skip anything that is not a wanted configuration file.
        if include_keyword not in file_name or exclude_keyword in file_name:
            continue
        settings = ReadYaml(os.path.join(config_directory, file_name))
        stem = file_name.split('.')[0]
        model_dict[stem] = list(settings.get(key, {}).keys())
    return model_dict



def prepare_analysis_table(mi_df, acc_df, target_models):
    """
    Build the per-model analysis table from MI and accuracy results.

    Steps:
      1. Keep only the rows of both inputs whose 'Model' is in `target_models`.
      2. Average the MI 'Values' per (Model, Metrics) and pivot so that each
         metric label becomes a column.
      3. Inner-join with the accuracy columns ('MeanKldRes' renamed to 'FQI',
         'RMSE', 'R2denorm') on 'Model'.
      4. Split the 'Model' string on '_' into structural parameter columns.
      5. Derive the component scores MP, AC, SS, RA, NMSE (and MS when its
         metric column is present) and the composite ISCOREam / ISCOREgm /
         ISCOREhm (arithmetic, geometric, harmonic mean of the components).

    Parameters:
        mi_df (DataFrame): MI results with at least 'Model', 'Metrics', 'Values'.
        acc_df (DataFrame): Accuracy results with at least 'Model', 'MeanKldRes',
            'RMSE', 'R2denorm'.
        target_models (list): Model names to include in the analysis.

    Returns:
        DataFrame: One row per model with the pivoted MI metrics, accuracy
        columns, parsed name parts, component scores and ISCORE columns.
        For rows without an MS value the ISCOREs use only MP, AC, SS and RA;
        the 'MS' column itself is only created when the MS metric column exists.
    """
    def _dependence(mi_values):
        # sqrt(1 - exp(-2 * I)): maps a non-negative information value to [0, 1).
        return np.sqrt(1 - np.exp(-2 * mi_values))

    # Filter evaluation tables based on the target models
    mi_table = mi_df[mi_df['Model'].isin(target_models)].reset_index(drop=True)
    acc_table = acc_df[acc_df['Model'].isin(target_models)].reset_index(drop=True)

    # Keep only the accuracy columns used downstream. (A former 'MAPEnorm'
    # rescaling was removed: that column was discarded right here, so it had
    # no effect on the result.)
    acc_table = acc_table[['Model', 'MeanKldRes', 'RMSE', 'R2denorm']].rename(
        columns={'MeanKldRes': 'FQI'})

    # Average MI values per (Model, Metrics), then pivot metrics into columns
    mi_grouped = mi_table.groupby(['Model', 'Metrics']).mean(numeric_only=True).reset_index()
    mi_pivot = pd.pivot(mi_grouped, index='Model', columns='Metrics', values='Values').reset_index()

    # Merge MI and accuracy tables
    merged_table = pd.merge(mi_pivot, acc_table, on='Model', how='inner').sort_values('Model').reset_index(drop=True)

    # Split the 'Model' string into structural parameters
    split_cols = merged_table['Model'].str.split('_', expand=True)
    if split_cols.shape[1] == 6:
        split_cols.columns = ['Prefix', 'Type', 'Depth', 'LatDim', 'Comp', 'Source']
        merged_table = pd.concat([merged_table, split_cols], axis=1)
    elif split_cols.shape[1] == 4:
        # Names with only three parts leave the 4th piece empty: treat the 3rd
        # piece as the source and record a latent dimension of 0 for them.
        mask = split_cols[3].isna() | (split_cols[3] == 'None')
        split_cols.loc[mask, 3] = split_cols.loc[mask, 2]
        split_cols.loc[mask, 2] = 0
        split_cols.columns = ['Prefix', 'Type', 'LatDim', 'Source']
        merged_table = pd.concat([merged_table, split_cols], axis=1)
    else:
        print("Warning: Unexpected model naming format. Check the 'Model' column.")

    ms_metric_col = '(i) $I(V; \\acute{Z} \\mid Z)$'

    # Component scores common to all models
    merged_table['MP'] = 1 - _dependence(merged_table['(ii) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$'])
    merged_table['AC'] = _dependence(merged_table['(iii) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$'])
    merged_table['SS'] = 1 - _dependence(merged_table['FQI'])
    merged_table['RA'] = merged_table['R2denorm']
    merged_table['NMSE'] = 1 - merged_table['RA']

    base_components = ['MP', 'AC', 'SS', 'RA']
    mean_kinds = ('am', 'gm', 'hm')  # arithmetic, geometric, harmonic

    if ms_metric_col in merged_table.columns:
        # MS is only defined for rows that actually carry the MS metric
        has_ms = merged_table[ms_metric_col].notna()
        merged_table.loc[has_ms, 'MS'] = _dependence(merged_table.loc[has_ms, ms_metric_col])

        # Rows with MS average five components; the rest average the four common ones
        row_groups = [(has_ms, ['MS'] + base_components), (~has_ms, base_components)]
        for kind in mean_kinds:
            for row_mask, components in row_groups:
                merged_table.loc[row_mask, f'ISCORE{kind}'] = mean(
                    merged_table.loc[row_mask, components], kind=kind, axis=1)
    else:
        # No MS metric at all: every model is scored on the four common components
        for kind in mean_kinds:
            merged_table[f'ISCORE{kind}'] = mean(merged_table[base_components], kind=kind, axis=1)

    return merged_table
    
def amean(x, weights=None, axis=None, eps=None):
    """
    Arithmetic mean, optionally weighted.

    Without weights this is the plain mean along `axis`; with weights it is
    sum(w * x) / sum(w) along `axis`. pandas objects are converted to their
    underlying array first. `eps` is accepted but ignored so that the
    signature lines up with gmean/hmean.
    """
    raw = x.values if isinstance(x, (pd.DataFrame, pd.Series)) else x
    data = np.asarray(raw, dtype=float)

    if weights is not None:
        # np.average handles weights for both axis=None and an integer axis
        return np.average(data, weights=np.asarray(weights, dtype=float), axis=axis)

    return np.mean(data, axis=axis)
    
def gmean(x, weights=None, axis=None, eps=1e-12):
    """
    Geometric mean of non-negative values (e.g. scores in [0, 1]).

    Computed in log space as exp(average(log(x + eps))); the small `eps`
    keeps the logarithm finite when an entry is exactly 0, in which case the
    result is pulled toward 0. Supports optional weights and pandas input.
    """
    raw = x.values if isinstance(x, (pd.DataFrame, pd.Series)) else x
    log_values = np.log(np.asarray(raw, dtype=float) + eps)

    if weights is not None:
        w = np.asarray(weights, dtype=float)
        return np.exp(np.average(log_values, weights=w, axis=axis))

    return np.exp(np.mean(log_values, axis=axis))

def hmean(x, weights=None, axis=None, eps=1e-12):
    """
    Harmonic mean for strictly positive values.

    Stabilized with eps to avoid division by zero: H = sum(w) / sum(w / (x + eps)).
    If any entry is near 0, HM drops strongly (designed behavior).

    Parameters:
        x: array-like, pandas Series or DataFrame of values.
        weights: optional weights. Either the same shape as `x`, or a 1-D
            vector of per-element weights along `axis` (the same convention
            amean/gmean accept through np.average).
        axis: axis to reduce over; None reduces over all elements.
        eps: small constant added to every value before inversion.

    Returns:
        float or ndarray: the (weighted) harmonic mean along `axis`.
    """
    if isinstance(x, (pd.DataFrame, pd.Series)):
        x = x.values

    x = np.asarray(x, dtype=float)
    if weights is None:
        denom = np.sum(1.0 / (x + eps), axis=axis)
        count = x.size if axis is None else x.shape[axis]
        return count / denom

    w = np.asarray(weights, dtype=float)
    if w.shape != x.shape:
        # 1-D weights given for one axis of multi-dimensional data: line them up
        # with that axis. Previously np.sum(w, axis=axis) was applied to the raw
        # 1-D vector and raised for axis >= 1.
        if axis is not None and w.ndim == 1 and x.ndim > 1:
            aligned_shape = [1] * x.ndim
            aligned_shape[axis] = w.shape[0]
            w = w.reshape(aligned_shape)
        # Expand to the full data shape so numerator and denominator sum the
        # same set of weights.
        w = np.broadcast_to(w, x.shape)
    return np.sum(w, axis=axis) / np.sum(w / (x + eps), axis=axis)

def mean(x, kind="gm", weights=None, axis=None, eps=1e-12):
    """
    Single entry point for the three means.

    `kind` (case-insensitive) selects "am" (arithmetic), "gm" (geometric) or
    "hm" (harmonic); any other value raises ValueError. `eps` is forwarded to
    the geometric and harmonic variants only. Accepts pandas DataFrame/Series.
    """
    # Lambdas defer the call so only the selected implementation is touched.
    dispatch = {
        "am": lambda: amean(x, weights=weights, axis=axis),
        "gm": lambda: gmean(x, weights=weights, axis=axis, eps=eps),
        "hm": lambda: hmean(x, weights=weights, axis=axis, eps=eps),
    }
    compute = dispatch.get(kind.lower())
    if compute is None:
        raise ValueError("kind must be one of {'am','gm','hm'}")
    return compute()

### Construct Analysis Table and Perform ISCORE-Based Parameter Selection for Main Models

In [4]:
# Load configuration models and combine all model keys across configuration files
config_directory = './Config/'
TabLists = load_config_models(config_directory)
AnalTabList = list(np.concatenate([tabs for key, tabs in TabLists.items()]))

legend_map = {
    'Depth': r'$\zeta$',
    'LatDim': r'$J$',
    'Comp': r'$C$'
}

# Define ablation models to exclude from final analysis (if needed later)
AblationList = [
    'FC_ART_1_30_800_Mimic', 'SKZ_ART_1_30_800_Mimic',
    'FC_ART_1_50_800_VitalDB', 'SKZ_ART_1_50_800_VitalDB',
    'FC_II_1_50_800_Mimic', 'SKZ_II_1_50_800_Mimic',
    'FC_II_1_50_800_VitalDB', 'SKZ_II_1_50_800_VitalDB'
]

MainList = [
    'SKZFC_ART_1_30_800_Mimic',
    'SKZFC_ART_1_30_800_VitalDB',
    'SKZFC_II_1_30_800_Mimic',
    'SKZFC_II_1_30_800_VitalDB'
]

ExclusionList = ['FC_ART_1_50_800_Mimic',  'SKZ_ART_1_50_800_Mimic']

# Define MI metrics to be used in the analysis
AnalMetricList = [
    '(i) $I(V; \\acute{Z} \\mid Z)$',
    '(ii) $I(V;\\acute{\\Theta} \\mid \\acute{Z})$',
    '(iii) $I(S;\\acute{\\Theta} \\mid \\acute{Z})$'
]


# Prepare the merged analysis table using the function
AnalAccMItableDic = {}
SubAcctableSet = AcctableSet.copy()
for mtype in MetricTypes:
    SubMItableSet = MItableSet[MItableSet['MetricType'] == mtype].reset_index(drop=True)
    KldCols = [col for col in AcctableSet.columns if 'MeanKld' in col]
    SelKldCols = [type for type in KldCols if mtype in type]
    SubAcctableSet['MeanKldRes'] = AcctableSet[SelKldCols]
    AnalAccMItableDic[mtype] = prepare_analysis_table(SubMItableSet, SubAcctableSet, AnalTabList)

AnalAccMItableMerged = pd.concat(AnalAccMItableDic, axis=0)
AnalAccMItableMerged = AnalAccMItableMerged.reset_index(level=0).rename(columns={'level_0': 'MetricType'})
SenseAccMItable = AnalAccMItableMerged[~AnalAccMItableMerged['Model'].isin(AblationList+ExclusionList+MainList)].copy()
SenseAccMItable = SenseAccMItable[['MetricType','Model', 'Source', 'Type', 'Depth', 'LatDim', 'Comp', 'MS', 'MP', 'AC', 'SS', 'RA', 'ISCOREam', 'ISCOREgm', 'ISCOREhm']]


### Construct Analysis Table (ISCORE-Based) for Benchmarks

In [5]:
# Load configuration models and combine all model keys across configuration files
Bench_config_directory = './Benchmarks/Config/'
BenchTabLists = load_config_models(Bench_config_directory)
BenchAnalTabList = list(np.concatenate([tabs for key, tabs in BenchTabLists.items()]))

# Prepare the merged analysis table using the function
BenchAnalAccMItableDic = {}
SubBenchAcctableSet = BenchAcctableSet.copy()
for mtype in MetricTypes:
    SubBenchMItableSet = BenchMItableSet[BenchMItableSet['MetricType'] == mtype].reset_index(drop=True)
    KldCols = [col for col in BenchAcctableSet.columns if 'MeanKld' in col]
    SelKldCols = [type for type in KldCols if mtype in type]
    SubBenchAcctableSet['MeanKldRes'] = BenchAcctableSet[SelKldCols]
    BenchAnalAccMItableDic[mtype] = prepare_analysis_table(SubBenchMItableSet, SubBenchAcctableSet,  BenchAnalTabList)

BenchAnalAccMItableMerged = pd.concat(BenchAnalAccMItableDic)
BenchAnalAccMItableMerged = BenchAnalAccMItableMerged.reset_index(level=0).rename(columns={'level_0': 'MetricType'})
RAReferences = BenchAnalAccMItableMerged[['Type','Source', 'RA']].groupby(['Type','Source']).min().reset_index()

### Select Per-Group Best Models and Compute Cross-Method Aggregated Score

In [6]:
'''
# Per-method best selection for each metric
BestSerchDic = {}

for key, AnalAccMItable in AnalAccMItableDic.items():
    SubSenseAccMItable = AnalAccMItable[~AnalAccMItable['Model'].isin(AblationList+ExclusionList)].copy()
    
    PerformanceTab = SubSenseAccMItable[['Source', 'Type', 'Model','ISCOREam', 'ISCOREgm', 'ISCOREhm']].copy()
    PerformanceTablong = pd.DataFrame()
    for iscore_col in ['ISCOREam', 'ISCOREgm', 'ISCOREhm']:
        # pick max row per (Type, Source)
        SelPerformanceTab = PerformanceTab.loc[PerformanceTab.groupby(["Type", 'Source'])[iscore_col].idxmax(), ["Type", 'Source', 'Model', iscore_col]]
        SelPerformanceTab = SelPerformanceTab.rename(columns={iscore_col:'Value'}) 
        SelPerformanceTab['MetricType'] = iscore_col[6:]  # 'am','gm','hm'
        PerformanceTablong = pd.concat([PerformanceTablong, SelPerformanceTab], axis=0)
    BestSerchDic[key] = PerformanceTablong.reset_index(drop=True)[['Source', 'Type', 'MetricType', 'Model','Value']]
    

# Aggregate scores across all methods
MetricType = ['ISCOREam', 'ISCOREgm', 'ISCOREhm']
AnalAccMItableBinding = pd.DataFrame()
SenseAccMItable = pd.DataFrame()
for key, AnalAccMItable in AnalAccMItableDic.items():
    SubSenseAccMItable = AnalAccMItable[~AnalAccMItable['Model'].isin(AblationList+ExclusionList+MainList)].copy()
    SubSenseAccMItable['MetricType'] = key
    AnalAccMItableBinding = pd.concat([AnalAccMItableBinding, SubSenseAccMItable])
    SenseAccMItable = pd.concat([SenseAccMItable, SubSenseAccMItable])
AnalAccMItableBinding = AnalAccMItableBinding[['Source', 'Type', 'Model'] + MetricType].reset_index(drop=True)
SenseAccMItable = SenseAccMItable[['MetricType','Model', 'Source', 'Type', 'Depth', 'LatDim', 'Comp', 'MS', 'MP', 'AC', 'SS', 'RA', 'ISCOREam', 'ISCOREgm', 'ISCOREhm']]

# sum per (Source, Type, Model)
AnalAccMItableRowSum = AnalAccMItableBinding.groupby(['Source','Type','Model']).sum(numeric_only=True)

# mean over all methods × metrics
AnalAccMItableAvg = pd.DataFrame(AnalAccMItableRowSum.sum(1) / (len(AnalAccMItableDic.keys()) * len(MetricType)), columns=['agg_metric']).reset_index()

# pick max agg_metric per (Type, Source)
BestModels = AnalAccMItableAvg.loc[AnalAccMItableAvg.groupby(["Type", 'Source'])['agg_metric'].idxmax(), ["Type", 'Source', 'Model','agg_metric']]
BestModels = pd.merge(BestModels[['Model', 'agg_metric']], AnalAccMItableMerged, on='Model').reset_index(drop=True)
'''

'\n# Per-method best selection for each metric\nBestSerchDic = {}\n\nfor key, AnalAccMItable in AnalAccMItableDic.items():\n    SubSenseAccMItable = AnalAccMItable[~AnalAccMItable[\'Model\'].isin(AblationList+ExclusionList)].copy()\n    \n    PerformanceTab = SubSenseAccMItable[[\'Source\', \'Type\', \'Model\',\'ISCOREam\', \'ISCOREgm\', \'ISCOREhm\']].copy()\n    PerformanceTablong = pd.DataFrame()\n    for iscore_col in [\'ISCOREam\', \'ISCOREgm\', \'ISCOREhm\']:\n        # pick max row per (Type, Source)\n        SelPerformanceTab = PerformanceTab.loc[PerformanceTab.groupby(["Type", \'Source\'])[iscore_col].idxmax(), ["Type", \'Source\', \'Model\', iscore_col]]\n        SelPerformanceTab = SelPerformanceTab.rename(columns={iscore_col:\'Value\'}) \n        SelPerformanceTab[\'MetricType\'] = iscore_col[6:]  # \'am\',\'gm\',\'hm\'\n        PerformanceTablong = pd.concat([PerformanceTablong, SelPerformanceTab], axis=0)\n    BestSerchDic[key] = PerformanceTablong.reset_index(drop=True)[

### Aggregated Model Performance & Hyperparameter Sensitivity Analysis

In [8]:
import pandas as pd
import numpy as np

# =============================================
# 1. Aggregate performance across all methods
# =============================================

# Combine all per-method DataFrames into one table
all_methods_df = pd.concat(AnalAccMItableDic.values(), ignore_index=True)

# Exclude ablation models and other models in ExclusionList
non_ablation_df = all_methods_df[~all_methods_df['Model'].isin(AblationList + ExclusionList)].copy()

# Attach the benchmark reference RA of each row's (Type, Source) group with a
# single left merge instead of a row-by-row lookup. `indicator=True` marks rows
# whose group has no benchmark reference.
ra_check = non_ablation_df[['Model', 'Type', 'Source', 'RA']].merge(
    RAReferences[['Type', 'Source', 'RA']].rename(columns={'RA': 'RA_ref'}),
    on=['Type', 'Source'], how='left', indicator=True)

# A missing reference used to surface as an opaque IndexError from `.values[0]`;
# fail with an explicit message naming the offending groups instead.
missing_ref = ra_check.loc[ra_check['_merge'] == 'left_only', ['Type', 'Source']].drop_duplicates()
if not missing_ref.empty:
    raise ValueError(
        f"No benchmark RA reference for (Type, Source) groups: {missing_ref.values.tolist()}")

# Models with RA below the reference threshold in any method (deduplicated)
RemoveList = ra_check.loc[ra_check['RA'] < ra_check['RA_ref'], 'Model'].unique().tolist()

# Compute mean ISCOREgm for each (Source, Type, Model)
group_cols = ['Source', 'Type', 'Model']
MetricType = ['ISCOREgm']
aggregated_scores = non_ablation_df.groupby(group_cols)[MetricType].mean().reset_index()

# Candidates for best-model selection: aggregated scores without RemoveList models
aggregated_scores_best = aggregated_scores[~aggregated_scores['Model'].isin(RemoveList)].copy()

# Number of methods and metrics (for reference)
n_methods = len(AnalAccMItableDic)
n_metrics = len(MetricType)

# =========================
# 2. Select Best Model per (Type, Source)
# =========================
# Pick the row index with the maximum ISCOREgm for each (Type, Source)
best_model_indices = aggregated_scores_best.groupby(['Type', 'Source'])['ISCOREgm'].idxmax()
BestOverallModels = aggregated_scores_best.loc[best_model_indices, ['Type', 'Source', 'Model', 'ISCOREgm']].reset_index(drop=True)
# Recover the full per-metric-type rows of the selected models
BestModels = pd.merge(BestOverallModels[['Model']], AnalAccMItableMerged, on='Model').reset_index(drop=True)

# =========================
# 3. Unified Sensitivity Analysis Function
# =========================
def calculate_sensitivity(df, hp, score_col, group_by=('Type','Source'),
                          latex_labels=True, legend_map=None,
                          n_methods=None, n_metrics=None):
    """
    Summarise how much a score varies across the settings of one hyperparameter.

    Args:
        df (pd.DataFrame): Table with the score column, the hyperparameter column
            and the `group_by` columns.
        hp (str): Hyperparameter column name (e.g., 'Depth', 'LatDim', 'Comp').
        score_col (str): Score column to analyse (e.g., 'ISCOREgm').
        group_by (tuple): Grouping keys other than hp (default ('Type','Source')).
        latex_labels (bool): If True, the 'Hyperparameter' label is a LaTeX math string.
        legend_map (dict): Optional {hp: label}; a non-blank string entry is used as-is.
        n_methods, n_metrics (int): When both are given, per-setting counts are also
            scaled by n_methods * n_metrics and reported as effective data points.

    Returns:
        (summary_stats, detail_stats): two DataFrames, or (None, None) when `hp`
        is not a column of `df`.
          - detail_stats: mean/std/max/min/n of the score per (group_by..., setting).
          - summary_stats: per (group_by..., hyperparameter) the range and standard
            deviation of the per-setting means, the number of settings and the total
            row count. Missing values (e.g. std of a single setting) are filled with 0.
    """
    if hp not in df.columns:
        return None, None

    base_keys = list(group_by)
    label_overrides = legend_map or {}

    # --- per-setting statistics -------------------------------------------
    per_setting = df.groupby(base_keys + [hp])[score_col].agg(
        mean='mean', std='std', max='max', min='min', n='size')
    detail_stats = per_setting.reset_index().rename(columns={hp: 'Setting'})

    # --- display label for the hyperparameter ------------------------------
    if not latex_labels:
        # Plain mode: mapped value if any, otherwise the raw column name
        hp_label = label_overrides.get(hp, hp)
    else:
        mapped = label_overrides.get(hp)
        if isinstance(mapped, str) and mapped.strip():
            # An explicit, non-blank mapping wins and is used verbatim
            hp_label = mapped
        else:
            builtin_labels = {'Depth': r'$\zeta$', 'LatDim': r'$J$', 'Comp': r'$C$'}
            hp_label = builtin_labels.get(hp, hp)
            # Anything not already delimited by $...$ is wrapped in \mathrm{...}
            if isinstance(hp_label, str) and (
                    not hp_label.startswith('$') or not hp_label.endswith('$')):
                hp_label = rf'$\mathrm{{{hp_label}}}$'

    detail_stats['Hyperparameter'] = hp_label

    # --- optional effective data-point annotation --------------------------
    if n_methods is not None and n_metrics is not None:
        total_points = n_methods * n_metrics
        detail_stats['n_effective'] = detail_stats['n'] * total_points
        detail_stats['note'] = f'Each model aggregated from {n_methods}x{n_metrics}={total_points} data points'

    # --- summary over settings ---------------------------------------------
    def spread(values):
        return values.max() - values.min()

    summary_keys = base_keys + ['Hyperparameter']
    summary_stats = detail_stats.groupby(summary_keys).agg(
        SensRange=('mean', spread),
        SensStd=('mean', 'std'),
        N_Settings=('Setting', 'nunique'),
        Total_Models=('n', 'sum'),
    )
    summary_stats = summary_stats.reset_index().fillna(0)

    if 'n_effective' in detail_stats.columns:
        effective_totals = detail_stats.groupby(summary_keys)['n_effective'].sum()
        effective_totals = effective_totals.reset_index(name='Total_Effective_DataPoints')
        summary_stats = summary_stats.merge(effective_totals, on=summary_keys, how='left')

    # --- fixed column order for both outputs -------------------------------
    summary_cols = base_keys + ['Hyperparameter', 'SensStd', 'SensRange', 'N_Settings', 'Total_Models']
    if 'Total_Effective_DataPoints' in summary_stats.columns:
        summary_cols = summary_cols + ['Total_Effective_DataPoints']

    detail_cols = base_keys + ['Hyperparameter', 'Setting', 'mean', 'std', 'max', 'min', 'n']
    if 'n_effective' in detail_stats.columns:
        detail_cols = detail_cols + ['n_effective', 'note']

    return summary_stats[summary_cols], detail_stats[detail_cols]

# =========================
# 4. Aggregated Sensitivity Analysis
# =========================
# Hyperparameter columns analysed below (parsed from the model names)
ParamCols = ['Depth', 'LatDim', 'Comp']

# Use the first method's DataFrame as a template to recover hyperparameter values
# (the aggregated score table only carries Source / Type / Model).
template_df = next(iter(AnalAccMItableDic.values()))
merge_cols = ['Source', 'Type', 'Model'] + ParamCols
template_df = template_df[~template_df['Model'].isin(AblationList+ExclusionList)]
template_subset = template_df.drop_duplicates(subset=merge_cols)[merge_cols]

# Merge aggregated scores with hyperparameter columns.
# NOTE: this uses `aggregated_scores`, not `aggregated_scores_best`, so models
# listed in RemoveList are still part of the sensitivity analysis.
df_with_agg = pd.merge(aggregated_scores, template_subset, on=['Source', 'Type', 'Model'], how='left')

# Run sensitivity analysis for each hyperparameter on the aggregated metric
summary_list, detail_list = [], []
for hp in ParamCols:
    summary, detail = calculate_sensitivity(df_with_agg, hp, 'ISCOREgm', n_methods=n_methods, n_metrics=n_metrics)
    # calculate_sensitivity returns (None, None) when the hp column is absent
    if summary is not None:
        summary_list.append(summary)
        detail_list.append(detail)

# Despite the 'Dic' suffix, both results are DataFrames (empty if nothing was analysed)
AggSensitivityDic = pd.concat(summary_list, ignore_index=True) if summary_list else pd.DataFrame()
AggSensitivityDetailDic = pd.concat(detail_list, ignore_index=True) if detail_list else pd.DataFrame()

# =========================
# 5. Individual Method Sensitivity Analysis (Optional)
# =========================
# NOTE(review): the method keys are hard-coded here rather than taken from
# MetricTypes; a key missing from AnalAccMItableDic raises KeyError below.
MetricTypesKey = ['fft', 'matching_pursuit', 'welch_evo']
IScoreKey = ['ISCOREam', 'ISCOREgm', 'ISCOREhm']

# Nested results: {method key: {ISCORE column: DataFrame}}
SensitivityDic, SensitivityDetailDic = {}, {}

for mtype in MetricTypesKey:
    # Exclude ablation models per method
    df = AnalAccMItableDic[mtype][~AnalAccMItableDic[mtype]['Model'].isin(AblationList+ExclusionList)]

    SensitivityDic[mtype] = {}
    SensitivityDetailDic[mtype] = {}

    for iscore_col in IScoreKey:
        # Fresh accumulators per score column (these names are reused from section 4)
        summary_list, detail_list = [], []
        for hp in ParamCols:
            summary, detail = calculate_sensitivity(df, hp, iscore_col)
            if summary is not None:
                summary_list.append(summary)
                detail_list.append(detail)

        SensitivityDic[mtype][iscore_col] = pd.concat(summary_list, ignore_index=True) if summary_list else pd.DataFrame()
        SensitivityDetailDic[mtype][iscore_col] = pd.concat(detail_list, ignore_index=True) if detail_list else pd.DataFrame()


# Flatten the nested per-method / per-score detail tables into one long table
SensitivityDetailTabs = pd.DataFrame()

for idx, values in SensitivityDetailDic.items():
    SubTab = pd.DataFrame()
    for sub_idx, sub_values in values.items():
        # NOTE: this adds the column to the frame stored in SensitivityDetailDic
        # itself (in-place), not to a copy.
        sub_values['IscoreType'] = sub_idx
        SubTab = pd.concat([SubTab, sub_values])
    SubTab['MetricType'] = idx
    SensitivityDetailTabs = pd.concat([SensitivityDetailTabs, SubTab])
    
# =========================
# Output Structure Documentation
# =========================
# AggSensitivityDic:
#   Summary of hyperparameter sensitivity using the aggregated metric across all methods/scores.
#   - Total_Models: number of unique models accumulated across settings for the hyperparameter.
#   - Total_Effective_DataPoints: Total_Models × n_methods × n_metrics (reflects aggregation basis).
#   Provides a single consolidated view of sensitivity.

# AggSensitivityDetailDic:
#   Detailed per-setting statistics using the aggregated metric.
#   - n: number of unique models per (Type, Source, Setting) for this specific hyperparameter.
#   - n_effective: n × n_methods × n_metrics (effective points behind ISCOREgm).
#   - note: human-readable explanation of aggregation basis.
#   Shows how each setting behaves for the overall aggregated score.

# SensitivityDic[mtype][iscore]:
#   Summary sensitivity per method (mtype) and individual score (iscore).

# SensitivityDetailDic[mtype][iscore]:
#   Per-setting detailed stats per method (mtype) and score (iscore).



### Generating Evaluation Summary Tables

In [9]:
# Column layout shared by all exported comparison tables
_summary_cols = ['MetricType', 'Model', 'Source', 'Type', 'Depth', 'LatDim', 'Comp',
                 'MS', 'MP', 'AC', 'SS', 'RA', 'ISCOREam', 'ISCOREgm', 'ISCOREhm']

# Main models: 'SKZFC' variants with Depth 1, Comp 800 and LatDim 30
# (the parsed name parts are strings, hence the string comparisons).
_main_mask = (
    AnalAccMItableMerged['Model'].str.contains('SKZFC', na=False)
    & (AnalAccMItableMerged['Depth'] == '1')
    & (AnalAccMItableMerged['Comp'] == '800')
    & (AnalAccMItableMerged['LatDim'] == '30')
)
OnlyMainModels = AnalAccMItableMerged[_main_mask]

# Ablation models, restricted to the shared column layout
_ablation_rows = AnalAccMItableMerged[AnalAccMItableMerged['Model'].isin(AblationList)]
OnlyAblModels = _ablation_rows[_summary_cols].reset_index(drop=True)

# Benchmark comparison table generation: main models stacked on top of the benchmarks
_bench_stack = pd.concat([OnlyMainModels, BenchAnalAccMItableMerged]).reset_index(drop=True)
BenchCompTabs = _bench_stack[_summary_cols].reset_index(drop=True)


# Ablation study table generation: selected best models followed by the ablation models
MainForAbl = BestModels[_summary_cols]

AblCompTabs = pd.concat([MainForAbl, OnlyAblModels]).reset_index(drop=True)


### Save Evaluation Summary Tables to 'EvalResults/SummaryTables/' Directory

In [10]:
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs('EvalResults/SummaryTables', exist_ok=True)

# Write each summary table without its row index
BenchCompTabs.to_csv('EvalResults/SummaryTables/BenchCompTabs.csv', index=False)
AblCompTabs.to_csv('EvalResults/SummaryTables/AblCompTabs.csv', index=False)
AggSensitivityDetailDic.to_csv('EvalResults/SummaryTables/AggSensitivityDetail.csv', index=False)
SensitivityDetailTabs.to_csv('EvalResults/SummaryTables/SensitivityDetailTabs.csv', index=False)
SenseAccMItable.to_csv('EvalResults/SummaryTables/SenseAccMItable.csv', index=False)