In [9]:
import os
import pandas as pd
from collections import defaultdict

import shutil
from pathlib import Path
import glob
from math import floor, log10

In [5]:
def round_and_format_decimal(x, n=1):
    if x > 0:
        x = round(x, n - int(floor(log10(abs(x)))))
        return "{:#.2g}".format(x)
    else:
        return '0.0'

def round_and_format_scientific(x, n=1):
    if x > 0:
        x = round(x, n - int(floor(log10(abs(x)))))
        return f"{x:.0E}"
    else:
        return '0.0'

In [None]:
def get_evaluation_summaries(base_path="sciplex3/"):
    """
    Collects evaluation summary statistics grouped by model_name.

    Returns:
        dict: {model_name: [DataFrame, ...]} where each DataFrame is a summary.csv for a run.
    """
    summaries = defaultdict(list)
    if not os.path.isdir(base_path):
        raise ValueError(f"Base path does not exist: {base_path}")

    for model_name in os.listdir(base_path):
        model_dir = os.path.join(base_path, model_name)
        if not os.path.isdir(model_dir):
            continue
        for run_id in os.listdir(model_dir):
            run_dir = os.path.join(model_dir, run_id)
            eval_path = os.path.join(run_dir, "evaluation", "summary.csv")
            if os.path.isfile(eval_path):
                try:
                    df = pd.read_csv(eval_path, index_col=0)
                    summaries[model_name].append(df)
                except Exception as e:
                    print(f"Failed to read {eval_path}: {e}")
        
        # combine all dataframes for this model
        if summaries[model_name]:
            combined_df = pd.concat(summaries[model_name], axis=1)
            summaries[model_name] = combined_df
    
    return summaries


In [16]:
summary_dict = get_evaluation_summaries()
summary_dict.keys()

dict_keys(['decoder-cov', 'latent', 'latent-scgpt', 'decoder', 'sams-vae', 'linear'])

In [17]:
metrics_dict = {}
for k, runs_df in summary_dict.items():
    avg = runs_df.mean(axis=1).apply(lambda x: round_and_format_decimal(x))
    std = runs_df.std(axis=1).apply(lambda x: round_and_format_scientific(x))

    metrics = pd.Series('', index=runs_df.index)
    for m in runs_df.index.values:
        metrics.loc[m] = avg.loc[m] + " ± " + std.loc[m]

    metrics_dict[k] = metrics

metrics_df = pd.DataFrame(metrics_dict)
metrics_df

Unnamed: 0_level_0,decoder-cov,latent,latent-scgpt,decoder,sams-vae,linear
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rmse_average,0.023 ± 1E-04,0.028 ± 1E-02,0.019 ± 3E-04,0.019 ± 3E-04,0.020 ± 2E-05,0.030 ± 4E-04
rmse_rank_average,0.47 ± 0.0,0.26 ± 6E-02,0.15 ± 2E-02,0.15 ± 2E-02,0.19 ± 2E-02,0.27 ± 2E-03
cosine_pca_average,0.93 ± 2E-04,0.92 ± 7E-02,0.97 ± 1E-03,0.97 ± 1E-03,0.93 ± 7E-04,0.92 ± 1E-04
cosine_rank_pca_average,0.43 ± 0.0,0.26 ± 6E-02,0.15 ± 1E-02,0.15 ± 1E-02,0.069 ± 6E-03,0.28 ± 7E-04
cosine_logfc,0.30 ± 2E-02,0.32 ± 1E-01,0.33 ± 7E-03,0.33 ± 7E-03,0.46 ± 5E-03,0.15 ± 6E-03
cosine_rank_logfc,0.47 ± 0.0,0.28 ± 1E-01,0.20 ± 1E-02,0.20 ± 1E-02,0.21 ± 3E-02,0.28 ± 2E-03
r2_score_scores,0.0 ± 7E-03,0.0 ± 1E-02,0.0 ± 2E-03,0.0 ± 2E-03,0.0 ± 8E-03,0.0 ± 2E-03
top_k_recall_scores,0.00093 ± 5E-04,0.0 ± 0.0,0.0079 ± 5E-04,0.0079 ± 5E-04,0.00016 ± 1E-04,0.0036 ± 3E-04
mmd_pca,2.1 ± 2E-03,2.0 ± 2E-01,1.9 ± 5E-03,1.9 ± 5E-03,0.69 ± 1E-02,0.76 ± 9E-04
mmd_rank_pca,0.43 ± 0.0,0.26 ± 5E-02,0.15 ± 1E-02,0.15 ± 1E-02,0.060 ± 3E-03,0.30 ± 3E-04


In [18]:
metrics_df.to_csv("sciplex3_metrics_fixed.csv")