In [None]:
import numpy as np
import pandas as pd

from utils import load_nested_results

# Identifier of the stored experiment results to analyse. It is handed
# unchanged to the project helper `load_nested_results`
# (presumably a directory of saved runs — TODO confirm against utils).
RESULTS_NAME = "results_better"
all_results = load_nested_results(RESULTS_NAME)

In [None]:
from sklearn.metrics import roc_auc_score

# Order datasets by the integer prefix of their "<id>_<name>" key; ties on the
# prefix fall back to alphabetical order of the full key.
datasets_names = sorted(all_results, key=lambda x: (int(x.split("_")[0]), x))

# Number of folds, taken from the first dataset and applied to every dataset.
# NOTE(review): assumes folds are keyed 0..n_fold-1 and that all datasets have
# the same number of folds — confirm against how the results were saved.
n_fold = len(all_results[datasets_names[0]]["ground_truth"].keys())

# Fix ONE model order and reuse it for every dataset. A set difference has no
# guaranteed iteration order, so re-deriving it per dataset could silently
# shuffle the values of a row relative to the others.
# The order is derived from the LAST dataset with this exact expression on
# purpose: the table cell labels its columns with
# `list(results.keys() - {"ground_truth"})`, where `results` is the value the
# loop below leaves behind (the last dataset), so labels and values line up.
# A dataset missing one of these models now fails loudly with a KeyError
# instead of producing a misaligned row.
model_names = list(all_results[datasets_names[-1]].keys() - {"ground_truth"})

# Maps dataset display name -> list of "mean ± std" strings, one per model,
# in `model_names` order.
all_res_summary = {}

for dataset in datasets_names:
    results = all_results[dataset]

    # One row per usable fold, one column per model.
    all_res = []
    for fold in range(n_fold):
        y_true = results["ground_truth"][fold]

        # ROC AUC is undefined unless both classes are present, so folds with
        # fewer than two distinct labels (including empty folds) are skipped.
        if len(np.unique(y_true)) < 2:
            continue

        fold_res = []

        for model_name in model_names:
            y_scores = results[model_name][fold]["scores"]
            auc = roc_auc_score(y_true, y_scores)
            fold_res.append(auc)

        all_res.append(fold_res)

    if all_res:
        all_res = np.array(all_res)
        mean_res = np.mean(all_res, axis=0)
        std_res = np.std(all_res, axis=0)
    else:
        # No fold had both classes: keep the dataset in the table with NaN
        # entries rather than crashing on an empty array.
        all_res = np.empty((0, len(model_names)))
        mean_res = np.full(len(model_names), np.nan)
        std_res = np.full(len(model_names), np.nan)

    # Drop only the numeric prefix; maxsplit=1 keeps dataset names that
    # themselves contain underscores intact (and avoids key collisions).
    all_res_summary[dataset.split("_", 1)[1]] = [
        f"{mean:.2f} ± {std:.2f}" for mean, std in zip(mean_res, std_res)
    ]

In [None]:
# Column labels are the model names: every key of `results` except
# "ground_truth".
# NOTE(review): `results` is the variable left behind by the per-dataset loop
# (i.e. the last dataset processed), and the label order is set-iteration
# order. It must match the order in which each row of `all_res_summary` was
# built — confirm, or share a single explicit model list between both cells.
model_columns = list(results.keys() - {"ground_truth"})

# The keys of `all_res_summary` become the row index; each list value is one row.
df = pd.DataFrame.from_dict(all_res_summary, orient="index", columns=model_columns)
df

In [None]:
def _leading_number(cell):
    """Return the part of a cell that precedes "±", parsed as a float."""
    return float(str(cell).partition("±")[0].strip())


# Keep only the number before "±" in every cell of `df`.
# NOTE(review): if the cells are the "mean ± std" strings formatted with two
# decimals, the values parsed here are already rounded — confirm that this
# precision is acceptable for the median.
df_numeric = df.map(_leading_number)

# Median over the columns, one value per row.
df_numeric.median(axis=1)