In [None]:
import pandas as pd

import numpy as np
import sys
from pathlib import Path
import ipynbname
import os
from pathlib import Path

ROOT = Path(os.getcwd()).resolve().parent.parent


In [11]:
path = ROOT / 'Experiments/Results/baseline/all_compliante_scenarios_allbaseline.csv'
df=pd.read_csv(path)
mask_meta = (df["method"] == "Meta Space")
mask_valid = (df["classifier"] == "XGBoost") & (df["embedding_model"] == "minilm")

df = df[~mask_meta | (mask_meta & mask_valid)]


  df=pd.read_csv(path)


In [12]:
# Correction : appliquer l'ordre des méthodes correctement
import pandas as pd

def format_latex(mean, std):
    if pd.isna(mean):
        return "--"
    return f"${mean:.2f}\\pm{std:.2f}$"

scenarios = {
    "si_si": "(S1, I1) $\\leftrightarrow$ (S2, I2)",
    "s_s_bis": "(S1, $\varnothing$) $\\leftrightarrow$ (S2, $\varnothing$)",
    "i_i_bis": "($\varnothing$, I1) $\\leftrightarrow$ ($\varnothing$, I2)",
}


# Assure-toi que df_all_bis existe et contient 'method' et les colonnes agrégées
# Normaliser les chaînes (enlever espaces parasites)
df['method'] = df['method'].astype(str).str.strip()

# 1) calculer grouped (comme tu faisais)
grouped = (
    df.groupby(['scenario', 'method'], as_index=False)
          .agg(
              precision_all_mean=('All_Precision', 'mean'),
              precision_all_std =('All_Precision', 'std'),
              recall_all_mean   =('All_Recall', 'mean'),
              recall_all_std    =('All_Recall', 'std'),
              f1_mean           =('F1', 'mean'),
              f1_std            =('F1', 'std'),
              recall_at_gt_mean =('Recall@GT', 'mean'),
              recall_at_gt_std  =('Recall@GT', 'std'),
          )
)

# 2) définir l'ordre désiré des méthodes (extraction depuis df_all_bis ou liste manuelle)
method_order = df['method'].drop_duplicates().tolist()
# Option : forcer "Meta Space" en dernier si présent
if "Meta Space" in method_order:
    method_order = [m for m in method_order if m != "Meta Space"] + ["Meta Space"]
method_order=['Meta Space','Magneto', 'MagnetoFT', 'MagnetoFTGPT', 'MagnetoGPT', 'Coma++', 'ISResMat', 'Distribution', 'SimFlooding', 'Coma', 'Cupid']

# 3) appliquer Categorical sur la colonne 'method' de grouped AVANT de prendre df_scen
grouped['method'] = pd.Categorical(grouped['method'], categories=method_order, ordered=True)

# 4) trier grouped par scenario puis par méthode (qui utilisera l'ordre catégorique)
grouped = grouped.sort_values(['scenario','method']).reset_index(drop=True)

# 5) ensuite, pour chaque scenario, on ré-extrait df_scen depuis grouped (et il est déjà trié)
for scen, df_scen in grouped.groupby('scenario', sort=False):

    formatted = df_scen[['method']].rename(columns={'method': 'Method'}).copy()
    formatted['Precision']    = [format_latex(m, s) for m, s in zip(df_scen['precision_all_mean'], df_scen['precision_all_std'])]
    formatted['Recall']       = [format_latex(m, s) for m, s in zip(df_scen['recall_all_mean'], df_scen['recall_all_std'])]
    formatted['F1-score']     = [format_latex(m, s) for m, s in zip(df_scen['f1_mean'], df_scen['f1_std'])]
    formatted['Recall@|GT|']  = [format_latex(m, s) for m, s in zip(df_scen['recall_at_gt_mean'], df_scen['recall_at_gt_std'])]

    formatted = formatted[['Method', 'Precision', 'Recall', 'F1-score', 'Recall@|GT|']]

    latex_code = formatted.to_latex(
        escape=False,
        index=False,
        column_format="l c c c c c",
        caption=f"Average performance (mean ± std) per baseline — Scenario {scenarios[scen]}.",
        label=f"tab:metrics_{scen}"
    )

    print("\n")
    print(latex_code)




\begin{table}
\caption{Average performance (mean ± std) per baseline — Scenario ($arnothing$, I1) $\leftrightarrow$ ($arnothing$, I2).}
\label{tab:metrics_i_i_bis}
\begin{tabular}{l c c c c c}
\toprule
Method & Precision & Recall & F1-score & Recall@|GT| \\
\midrule
Meta Space & $0.99\pm0.03$ & $0.98\pm0.05$ & $0.99\pm0.04$ & $1.00\pm0.02$ \\
Magneto & $0.55\pm0.33$ & $0.87\pm0.23$ & $0.62\pm0.31$ & $0.70\pm0.34$ \\
MagnetoFT & $0.54\pm0.32$ & $0.87\pm0.20$ & $0.61\pm0.30$ & $0.70\pm0.32$ \\
MagnetoFTGPT & $0.77\pm0.35$ & $0.92\pm0.10$ & $0.76\pm0.31$ & $0.78\pm0.22$ \\
MagnetoGPT & $0.77\pm0.34$ & $0.92\pm0.10$ & $0.77\pm0.31$ & $0.74\pm0.23$ \\
Coma++ & $0.65\pm0.36$ & $0.57\pm0.29$ & $0.53\pm0.26$ & $0.69\pm0.34$ \\
ISResMat & $0.45\pm0.27$ & $0.66\pm0.26$ & $0.49\pm0.25$ & $0.43\pm0.30$ \\
Distribution & $0.58\pm0.35$ & $0.46\pm0.32$ & $0.47\pm0.29$ & $0.44\pm0.34$ \\
SimFlooding & $0.60\pm0.33$ & $1.00\pm0.06$ & $0.69\pm0.31$ & $0.53\pm0.35$ \\
Coma & $0.60\pm0.33$ & $1.00\pm0