In [1]:
# =========================================================
# üîß Configuration initiale : Seed, Device, Chemins
# =========================================================
import numpy as np, random, os
import torch

SEED = 42
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"‚úÖ Seed={SEED} | Device={DEVICE}")
torch.cuda.empty_cache() if DEVICE=="cuda" else None


‚úÖ Seed=42 | Device=cuda


In [2]:
# =========================================================
# üìÅ Configuration des chemins (standardis√©e)
# =========================================================
import os
from pathlib import Path
import sys

# --- D√©tection du projet (ex√©cution depuis notebooks/) ---
PROJECT_ROOT = Path(os.getenv("PROJECT_ROOT", "..")).resolve()
DATAGENERATOR_PATH = PROJECT_ROOT / "p9dg"

# Dossiers globaux du projet
MODELS_DIR      = PROJECT_ROOT / "models"
ARTIFACTS_DIR   = PROJECT_ROOT / "artifacts"
OUTPUTS_DIR     = PROJECT_ROOT / "outputs"
DATA_ROOT       = Path(os.getenv("DATA_ROOT", PROJECT_ROOT / "data")).resolve()
CONFIG_DIR      = Path(os.getenv("CONFIG_DIR", PROJECT_ROOT / "configs")).resolve()

# Dossiers sp√©cifiques pour les m√©triques
METRICS_DIR     = ARTIFACTS_DIR / "metrics"
METRICS_DIR.mkdir(parents=True, exist_ok=True)

# Cr√©ation des dossiers
for d in [MODELS_DIR, ARTIFACTS_DIR, OUTPUTS_DIR, CONFIG_DIR, METRICS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# Ajout du datagenerator au path
if str(DATAGENERATOR_PATH) not in sys.path:
    sys.path.append(str(DATAGENERATOR_PATH))

# Ajout de la racine du projet au path pour les imports
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Imports package-qualifi√©s
from p9dg.histo_dataset import HistoDataset
from metrics.fid_lpips_eval import FIDLPIPSEvaluator, run_eval_experiment, run_eval_paired_experiment

# Affichage des dossiers configur√©s
print("‚úÖ Dossiers configur√©s:")
print(f"   PROJECT_ROOT    : {PROJECT_ROOT}")
print(f"   DATA_ROOT       : {DATA_ROOT}")
print(f"   CONFIG_DIR      : {CONFIG_DIR}")
print(f"   MODELS_DIR      : {MODELS_DIR}")
print(f"   ARTIFACTS_DIR   : {ARTIFACTS_DIR}")
print(f"   OUTPUTS_DIR     : {OUTPUTS_DIR}")
print(f"   METRICS_DIR     : {METRICS_DIR}")
print("‚úÖ Imports r√©ussis: HistoDataset, FIDLPIPSEvaluator")

‚úÖ Dossiers configur√©s:
   PROJECT_ROOT    : /workspace
   DATA_ROOT       : /workspace/data
   CONFIG_DIR      : /workspace/configs
   MODELS_DIR      : /workspace/models
   ARTIFACTS_DIR   : /workspace/artifacts
   OUTPUTS_DIR     : /workspace/outputs
   METRICS_DIR     : /workspace/artifacts/metrics
‚úÖ Imports r√©ussis: HistoDataset, FIDLPIPSEvaluator


In [3]:
# ==========================================================
# üßÆ Classe d‚Äô√©valuation FID + LPIPS multi-classes
# ==========================================================

import pandas as pd
import numpy as np
from tqdm import tqdm
from p9dg.utils.class_mappings import class_labels
import random, torch, warnings
from torch_fidelity import calculate_metrics
import lpips
from torchvision import transforms
from PIL import Image
import tempfile, shutil, os

import warnings

# --- Masquer les warnings non critiques ---
warnings.filterwarnings("ignore", category=FutureWarning, module="lpips.lpips")
warnings.filterwarnings("ignore", category=UserWarning, module="torch_fidelity.datasets")

In [4]:
# --- UNI2-h embedder (timm + HF Hub, m√™me archi que 07/08) ---
import os, numpy as np, torch, timm
from PIL import Image
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

def make_uni2h_embedder_timm(device=None, hf_token=None):
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    # Autorisation Hugging Face pour timm (hf-hub:...)
    hf_token = hf_token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    if hf_token and "HUGGINGFACE_HUB_TOKEN" not in os.environ:
        os.environ["HUGGINGFACE_HUB_TOKEN"] = hf_token

    timm_kwargs = {
        "img_size": 224,
        "patch_size": 14,
        "depth": 24,
        "num_heads": 24,
        "embed_dim": 1536,
        "mlp_ratio": 2.66667 * 2,  # UNI-2h
        "init_values": 1e-5,
        "num_classes": 0,
        "no_embed_class": True,
        "reg_tokens": 8,
        "dynamic_img_size": True,
        "mlp_layer": timm.layers.SwiGLUPacked,
        "act_layer": torch.nn.SiLU,
    }

    uni = timm.create_model("hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs).eval().to(device)
    cfg = resolve_data_config(uni.pretrained_cfg, model=uni)  # idem √† 07/08
    tfm = create_transform(**cfg)

    @torch.no_grad()
    def uni_embed(path: str) -> np.ndarray:
        img = Image.open(path).convert("RGB")
        x = tfm(img).unsqueeze(0).to(device)
        feat = uni(x)                # (1, 1536)
        if feat.ndim == 4:           # just-in-case
            feat = feat.mean(dim=(-2, -1))
        return feat.squeeze(0).float().cpu().numpy().astype(np.float64)

    # petit sanity check (d√©sactive si tu veux)
    # print("UNI-2h ready ‚Üí dim", int(uni.num_features), "| device:", device)
    return uni_embed


In [5]:
from huggingface_hub import login
import os

HF_TOKEN = os.getenv("HF_TOKEN")
# V√©rifie qu'il est charg√© (ne jamais l'afficher publiquement)
assert HF_TOKEN is not None, "HF_TOKEN n'est pas d√©fini dans l'environnement !"

In [6]:
uni_embed = make_uni2h_embedder_timm() 

In [7]:
# Test 1: Mode dossiers avec FID_UNI
df = run_eval_experiment(
    name="real_vs_pixcell_kid_prc_uni",
    real_root=OUTPUTS_DIR / "pixcell_synth" / "train" / "real",
    gen_root=OUTPUTS_DIR / "pixcell_synth" / "train" / "synth",
    save_dir=METRICS_DIR,
    seed=SEED,
    uni_embed=uni_embed,
    fid_uni_max=50,   # ‚Üê sample max pour FID_UNI
    max_images_per_class=50,  # ‚Üê idem pour FID/KID/PRC
    lpips_pairs=50,   # ‚Üê LPIPS pair√© born√©
)


üéØ Exp√©rience : real_vs_pixcell_kid_prc_uni
üìÅ real_root = /workspace/outputs/pixcell_synth/train/real
üìÅ gen_root  = /workspace/outputs/pixcell_synth/train/synth
üì¶ classes √©valu√©es : ['ADI', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : real vs synth
üìÇ Classes √©valu√©es : ADI, DEB, LYM, MUC, MUS, NORM, STR, TUM
üíæ Rapport : /workspace/artifacts/metrics/real_synth_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [02:12<00:00, 16.54s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/real_synth_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/real_vs_pixcell_kid_prc_uni_metrics.csv





In [8]:
df

Unnamed: 0,class,n_real,n_synth,FID,KID_mean,KID_std,LPIPS,FID_UNI,experiment,max_images_per_class,lpips_pairs
0,ADI,50,50,324.605324,0.197659,1.884952e-07,0.583401,351.774707,real_vs_pixcell_kid_prc_uni,50,50
1,DEB,50,50,360.086695,0.305777,1.659293e-07,0.6866,320.297837,real_vs_pixcell_kid_prc_uni,50,50
2,LYM,50,50,330.514219,0.316774,1.93412e-07,0.71274,402.193924,real_vs_pixcell_kid_prc_uni,50,50
3,MUC,50,50,331.362883,0.294824,1.734017e-07,0.657867,305.472846,real_vs_pixcell_kid_prc_uni,50,50
4,MUS,50,50,336.253769,0.213358,1.61501e-07,0.660323,410.913382,real_vs_pixcell_kid_prc_uni,50,50
5,NORM,50,50,397.912413,0.315392,1.789366e-07,0.725148,430.095939,real_vs_pixcell_kid_prc_uni,50,50
6,STR,50,50,368.99626,0.292684,1.400833e-07,0.635372,317.360711,real_vs_pixcell_kid_prc_uni,50,50
7,TUM,50,50,304.533535,0.192312,1.695027e-07,0.638846,378.783228,real_vs_pixcell_kid_prc_uni,50,50


In [9]:
# Test 2: Mode pair√© avec CSV
import pandas as pd
import inspect

# Chercher le CSV dans diff√©rents emplacements possibles
csv_candidates = [
#    OUTPUTS_DIR / "pixcell_synth" / "pairs_real_synth.csv",
    OUTPUTS_DIR / "07_diffusion_model" / "pixcell_out_histo" / "pixcell_metadata_*.csv",
    ARTIFACTS_DIR / "pairs_real_synth.csv",
]

csv_path = None
for candidate in csv_candidates:
    if "*" in str(candidate):
        # Chercher avec glob √† partir du parent du pattern
        matches = sorted(candidate.parent.glob(candidate.name))
        if matches:
            csv_path = matches[0]
            break
    elif candidate.exists():
        csv_path = candidate
        break

if csv_path is None:
    print("‚ö†Ô∏è Aucun CSV trouv√©, cr√©ation d'un CSV de test...")
    # Cr√©er un CSV minimal pour les tests
    csv_path = METRICS_DIR / "test_pairs.csv"
    # Le CSV sera cr√©√© si n√©cessaire dans les tests suivants
else:
    print(f"‚úÖ CSV trouv√©: {csv_path}")

# 1) Lire et d√©tecter les colonnes
if csv_path.exists():
    dfp = pd.read_csv(csv_path)
    cols = set(dfp.columns.str.lower())

    REAL_CANDS  = ["ref_path","real_path","real","path_real"]
    GEN_CANDS   = ["out_path","gen_path","synth_path","generated","path_gen"]
    CLASS_CANDS = ["ref_label","label","class","cls","category"]

    def pick(cands):
        for c in cands:
            if c in cols: 
                # retourne le nom EXACT pr√©sent dans le df (respecte la casse)
                return next(x for x in dfp.columns if x.lower()==c)
        return None

    real_col  = pick(REAL_CANDS)
    gen_col   = pick(GEN_CANDS)
    class_col = pick(CLASS_CANDS) or None  # None -> tout regroup√© en 'ALL' (g√©r√© par le runner)

    if real_col is None or gen_col is None:
        raise KeyError(f"Colonnes introuvables : real={REAL_CANDS}, gen={GEN_CANDS}\nTrouv√©es: {list(dfp.columns)}")

    print("Colonnes:", {"real_col":real_col, "gen_col":gen_col, "class_col":class_col})

    # 2) Normaliser les chemins (si relatifs)
    def norm_path(p):
        p = Path(str(p))
        return str(p if p.is_absolute() else (PROJECT_ROOT / p))

    dfp[real_col] = dfp[real_col].map(norm_path)
    dfp[gen_col]  = dfp[gen_col].map(norm_path)

    tmp_csv = METRICS_DIR / "pairs_real_synth.normalized.csv"
    dfp.to_csv(tmp_csv, index=False)

    # 3) Appel robuste (d√©tecte si ta version supporte FID_UNI et lpips_pairs, etc.)
    sig = inspect.signature(run_eval_paired_experiment)
    kwargs = dict(
        name="paired_pixcell_kid_prc_uni",
        pairs_csv=tmp_csv,
        save_dir=METRICS_DIR,
        seed=SEED,
        real_col=real_col,
        gen_col=gen_col,
    )
    if "class_col" in sig.parameters: kwargs["class_col"] = class_col
    if "lpips_pairs" in sig.parameters: kwargs["lpips_pairs"] = 50

    # Active FID_UNI si tu as d√©j√† d√©fini uni_embed (UNI2-h timm). Sinon, il sera ignor√©.
    try:
        uni_embed  # noqa
        if "uni_embed" in sig.parameters: kwargs["uni_embed"] = uni_embed
        if "fid_uni_max" in sig.parameters: kwargs["fid_uni_max"] = 50
    except NameError:
        pass

    df_paired = run_eval_paired_experiment(**kwargs)
    df_paired
else:
    print("‚ö†Ô∏è CSV introuvable, test ignor√©")


‚úÖ CSV trouv√©: /workspace/outputs/07_diffusion_model/pixcell_out_histo/pixcell_metadata_1763635740.csv
Colonnes: {'real_col': 'ref_path', 'gen_col': 'out_path', 'class_col': 'ref_label'}
üìë CSV : /workspace/artifacts/metrics/pairs_real_synth.normalized.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : paired_pixcell_kid_prc_uni  |  Device: cuda
üì¶ Classes : ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [02:25<00:00, 16.13s/it]


‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/paired_pixcell_kid_prc_uni_paired_metrics.csv





In [10]:
df_paired

Unnamed: 0,class,n_pairs,FID,KID_mean,KID_std,LPIPS,FID_UNI,Precision,Recall
0,ADI,50,139.855971,0.032395,1.305203e-07,0.450707,219.964929,,
1,BACK,50,111.803277,0.023069,1.612554e-07,0.263312,242.112444,,
2,DEB,50,101.90504,0.025436,1.488911e-07,0.404742,196.763217,,
3,LYM,50,81.029595,0.034067,1.75896e-07,0.327944,253.358143,,
4,MUC,50,142.482193,0.013708,1.453584e-07,0.456694,161.691089,,
5,MUS,50,156.852417,0.04156,1.653518e-07,0.411078,257.436283,,
6,NORM,50,161.103947,0.056751,1.869012e-07,0.454551,309.579836,,
7,STR,50,108.689386,0.023189,2.013797e-07,0.410843,206.780352,,
8,TUM,50,134.567216,0.027516,1.908322e-07,0.465834,248.25818,,


In [11]:
# Test 3: FIDLPIPSEvaluator direct (mono-classe)
evaluator = FIDLPIPSEvaluator(
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=OUTPUTS_DIR / "07_diffusion_model" / "pixcell_out_histo",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    seed=SEED
)
df = evaluator.run()


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs pixcell_out_histo
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_pixcell_out_histo_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:15<00:00, 15.60s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_pixcell_out_histo_TUM.csv





In [12]:
display(df)

Unnamed: 0,class,n_real,n_synth,FID,KID_mean,KID_std,LPIPS,FID_UNI
0,TUM,400,400,75.650615,0.039632,0.007157,0.488061,


In [13]:
# Test 4: Sanity check - r√©el vs r√©el (deux tirages ind√©pendants dans le m√™me dossier)
df = run_eval_experiment(
    name="real_vs_real_TUM",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],  # ou None pour toutes
    drop_back_variant=True,
    max_images_per_class=400,
    lpips_pairs=50,
    seed=SEED
)

display(df)


üéØ Exp√©rience : real_vs_real_TUM
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:12<00:00, 12.83s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/real_vs_real_TUM_metrics.csv





Unnamed: 0,class,n_real,n_synth,FID,KID_mean,KID_std,LPIPS,FID_UNI,experiment,max_images_per_class,lpips_pairs
0,TUM,400,400,-4e-05,-0.001565,0.00297,0.0,,real_vs_real_TUM,400,50


In [14]:
# Test 5: R√©el vs normalis√© 
run_eval_experiment(
    name="real_vs_normalized_tst",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=OUTPUTS_DIR / "preprocessing" / "normalized_tst",
    save_dir=METRICS_DIR,
    classes=None,  # toutes les classes
    max_images_per_class=400,
    drop_back_variant=True,
    lpips_pairs=50,
    seed=SEED
)

üéØ Exp√©rience : real_vs_normalized_tst
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/outputs/preprocessing/normalized_tst
üì¶ classes √©valu√©es : ['ADI', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs normalized_tst
üìÇ Classes √©valu√©es : ADI, DEB, LYM, MUC, MUS, NORM, STR, TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_normalized_tst_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [01:46<00:00, 13.27s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_normalized_tst_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/real_vs_normalized_tst_metrics.csv





Unnamed: 0,class,n_real,n_synth,FID,KID_mean,KID_std,LPIPS,FID_UNI,experiment,max_images_per_class,lpips_pairs
0,ADI,400,234,72.266041,0.013956,0.005381,0.515938,,real_vs_normalized_tst,400,50
1,DEB,400,400,46.05196,0.016499,0.00439,0.438681,,real_vs_normalized_tst,400,50
2,LYM,400,400,24.937919,0.005874,0.003616,0.34039,,real_vs_normalized_tst,400,50
3,MUC,400,332,62.448501,0.011907,0.004021,0.516443,,real_vs_normalized_tst,400,50
4,MUS,400,400,90.221001,0.046691,0.01536,0.541433,,real_vs_normalized_tst,400,50
5,NORM,400,313,56.352615,0.010777,0.003895,0.47696,,real_vs_normalized_tst,400,50
6,STR,400,365,54.46928,0.022072,0.006451,0.47028,,real_vs_normalized_tst,400,50
7,TUM,400,400,52.528122,0.013849,0.004562,0.452923,,real_vs_normalized_tst,400,50


In [15]:
# Test 6: R√©el vs synth√©tique (UNI-2h+PixCell gated)
run_eval_experiment(
    name="real_vs_UNI_Pixcell_pretrained_unpaired",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=OUTPUTS_DIR / "07_diffusion_model" / "pixcell_out_histo",
    save_dir=METRICS_DIR,
    classes=None,  # toutes les classes
    max_images_per_class=300,
    drop_back_variant=True,
    lpips_pairs=50,
    seed=SEED
)

üéØ Exp√©rience : real_vs_UNI_Pixcell_pretrained_unpaired
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/outputs/07_diffusion_model/pixcell_out_histo
üì¶ classes √©valu√©es : ['ADI', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs pixcell_out_histo
üìÇ Classes √©valu√©es : ADI, DEB, LYM, MUC, MUS, NORM, STR, TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_pixcell_out_histo_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [01:39<00:00, 12.43s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_pixcell_out_histo_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/real_vs_UNI_Pixcell_pretrained_unpaired_metrics.csv





Unnamed: 0,class,n_real,n_synth,FID,KID_mean,KID_std,LPIPS,FID_UNI,experiment,max_images_per_class,lpips_pairs
0,ADI,300,300,89.973312,0.035722,0.006167,0.53127,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
1,DEB,300,300,66.907315,0.028825,0.007638,0.531308,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
2,LYM,300,300,57.393346,0.036262,0.005436,0.368316,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
3,MUC,300,300,86.217879,0.026552,0.005423,0.543316,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
4,MUS,300,300,102.296928,0.039298,0.007233,0.567855,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
5,NORM,300,300,102.317033,0.05758,0.009548,0.530825,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
6,STR,300,300,74.653237,0.030879,0.006296,0.513105,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50
7,TUM,300,300,82.215442,0.039617,0.006617,0.494496,,real_vs_UNI_Pixcell_pretrained_unpaired,300,50


In [16]:
# Test 7: Mode pair√© avec CSV PixCell (toutes les paires)
# Chercher le CSV PixCell
pixcell_csvs = list(OUTPUTS_DIR.glob("pixcell_out_histo/pixcell_metadata_*.csv"))
if pixcell_csvs:
    csv_path = sorted(pixcell_csvs)[-1]  # Prendre le plus r√©cent
    print(f"‚úÖ Utilisation du CSV: {csv_path.name}")
    
    df = run_eval_paired_experiment(
        name="paired_UNI_PixCell",
        pairs_csv=csv_path,
        save_dir=METRICS_DIR,
        # class_col="ref_label",      # par d√©faut
        lpips_pairs=None,             # None = toutes les paires
        seed=SEED
    )
    display(df)
else:
    print("‚ö†Ô∏è Aucun CSV PixCell trouv√© dans OUTPUTS_DIR/pixcell_out_histo/")


‚ö†Ô∏è Aucun CSV PixCell trouv√© dans OUTPUTS_DIR/pixcell_out_histo/


# =========================================================
# üß™ Tests suppl√©mentaires pour couverture compl√®te
# =========================================================


## Test 1: PRC activ√©/d√©sactiv√© en mode pair√©
V√©rifier que PRC n'appara√Æt que quand `compute_prc=True` en mode pair√©


In [17]:
# Test PRC activ√© (par d√©faut)
if 'tmp_csv' in locals() and Path(tmp_csv).exists():
    df_prc_on = run_eval_paired_experiment(
        name="test_prc_enabled",
        pairs_csv=tmp_csv,
        save_dir=METRICS_DIR,
        real_col=real_col if 'real_col' in locals() else "real_path",
        gen_col=gen_col if 'gen_col' in locals() else "synth_path",
        class_col=class_col if 'class_col' in locals() else "class",
        lpips_pairs=10,  # Petit √©chantillon pour test rapide
        compute_prc=True,  # PRC activ√©
        seed=SEED
    )

    print("‚úÖ Colonnes avec PRC activ√©:", list(df_prc_on.columns))
    assert "Precision" in df_prc_on.columns, "Precision devrait √™tre pr√©sente avec PRC activ√©"
    assert "Recall" in df_prc_on.columns, "Recall devrait √™tre pr√©sente avec PRC activ√©"
    print("‚úÖ PRC activ√©: OK")

    # Test PRC d√©sactiv√©
    df_prc_off = run_eval_paired_experiment(
        name="test_prc_disabled",
        pairs_csv=tmp_csv,
        save_dir=METRICS_DIR,
        real_col=real_col if 'real_col' in locals() else "real_path",
        gen_col=gen_col if 'gen_col' in locals() else "synth_path",
        class_col=class_col if 'class_col' in locals() else "class",
        lpips_pairs=10,
        compute_prc=False,  # PRC d√©sactiv√©
        seed=SEED
    )

    print("‚úÖ Colonnes avec PRC d√©sactiv√©:", list(df_prc_off.columns))
    assert "Precision" not in df_prc_off.columns, "Precision ne devrait PAS √™tre pr√©sente avec PRC d√©sactiv√©"
    assert "Recall" not in df_prc_off.columns, "Recall ne devrait PAS √™tre pr√©sente avec PRC d√©sactiv√©"
    print("‚úÖ PRC d√©sactiv√©: OK")
else:
    print("‚ö†Ô∏è tmp_csv non disponible, test PRC ignor√©")


üìë CSV : /workspace/artifacts/metrics/pairs_real_synth.normalized.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : test_prc_enabled  |  Device: cuda
üì¶ Classes : ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:45<00:00,  5.01s/it]



‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/test_prc_enabled_paired_metrics.csv
‚úÖ Colonnes avec PRC activ√©: ['class', 'n_pairs', 'FID', 'KID_mean', 'KID_std', 'LPIPS', 'FID_UNI', 'Precision', 'Recall']
‚úÖ PRC activ√©: OK
üìë CSV : /workspace/artifacts/metrics/pairs_real_synth.normalized.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : test_prc_disabled  |  Device: cuda
üì¶ Classes : ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:45<00:00,  5.00s/it]


‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/test_prc_disabled_paired_metrics.csv
‚úÖ Colonnes avec PRC d√©sactiv√©: ['class', 'n_pairs', 'FID', 'KID_mean', 'KID_std', 'LPIPS', 'FID_UNI']
‚úÖ PRC d√©sactiv√©: OK





## Test 2: V√©rification que PRC n'appara√Æt jamais en mode dossiers
V√©rifier que les colonnes Precision/Recall ne sont jamais pr√©sentes en mode dossiers


In [18]:
# Test mode dossiers - v√©rifier absence de Precision/Recall
df_dirs = run_eval_experiment(
    name="test_mode_dossiers_no_prc",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=50,
    lpips_pairs=10,
    seed=SEED
)

print("‚úÖ Colonnes en mode dossiers:", list(df_dirs.columns))
assert "Precision" not in df_dirs.columns, "Precision ne devrait JAMAIS √™tre pr√©sente en mode dossiers"
assert "Recall" not in df_dirs.columns, "Recall ne devrait JAMAIS √™tre pr√©sente en mode dossiers"
print("‚úÖ Mode dossiers sans PRC: OK")


üéØ Exp√©rience : test_mode_dossiers_no_prc
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.17s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_mode_dossiers_no_prc_metrics.csv
‚úÖ Colonnes en mode dossiers: ['class', 'n_real', 'n_synth', 'FID', 'KID_mean', 'KID_std', 'LPIPS', 'FID_UNI', 'experiment', 'max_images_per_class', 'lpips_pairs']
‚úÖ Mode dossiers sans PRC: OK





## Test 3: Reproductibilit√© avec diff√©rentes seeds
V√©rifier que les r√©sultats sont reproductibles avec la m√™me seed


In [19]:
# Test reproductibilit√© - m√™me seed = m√™mes r√©sultats
df_seed1 = run_eval_experiment(
    name="test_repro_seed42_run1",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=50,
    lpips_pairs=10,
    seed=SEED
)

df_seed2 = run_eval_experiment(
    name="test_repro_seed42_run2",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=50,
    lpips_pairs=10,
    seed=SEED
)

# V√©rifier que les r√©sultats sont identiques
import numpy as np
for col in ["FID", "KID_mean", "LPIPS"]:
    if col in df_seed1.columns and col in df_seed2.columns:
        diff = np.abs(df_seed1[col].values - df_seed2[col].values)
        assert np.allclose(diff, 0, atol=1e-6), f"Colonne {col} diff√®re entre les deux runs avec seed=42"
        print(f"‚úÖ {col}: reproductible (diff max: {diff.max():.2e})")

print("‚úÖ Reproductibilit√©: OK")


üéØ Exp√©rience : test_repro_seed42_run1
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.28s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_repro_seed42_run1_metrics.csv
üéØ Exp√©rience : test_repro_seed42_run2
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.14s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_repro_seed42_run2_metrics.csv
‚úÖ FID: reproductible (diff max: 0.00e+00)
‚úÖ KID_mean: reproductible (diff max: 0.00e+00)
‚úÖ LPIPS: reproductible (diff max: 0.00e+00)
‚úÖ Reproductibilit√©: OK





## Test 4: FID_UNI activ√©/d√©sactiv√©
V√©rifier que FID_UNI n'appara√Æt que quand uni_embed est fourni


In [20]:
# Test avec FID_UNI activ√©
df_uni_on = run_eval_experiment(
    name="test_fid_uni_enabled",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=30,
    lpips_pairs=10,
    seed=SEED,
    uni_embed=uni_embed if 'uni_embed' in locals() else None,
    fid_uni_max=20
)

print("‚úÖ Colonnes avec FID_UNI:", list(df_uni_on.columns))
if 'uni_embed' in locals():
    assert "FID_UNI" in df_uni_on.columns, "FID_UNI devrait √™tre pr√©sente avec uni_embed"
    assert not df_uni_on["FID_UNI"].isna().all(), "FID_UNI ne devrait pas √™tre tout NaN"
    print("‚úÖ FID_UNI activ√©: OK")
else:
    print("‚ö†Ô∏è uni_embed non disponible, test FID_UNI ignor√©")

# Test sans FID_UNI
df_uni_off = run_eval_experiment(
    name="test_fid_uni_disabled",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=30,
    lpips_pairs=10,
    seed=SEED,
    uni_embed=None,  # Pas de FID_UNI
)

print("‚úÖ Colonnes sans FID_UNI:", list(df_uni_off.columns))
# FID_UNI peut √™tre pr√©sente mais avec NaN
if "FID_UNI" in df_uni_off.columns:
    assert df_uni_off["FID_UNI"].isna().all(), "FID_UNI devrait √™tre NaN sans uni_embed"
    print("‚úÖ FID_UNI d√©sactiv√© (NaN): OK")


üéØ Exp√©rience : test_fid_uni_enabled
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:10<00:00, 10.77s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_fid_uni_enabled_metrics.csv
‚úÖ Colonnes avec FID_UNI: ['class', 'n_real', 'n_synth', 'FID', 'KID_mean', 'KID_std', 'LPIPS', 'FID_UNI', 'experiment', 'max_images_per_class', 'lpips_pairs']
‚úÖ FID_UNI activ√©: OK
üéØ Exp√©rience : test_fid_uni_disabled
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:05<00:00,  5.96s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_fid_uni_disabled_metrics.csv
‚úÖ Colonnes sans FID_UNI: ['class', 'n_real', 'n_synth', 'FID', 'KID_mean', 'KID_std', 'LPIPS', 'FID_UNI', 'experiment', 'max_images_per_class', 'lpips_pairs']
‚úÖ FID_UNI d√©sactiv√© (NaN): OK





## Test 5: Gestion des classes (s√©lection, intersection, drop_back_variant)


In [21]:
# Test avec classes sp√©cifiques
df_classes_specific = run_eval_experiment(
    name="test_classes_specific",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM", "LYM"],  # Classes sp√©cifiques
    max_images_per_class=30,
    lpips_pairs=10,
    seed=SEED
)

print("‚úÖ Classes √©valu√©es:", sorted(df_classes_specific["class"].unique()))
assert set(df_classes_specific["class"].unique()) == {"TUM", "LYM"}, "Seules TUM et LYM devraient √™tre pr√©sentes"
print("‚úÖ Classes sp√©cifiques: OK")

# Test avec drop_back_variant
df_no_back = run_eval_experiment(
    name="test_drop_back",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=None,  # Toutes les classes
    drop_back_variant=True,  # Exclure BACK
    max_images_per_class=30,
    lpips_pairs=10,
    seed=SEED
)

print("‚úÖ Classes sans BACK:", sorted(df_no_back["class"].unique()))
assert "BACK" not in df_no_back["class"].values, "BACK ne devrait pas √™tre pr√©sent avec drop_back_variant=True"
print("‚úÖ drop_back_variant: OK")


üéØ Exp√©rience : test_classes_specific
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM', 'LYM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM, LYM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM_LYM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:11<00:00,  5.97s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM_LYM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_classes_specific_metrics.csv
‚úÖ Classes √©valu√©es: ['LYM', 'TUM']
‚úÖ Classes sp√©cifiques: OK
üéØ Exp√©rience : test_drop_back
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['ADI', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : ADI, DEB, LYM, MUC, MUS, NORM, STR, TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:45<00:00,  5.73s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_ADI_DEB_LYM_MUC_MUS_NORM_STR_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_drop_back_metrics.csv
‚úÖ Classes sans BACK: ['ADI', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']
‚úÖ drop_back_variant: OK





## Test 6: Mode pair√© avec CSV sans colonne de classe (regroupement en 'ALL')


In [22]:
# Cr√©er un CSV temporaire sans colonne de classe
if 'tmp_csv' in locals():
    df_test = pd.read_csv(tmp_csv)
    # Supprimer temporairement la colonne de classe
    if class_col in df_test.columns:
        df_test_no_class = df_test.drop(columns=[class_col])
        tmp_csv_no_class = METRICS_DIR / "pairs_test_no_class.csv"
        df_test_no_class.to_csv(tmp_csv_no_class, index=False)
        
        df_all = run_eval_paired_experiment(
            name="test_no_class_col",
            pairs_csv=tmp_csv_no_class,
            save_dir=METRICS_DIR,
            real_col=real_col,
            gen_col=gen_col,
            class_col=None,  # Pas de colonne de classe
            lpips_pairs=10,
            seed=SEED
        )
        
        print("‚úÖ Classes d√©tect√©es:", sorted(df_all["class"].unique()))
        assert "ALL" in df_all["class"].values or len(df_all["class"].unique()) == 1, "Devrait regrouper en 'ALL'"
        print("‚úÖ Regroupement en 'ALL': OK")
    else:
        print("‚ö†Ô∏è Colonne de classe non trouv√©e dans le CSV de test")
else:
    print("‚ö†Ô∏è tmp_csv non disponible, test ignor√©")


üìë CSV : /workspace/artifacts/metrics/pairs_test_no_class.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : test_no_class_col  |  Device: cuda
üì¶ Classes : ['ALL']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:05<00:00,  5.22s/it]


‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/test_no_class_col_paired_metrics.csv
‚úÖ Classes d√©tect√©es: ['ALL']
‚úÖ Regroupement en 'ALL': OK





## Test 7: Validation des erreurs (chemins invalides, classes manquantes)


In [23]:
# Test avec chemin invalide
try:
    evaluator = FIDLPIPSEvaluator(
        real_root="/chemin/inexistant",
        gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
        classes=["TUM"]
    )
    assert False, "Devrait lever FileNotFoundError"
except FileNotFoundError as e:
    print(f"‚úÖ Erreur chemin invalide captur√©e: {type(e).__name__}")

# Test avec classe inexistante
try:
    evaluator = FIDLPIPSEvaluator(
        real_root=DATA_ROOT / "NCT-CRC-HE-100K",
        gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
        classes=["CLASSE_INEXISTANTE"]
    )
    assert False, "Devrait lever ValueError"
except ValueError as e:
    print(f"‚úÖ Erreur classe inexistante captur√©e: {type(e).__name__}")

# Test avec gen_root None
try:
    evaluator = FIDLPIPSEvaluator(
        real_root=DATA_ROOT / "NCT-CRC-HE-100K",
        gen_root=None,
        classes=["TUM"]
    )
    assert False, "Devrait lever FileNotFoundError"
except FileNotFoundError as e:
    print(f"‚úÖ Erreur gen_root None captur√©e: {type(e).__name__}")

print("‚úÖ Validation des erreurs: OK")


‚úÖ Erreur chemin invalide captur√©e: FileNotFoundError
‚úÖ Erreur classe inexistante captur√©e: ValueError
‚úÖ Erreur gen_root None captur√©e: FileNotFoundError
‚úÖ Validation des erreurs: OK


## Test 8: Diff√©rentes valeurs de lpips_pairs et max_images_per_class


In [24]:
# Test avec lpips_pairs limit√©
df_lpips_limited = run_eval_experiment(
    name="test_lpips_limited",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=100,
    lpips_pairs=5,  # Tr√®s limit√©
    seed=SEED
)

print(f"‚úÖ LPIPS avec lpips_pairs=5: {df_lpips_limited['LPIPS'].values[0]:.6f}")
assert not np.isnan(df_lpips_limited['LPIPS'].values[0]), "LPIPS ne devrait pas √™tre NaN"
print("‚úÖ lpips_pairs limit√©: OK")

# Test avec max_images_per_class petit
df_small_max = run_eval_experiment(
    name="test_small_max_images",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=10,  # Tr√®s petit
    lpips_pairs=5,
    seed=SEED
)

print(f"‚úÖ n_real avec max_images=10: {df_small_max['n_real'].values[0]}")
assert df_small_max['n_real'].values[0] <= 10, "n_real ne devrait pas d√©passer max_images_per_class"
print("‚úÖ max_images_per_class: OK")


üéØ Exp√©rience : test_lpips_limited
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:07<00:00,  7.23s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_lpips_limited_metrics.csv
‚úÖ LPIPS avec lpips_pairs=5: 0.000000
‚úÖ lpips_pairs limit√©: OK
üéØ Exp√©rience : test_small_max_images
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:05<00:00,  5.52s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_small_max_images_metrics.csv
‚úÖ n_real avec max_images=10: 10
‚úÖ max_images_per_class: OK





## Test 9: Coh√©rence des colonnes entre modes
V√©rifier que les colonnes sont coh√©rentes entre mode dossiers et mode pair√©


In [25]:
# Colonnes attendues en mode dossiers
expected_cols_dirs = {"class", "n_real", "n_synth", "FID", "KID_mean", "KID_std", "LPIPS", "FID_UNI"}
df_test_dirs = run_eval_experiment(
    name="test_cols_dirs",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=30,
    lpips_pairs=10,
    seed=SEED
)

actual_cols_dirs = set(df_test_dirs.columns) - {"experiment", "max_images_per_class", "lpips_pairs"}
print(f"‚úÖ Colonnes mode dossiers: {sorted(actual_cols_dirs)}")
assert expected_cols_dirs.issubset(actual_cols_dirs), f"Colonnes manquantes: {expected_cols_dirs - actual_cols_dirs}"
assert "Precision" not in actual_cols_dirs, "Precision ne devrait pas √™tre en mode dossiers"
assert "Recall" not in actual_cols_dirs, "Recall ne devrait pas √™tre en mode dossiers"
print("‚úÖ Colonnes mode dossiers: OK")

# Colonnes attendues en mode pair√© (avec PRC)
if 'tmp_csv' in locals():
    df_test_paired = run_eval_paired_experiment(
        name="test_cols_paired",
        pairs_csv=tmp_csv,
        save_dir=METRICS_DIR,
        real_col=real_col,
        gen_col=gen_col,
        class_col=class_col,
        lpips_pairs=10,
        compute_prc=True,
        seed=SEED
    )
    
    expected_cols_paired = {"class", "n_pairs", "FID", "KID_mean", "KID_std", "LPIPS", "FID_UNI", "Precision", "Recall"}
    actual_cols_paired = set(df_test_paired.columns)
    print(f"‚úÖ Colonnes mode pair√© (avec PRC): {sorted(actual_cols_paired)}")
    assert expected_cols_paired.issubset(actual_cols_paired), f"Colonnes manquantes: {expected_cols_paired - actual_cols_paired}"
    print("‚úÖ Colonnes mode pair√©: OK")
else:
    print("‚ö†Ô∏è tmp_csv non disponible, test colonnes pair√© ignor√©")


üéØ Exp√©rience : test_cols_dirs
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:05<00:00,  5.89s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_cols_dirs_metrics.csv
‚úÖ Colonnes mode dossiers: ['FID', 'FID_UNI', 'KID_mean', 'KID_std', 'LPIPS', 'class', 'n_real', 'n_synth']
‚úÖ Colonnes mode dossiers: OK
üìë CSV : /workspace/artifacts/metrics/pairs_real_synth.normalized.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : test_cols_paired  |  Device: cuda
üì¶ Classes : ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:45<00:00,  5.03s/it]


‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/test_cols_paired_paired_metrics.csv
‚úÖ Colonnes mode pair√© (avec PRC): ['FID', 'FID_UNI', 'KID_mean', 'KID_std', 'LPIPS', 'Precision', 'Recall', 'class', 'n_pairs']
‚úÖ Colonnes mode pair√©: OK





## Test 10: Sanity check - valeurs attendues
V√©rifier que les m√©triques ont des valeurs raisonnables


In [26]:
# Test r√©el vs r√©el - FID et LPIPS devraient √™tre proches de 0
df_sanity = run_eval_experiment(
    name="test_sanity_check",
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM"],
    max_images_per_class=50,
    lpips_pairs=20,
    seed=SEED
)

fid_val = df_sanity["FID"].values[0]
lpips_val = df_sanity["LPIPS"].values[0]

print(f"‚úÖ FID (r√©el vs r√©el): {fid_val:.6f}")
print(f"‚úÖ LPIPS (r√©el vs r√©el): {lpips_val:.6f}")

# FID devrait √™tre tr√®s proche de 0 (m√™me distribution)
assert abs(fid_val) < 1.0, f"FID r√©el vs r√©el devrait √™tre < 1.0, obtenu: {fid_val}"
# LPIPS devrait √™tre tr√®s proche de 0 (m√™mes images)
assert lpips_val < 0.1, f"LPIPS r√©el vs r√©el devrait √™tre < 0.1, obtenu: {lpips_val}"

print("‚úÖ Sanity check valeurs: OK")


üéØ Exp√©rience : test_sanity_check
üìÅ real_root = /workspace/data/NCT-CRC-HE-100K
üìÅ gen_root  = /workspace/data/NCT-CRC-HE-100K
üì¶ classes √©valu√©es : ['TUM']
üé≤ seed = 42
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.22s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
üíæ R√©sultats sauvegard√©s ‚Üí /workspace/artifacts/metrics/test_sanity_check_metrics.csv
‚úÖ FID (r√©el vs r√©el): -0.000059
‚úÖ LPIPS (r√©el vs r√©el): 0.000000
‚úÖ Sanity check valeurs: OK





## Test 11: Mode pair√© avec lpips_pairs=None (toutes les paires)


In [27]:
# Test avec toutes les paires (lpips_pairs=None)
if 'tmp_csv' in locals():
    df_all_pairs = run_eval_paired_experiment(
        name="test_all_pairs",
        pairs_csv=tmp_csv,
        save_dir=METRICS_DIR,
        real_col=real_col,
        gen_col=gen_col,
        class_col=class_col,
        lpips_pairs=None,  # Toutes les paires
        seed=SEED
    )
    
    # V√©rifier que toutes les paires disponibles sont utilis√©es
    df_source = pd.read_csv(tmp_csv)
    if class_col and class_col in df_source.columns:
        for cls in df_all_pairs["class"].unique():
            n_expected = len(df_source[df_source[class_col] == cls])
            n_actual = df_all_pairs[df_all_pairs["class"] == cls]["n_pairs"].values[0]
            print(f"‚úÖ Classe {cls}: {n_actual} paires utilis√©es (sur {n_expected} disponibles)")
            # Peut √™tre inf√©rieur si certaines images sont invalides
            assert n_actual <= n_expected, f"n_pairs ({n_actual}) ne devrait pas d√©passer le nombre de lignes ({n_expected})"
    
    print("‚úÖ lpips_pairs=None (toutes les paires): OK")
else:
    print("‚ö†Ô∏è tmp_csv non disponible, test lpips_pairs=None ignor√©")


üìë CSV : /workspace/artifacts/metrics/pairs_real_synth.normalized.csv  (5625 lignes)
üîç Colonnes d√©tect√©es : ['global_index', 'split', 'ref_path', 'ref_label', 'ref_label_full', 'ref_color', 'seed', 'out_path', 'guidance_scale', 'steps']
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
üéØ Exp√©rience pair√©e : test_all_pairs  |  Device: cuda
üì¶ Classes : ['ADI', 'BACK', 'DEB', 'LYM', 'MUC', 'MUS', 'NORM', 'STR', 'TUM']


√âvaluation pair√©e: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [03:43<00:00, 24.87s/it]


‚úÖ Rapport pair√© enregistr√© : /workspace/artifacts/metrics/test_all_pairs_paired_metrics.csv
‚úÖ Classe ADI: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe BACK: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe DEB: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe LYM: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe MUC: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe MUS: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe NORM: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe STR: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ Classe TUM: 625 paires utilis√©es (sur 625 disponibles)
‚úÖ lpips_pairs=None (toutes les paires): OK





## Test 12: FIDLPIPSEvaluator direct (mono-classe et multi-classes)


In [28]:
# Test mono-classe
evaluator_mono = FIDLPIPSEvaluator(
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes="TUM",  # String au lieu de liste
    max_images=30,
    lpips_pairs=10,
    seed=SEED
)
df_mono = evaluator_mono.run()
print(f"‚úÖ Mono-classe (string): {len(df_mono)} classe(s)")
assert len(df_mono) == 1, "Devrait avoir exactement 1 classe"
assert df_mono["class"].values[0] == "TUM", "Classe devrait √™tre TUM"
print("‚úÖ Mono-classe: OK")

# Test multi-classes
evaluator_multi = FIDLPIPSEvaluator(
    real_root=DATA_ROOT / "NCT-CRC-HE-100K",
    gen_root=DATA_ROOT / "NCT-CRC-HE-100K",
    save_dir=METRICS_DIR,
    classes=["TUM", "LYM", "MUC"],  # Liste
    max_images=30,
    lpips_pairs=10,
    seed=SEED
)
df_multi = evaluator_multi.run()
print(f"‚úÖ Multi-classes: {len(df_multi)} classe(s)")
assert len(df_multi) == 3, "Devrait avoir exactement 3 classes"
assert set(df_multi["class"].values) == {"TUM", "LYM", "MUC"}, "Classes devraient √™tre TUM, LYM, MUC"
print("‚úÖ Multi-classes: OK")


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:05<00:00,  5.73s/it]



‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM.csv
‚úÖ Mono-classe (string): 1 classe(s)
‚úÖ Mono-classe: OK
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.11/site-packages/lpips/weights/v0.1/alex.pth
‚úÖ Initialis√© : NCT-CRC-HE-100K vs NCT-CRC-HE-100K
üìÇ Classes √©valu√©es : TUM, LYM, MUC
üíæ Rapport : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM_LYM_MUC.csv


√âvaluation multi-classes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:17<00:00,  5.68s/it]


‚úÖ Rapport enregistr√© : /workspace/artifacts/metrics/NCT-CRC-HE-100K_NCT-CRC-HE-100K_TUM_LYM_MUC.csv
‚úÖ Multi-classes: 3 classe(s)
‚úÖ Multi-classes: OK





## R√©sum√© des tests


In [29]:
print("""
‚úÖ Tests de couverture compl√®te :

1. ‚úÖ PRC activ√©/d√©sactiv√© en mode pair√©
2. ‚úÖ V√©rification absence PRC en mode dossiers
3. ‚úÖ Reproductibilit√© avec seeds
4. ‚úÖ FID_UNI activ√©/d√©sactiv√©
5. ‚úÖ Gestion des classes (s√©lection, drop_back_variant)
6. ‚úÖ Mode pair√© sans colonne de classe (regroupement 'ALL')
7. ‚úÖ Validation des erreurs (chemins invalides, classes manquantes)
8. ‚úÖ Diff√©rentes valeurs de lpips_pairs et max_images_per_class
9. ‚úÖ Coh√©rence des colonnes entre modes
10. ‚úÖ Sanity check - valeurs attendues (r√©el vs r√©el)
11. ‚úÖ Mode pair√© avec lpips_pairs=None
12. ‚úÖ FIDLPIPSEvaluator direct (mono et multi-classes)

Tous les tests passent ! üéâ
""")



‚úÖ Tests de couverture compl√®te :

1. ‚úÖ PRC activ√©/d√©sactiv√© en mode pair√©
2. ‚úÖ V√©rification absence PRC en mode dossiers
3. ‚úÖ Reproductibilit√© avec seeds
4. ‚úÖ FID_UNI activ√©/d√©sactiv√©
5. ‚úÖ Gestion des classes (s√©lection, drop_back_variant)
6. ‚úÖ Mode pair√© sans colonne de classe (regroupement 'ALL')
7. ‚úÖ Validation des erreurs (chemins invalides, classes manquantes)
8. ‚úÖ Diff√©rentes valeurs de lpips_pairs et max_images_per_class
9. ‚úÖ Coh√©rence des colonnes entre modes
10. ‚úÖ Sanity check - valeurs attendues (r√©el vs r√©el)
11. ‚úÖ Mode pair√© avec lpips_pairs=None
12. ‚úÖ FIDLPIPSEvaluator direct (mono et multi-classes)

Tous les tests passent ! üéâ

