In [1]:
# Notebook : debug_deit_pipeline.ipynb
# ─────────────────────────────────────
# Choisissez le chemin de votre fichier de config (.py)
CFG_PATH = "config/cfg_deit.py"      # <- adaptez si besoin

import sys, os, torch, torchvision, random, numpy as np
sys.path.append(os.path.dirname(CFG_PATH))
cfg = __import__(os.path.basename(CFG_PATH).replace(".py", ""))

# Reproducibilité minimale
SEED = 123
torch.manual_seed(SEED);  np.random.seed(SEED);  random.seed(SEED)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("CUDA:", torch.cuda.is_available(), "| device:", device)


CUDA: True | device: cuda:0


In [2]:
from src.load_model import load_model
from src.data_loader_mask import load_data_train_test
from src.transform import image_transform_train, image_transform_test, mask_transform

print("Transform TRAIN  :", image_transform_train(size=cfg.image_size))
print("Transform TEST   :", image_transform_test(size=cfg.image_size))


# ⬇︎ same params que votre script principal
# train_loader, test_loader = load_data_train_test(
#     train_original_path = cfg.train_original_path,
#     test_original_path  = cfg.test_original_path,
#     train_modified_path = cfg.train_modified_path,
#     test_modified_path  = cfg.test_modified_path,
#     mask_path_train     = cfg.train_mask_path,
#     mask_path_test      = cfg.test_mask_path,
#     batch_size          = cfg.batch_size,
#     image_transform_train=image_transform_train(size=cfg.image_size),
#     image_transform_test =image_transform_test(size=cfg.image_size),
#     mask_transform_train =mask_transform(size=cfg.image_size),
#     mask_transform_test  =mask_transform(size=cfg.image_size),
#     image_size          = cfg.image_size,
#     num_workers         = 0,
# )

# # IMPORTANT : loader sans mélange pour comparer indice par indice
# train_loader.shuffle = False
# test_loader.shuffle  = False

model = load_model(cfg.model_name, device=device, cfg=cfg).to(device)
model.eval();  # backbone + head figés
print("Model loaded.")


Transform TRAIN  : Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
Transform TEST   : Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


  from .autonotebook import tqdm as notebook_tqdm


Model loaded.


In [3]:
# ── 3bis. Dataset & DataLoader déterministes ─────────────────────────
# 1) Transformations "neutres" : aucun Random*, même normalisation que DeiT
from src.transform import image_transform_test, mask_transform  # test = resize + ToTensor + Normalize

deterministic_img_tf = image_transform_test(size=cfg.image_size)  # ≃ resize + ToTensor + Normalize
deterministic_msk_tf = mask_transform(size=cfg.image_size)

# 2) On reconstruit explicitement le Dataset d'entraînement
from src.data_loader_mask import CustomDataset  # ← adapte si ton dataset a un autre nom
train_dataset_det = CustomDataset(
    original_dir   = cfg.train_original_path,
    modified_dir  = cfg.train_modified_path,
    mask_dir   = cfg.train_mask_path,
    image_transform    = deterministic_img_tf,
    mask_transform     = deterministic_msk_tf,
    # image_size  = cfg.image_size
)

# 3) DataLoader sans mélange
from torch.utils.data import DataLoader, SequentialSampler
train_loader_det = DataLoader(
    train_dataset_det,
    batch_size     = cfg.batch_size,
    sampler        = SequentialSampler(train_dataset_det),  # 🔑 ordre fixe
    num_workers    = 0,
    pin_memory     = False
)

print(f"Dataset déterministe : {len(train_dataset_det)} images")


Dataset déterministe : 118 images


In [5]:
# ── 4bis. Pré‑compute embeddings sans shuffle, puis comparaison ──────
from models.deit_tiny import precompute_deit_tiny_features

# 1) (ré)génère les embeddings déterministes
train_feats_det = precompute_deit_tiny_features(model, train_loader_det, device=device)

# 2) On prélève un batch du loader déterministe
imgs, labels, _ = next(iter(train_loader_det))
imgs = imgs.to(device)

with torch.no_grad():
    feats_live = model.forward_features(imgs)[:, 0]       # (B, 192)

# 3) Embeddings pré‑calculés, mêmes indices 0…B‑1
feats_pre = torch.stack([train_feats_det[i][0] for i in range(imgs.size(0))])[:, 0, :]  # (B,192)

# 4) Différence
delta = (feats_live - feats_pre).abs().max()
print(f"Δmax entre embeddings live & pré‑calc (déterministe) : {delta:.3e}")

if delta < 1e-3:
    print("✅  Pipelines maintenant identiques.")
else:
    print("❌  Toujours un écart : vérifie encore model.eval() ou la Normalization.")


[DEBUG] deit_tiny.py loaded
Δmax entre embeddings live & pré‑calc (déterministe) : 2.279e+00
❌  Toujours un écart : vérifie encore model.eval() ou la Normalization.


In [10]:
with torch.no_grad():
    logits = model.head(feats_live.to(device))
preds = logits.argmax(1)
acc_batch = (preds.cpu() == labels.cpu()).float().mean()
print(f"Accuracy tête (batch) : {acc_batch:.2%}")


Accuracy tête (batch) : 87.50%


In [11]:
def accuracy_from_images(model, loader):
    model.eval()
    ok = tot = 0
    with torch.no_grad():
        for imgs, labels, _ in loader:
            feats = model.forward_features(imgs.to(device))[:, 0]
            logits = model.head(feats)
            ok  += (logits.argmax(1).cpu() == labels).sum().item()
            tot += labels.size(0)
    return ok / tot

def accuracy_from_feats(model, feats_ds):
    ok = tot = 0
    with torch.no_grad():
        for feats, labels in feats_ds:
            logits = model.head(feats.to(device))
            ok  += (logits.argmax() == labels.to(device)).item()
            tot += 1
    return ok / tot

print("Acc. images brutes  :", accuracy_from_images(model, train_loader)*100, "%")
print("Acc. embeddings pre :", accuracy_from_feats(model, train_feats_ds)*100, "%")


Acc. images brutes  : 51.69491525423729 %


ValueError: too many values to unpack (expected 2)