In [1]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader

# Import de tes modules custom
sys.path.append("..")
from src.data_utils import BrainScanDataset
from src.models import get_brainscan_model
from src.engine import train_one_epoch, evaluate_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üöÄ Moteur de calcul : {device}")

üöÄ Moteur de calcul : cpu


### Strat√©gie de split

In [2]:
df = pd.read_csv("../data/metadata_with_weak_labels.csv")

# 1. Isolation du Test Set (Uniquement des labels forts, jamais vus)
df_strong_all = df[df["label"] != -1].copy()
df_test = df_strong_all.sample(n=20, random_state=42)
df_train_strong = df_strong_all.drop(df_test.index)

# 2. Le set de Training Faible (les 1406 images issues du clustering)
df_train_weak = df[df["label"] == -1].copy()

# Sauvegarde temporaire pour les DataLoaders
df_test.to_csv("../data/temp_test.csv", index=False)
df_train_strong.to_csv("../data/temp_train_strong.csv", index=False)
df_train_weak.to_csv("../data/temp_train_weak.csv", index=False)

### Pr√©paration des DataLoaders

In [3]:
from torchvision import transforms

# Transformation standard (celle de l'√©tape 2)
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# DataLoaders
# Note : Pour train_weak, on utilise 'weak_label' g√©n√©r√© √† l'√©tape 3
loader_weak = DataLoader(
    BrainScanDataset(
        "../data/temp_train_weak.csv", transform=train_transform, label_col="weak_label"
    ),
    batch_size=32,
    shuffle=True,
)
loader_strong = DataLoader(
    BrainScanDataset(
        "../data/temp_train_strong.csv", transform=train_transform, label_col="label"
    ),
    batch_size=16,
    shuffle=True,
)
loader_test = DataLoader(
    BrainScanDataset(
        "../data/temp_test.csv", transform=train_transform, label_col="label"
    ),
    batch_size=16,
    shuffle=False,
)

### Exp√©rience A - Baseline Supervis√©e
#### Le mod√®le apprend uniquement sur les 80 images de Clara.

In [4]:
print("--- EXP√âRIENCE A : SUPERVIS√â PUR ---")
model_sup = get_brainscan_model(device=device)
optimizer = optim.Adam(model_sup.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

for epoch in range(10):
    loss = train_one_epoch(model_sup, loader_strong, optimizer, criterion, device)
    if epoch % 2 == 0:
        print(f"√âpoque {epoch} | Loss: {loss:.4f}")

f1_sup, report_sup = evaluate_model(model_sup, loader_test, device)

--- EXP√âRIENCE A : SUPERVIS√â PUR ---


                                             

√âpoque 0 | Loss: 0.4481


                                             

√âpoque 2 | Loss: 0.0480


                                             

√âpoque 4 | Loss: 0.0084


                                             

√âpoque 6 | Loss: 0.0019


                                             

√âpoque 8 | Loss: 0.0044


                                             

### Exp√©rience B - Semi-Supervis√©e (Pseudo-Labeling)
#### Phase 1 sur les labels "faibles", puis Phase 2 sur les labels "forts".

In [5]:
# --- EXP√âRIENCE B : SEMI-SUPERVIS√â (AVEC LOGGING DES √âPOQUES) ---
print("--- EXP√âRIENCE B : SEMI-SUPERVIS√â ---")

# 1. Alignement automatique des labels
mapping_counts = pd.crosstab(df_strong_all["label"], df["weak_label"])
correct_label_for_cancer = int(mapping_counts.loc[1].idxmax())

if correct_label_for_cancer == 0:
    print("‚ö†Ô∏è Inversion d√©tect√©e ! Correction des weak_labels en cours...")
    df["weak_label"] = df["weak_label"].map({0: 1, 1: 0})
else:
    print("‚úÖ Alignement des labels correct.")

# 2. Pr√©paration des loaders
df_train_weak = df[df["label"] == -1].copy()
loader_weak = DataLoader(
    BrainScanDataset(df_train_weak, transform=train_transform, label_col="weak_label"),
    batch_size=32,
    shuffle=True,
)

# 3. Initialisation
model_semi = get_brainscan_model(num_classes=2, device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_semi.parameters(), lr=1e-4)

# PHASE 1 : Pr√©-entra√Ænement (Labels Faibles)
num_epochs_p1 = 5
print(f"\nüöÄ Phase 1 : Pre-training ({num_epochs_p1} √©poques)")
for epoch in range(num_epochs_p1):
    loss = train_one_epoch(model_semi, loader_weak, optimizer, criterion, device)
    # Affichage clair : Epoch 1/5, Epoch 2/5, etc.
    print(f" > √âpoque [{epoch+1}/{num_epochs_p1}] | Loss Moyenne: {loss:.4f}")

# PHASE 2 : Fine-tuning (Labels Forts)
num_epochs_p2 = 5
print(f"\nüöÄ Phase 2 : Fine-tuning ({num_epochs_p2} √©poques)")
optimizer = optim.Adam(model_semi.parameters(), lr=1e-5)

for epoch in range(num_epochs_p2):
    loss = train_one_epoch(model_semi, loader_strong, optimizer, criterion, device)
    print(f" > √âpoque [{epoch+1}/{num_epochs_p2}] | Loss Moyenne: {loss:.4f}")

# 4. √âvaluation
f1_semi, report_semi = evaluate_model(model_semi, loader_test, device)
print(f"\nüìä R√©sultat Final Semi-Supervis√© : F1 = {f1_semi:.4f}")
print(report_semi)

--- EXP√âRIENCE B : SEMI-SUPERVIS√â ---
‚ö†Ô∏è Inversion d√©tect√©e ! Correction des weak_labels en cours...

üöÄ Phase 1 : Pre-training (5 √©poques)


                                               

 > √âpoque [1/5] | Loss Moyenne: 0.1137


                                               

 > √âpoque [2/5] | Loss Moyenne: 0.0211


                                               

 > √âpoque [3/5] | Loss Moyenne: 0.0205


                                               

 > √âpoque [4/5] | Loss Moyenne: 0.0214


                                               

 > √âpoque [5/5] | Loss Moyenne: 0.0111

üöÄ Phase 2 : Fine-tuning (5 √©poques)


                                             

 > √âpoque [1/5] | Loss Moyenne: 0.9270


                                             

 > √âpoque [2/5] | Loss Moyenne: 0.5634


                                             

 > √âpoque [3/5] | Loss Moyenne: 0.3067


                                             

 > √âpoque [4/5] | Loss Moyenne: 0.1922


                                             

 > √âpoque [5/5] | Loss Moyenne: 0.1193

üìä R√©sultat Final Semi-Supervis√© : F1 = 0.8571
              precision    recall  f1-score   support

      Normal       0.86      1.00      0.92        12
      Cancer       1.00      0.75      0.86         8

    accuracy                           0.90        20
   macro avg       0.93      0.88      0.89        20
weighted avg       0.91      0.90      0.90        20



### Comparaison des r√©sultats

In [6]:
print(f"üìä F1-Score Supervis√© : {f1_sup:.4f}")
print(f"üìä F1-Score Semi-Supervis√© : {f1_semi:.4f}")

if f1_semi > f1_sup:
    print("\n‚úÖ Succ√®s : L'approche semi-supervis√©e am√©liore la g√©n√©ralisation.")
else:
    print(
        "\n‚ö†Ô∏è Note : Le gain est faible, la qualit√© des clusters de l'√©tape 3 est √† revoir."
    )

üìä F1-Score Supervis√© : 1.0000
üìä F1-Score Semi-Supervis√© : 0.8571

‚ö†Ô∏è Note : Le gain est faible, la qualit√© des clusters de l'√©tape 3 est √† revoir.


In [7]:
# V√©rification du mapping
print(pd.crosstab(df_strong_all["label"], df_strong_all["weak_label"]))

weak_label   0   1
label             
0            2  48
1           38  12
