# Pipeline d'Orchestration - Détection d'Anévrismes

Ce notebook orchestre l'ensemble du pipeline de détection d'anévrismes :
1. Analyse exploratoire des données (EDA)
2. Création du dataset
3. Augmentation de données
4. Entraînement du modèle
5. Évaluation

**Avantages de cette approche** :
- Pipeline centralisé et reproductible
- Classes modulaires et réutilisables
- Facile à déployer en production
- Testable et maintenable

## Imports

In [None]:
import sys
sys.path.append("../")

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

# Import des paths et config
from src import (
    SERIES_DIR, TRAIN_CSV, TRAIN_LOCALIZERS_CSV,
    PROCESSED_DIR, MODELS_DIR, CHECKPOINTS_DIR,
    print_config
)

# Import des bricks (composants de pipeline)
from src.bricks import (
    Preprocessor,
    DatasetBuilder,
    Augmentor,
    EDA,
    Trainer,
    Predictor
)

# Import du mod√®le
from src.models import UNet3DClassifier

print("‚úÖ Imports r√©ussis")

## Configuration

In [None]:
# Display the path configuration via the helper imported from `src`.
print_config()

In [None]:
# Pipeline parameters (notebook-level constants read by the cells below).
MODALITY = "CTA"  # Modality to process
CUBE_SIZE = 48  # passed as `cube_size` to DatasetBuilder and Predictor
N_AUGMENTATIONS = 12  # passed as `n_augmentations` to Augmentor
BATCH_SIZE = 8  # batch size used by both DataLoaders
EPOCHS = 10  # passed as `epochs` to trainer.fit
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when CUDA is unavailable

# Echo the main settings so they appear in the notebook output.
print(f"Device: {DEVICE}")
print(f"Modalit√©: {MODALITY}")
print(f"Cube size: {CUBE_SIZE}")

## 1️⃣ Phase 1 : Analyse Exploratoire (EDA)

In [None]:
# Load the two CSV tables whose paths come from the project configuration.
df_train = pd.read_csv(TRAIN_CSV)
df_localizers = pd.read_csv(TRAIN_LOCALIZERS_CSV)

# Report the number of rows in each table.
print(f"S√©ries totales: {len(df_train)}")
print(f"Localisateurs: {len(df_localizers)}")

In [None]:
# Create the EDA analyzer from both tables and the series directory.
eda = EDA(df_train, df_localizers, SERIES_DIR)

# Generate the full report.
eda.generate_report()

In [None]:
# Visualize the distributions.
eda.plot_aneurysm_distribution()

## 2️⃣ Phase 2 : Création du Dataset

In [None]:
# Create the preprocessor.
# NOTE(review): `target_spacing` presumably is the per-axis voxel spacing to
# resample to and `crop_threshold` an intensity cut-off for cropping; units
# and exact semantics are defined in src.bricks.Preprocessor — TODO confirm.
preprocessor = Preprocessor(
    target_spacing=(0.4, 0.4, 0.4),
    crop_threshold=0.1
)

print(preprocessor)

In [None]:
# Create the dataset builder, wiring in the preprocessor, the cube size and
# the series directory.
dataset_builder = DatasetBuilder(
    preprocessor=preprocessor,
    cube_size=CUBE_SIZE,
    series_dir=SERIES_DIR
)

print(dataset_builder)

In [None]:
# Build the dataset for the selected modality from the two tables.
dataset = dataset_builder.build_dataset(
    df_train=df_train,
    df_localizers=df_localizers,
    modality=MODALITY
)

In [None]:
# Save the dataset under PROCESSED_DIR; the file name carries the modality.
dataset_path = os.path.join(PROCESSED_DIR, f"{MODALITY}_dataset.npz")
dataset_builder.save(dataset, dataset_path)

## 3️⃣ Phase 3 : Augmentation de Données

In [None]:
# Create the augmentor.
# NOTE(review): `grid_size` and `max_displacement` presumably parameterize a
# deformation applied to each cube — confirm against src.bricks.Augmentor.
augmentor = Augmentor(
    n_augmentations=N_AUGMENTATIONS,
    grid_size=3,
    max_displacement=3.0
)

print(augmentor)

In [None]:
# Augment the dataset (positives only: negatives are excluded through
# `augment_negatives=False`).
dataset_augmented = augmentor.augment_dataset(
    dataset,
    augment_negatives=False
)

In [None]:
# Save the augmented dataset under PROCESSED_DIR, next to the raw dataset.
augmented_path = os.path.join(PROCESSED_DIR, f"{MODALITY}_dataset_augmented.npz")
augmentor.save(dataset_augmented, augmented_path)

## 4️⃣ Phase 4 : Préparation pour l'Entraînement

In [None]:
# Cr√©er un PyTorch Dataset
class CubesDataset(Dataset):
    """PyTorch dataset pairing each cube with a combined target vector.

    ``dataset_dict`` must provide the keys ``'cubes'``, ``'labels'`` and
    ``'positions'``; each value is indexed by sample.
    """

    def __init__(self, dataset_dict):
        # Keep references to the three arrays; nothing is copied here.
        self.cubes, self.labels, self.positions = (
            dataset_dict[key] for key in ('cubes', 'labels', 'positions')
        )

    def __len__(self):
        """Return the number of cubes."""
        return len(self.cubes)

    def __getitem__(self, idx):
        """Return ``(cube, y)`` for sample ``idx``.

        ``cube`` is a float32 tensor with a leading axis of size 1 added in
        front of the stored cube. ``y`` is a float32 tensor holding the
        position vector followed by the label as its last entry (the original
        notebook comment gives 13 position entries, i.e. 14 in total).
        """
        volume = torch.tensor(self.cubes[idx], dtype=torch.float32)
        cls_flag, loc_vec = self.labels[idx], self.positions[idx]

        # Position vector first, label appended as the final element.
        target = np.concatenate([loc_vec, [cls_flag]], axis=0)

        return volume.unsqueeze(0), torch.tensor(target, dtype=torch.float32)

print("‚úÖ Dataset class d√©finie")

In [None]:
# Stratified 80/20 train/validation split over the augmented cubes.
# NOTE(review): the split is drawn from the augmented set, so augmented
# variants of one source cube may land on both sides of the split — confirm
# whether splitting before augmentation is needed to avoid leakage.
from sklearn.model_selection import train_test_split

n_samples = len(dataset_augmented['cubes'])
indices = np.arange(n_samples)

# Fixed seed for reproducibility; stratify on the labels so both subsets keep
# the same class balance.
train_idx, val_idx = train_test_split(
    indices, test_size=0.2, random_state=42, stratify=dataset_augmented['labels']
)

# Slice every array with the selected indices to form the two sub-datasets.
train_data = {
    key: dataset_augmented[key][train_idx]
    for key in ('cubes', 'labels', 'positions')
}
val_data = {
    key: dataset_augmented[key][val_idx]
    for key in ('cubes', 'labels', 'positions')
}

print(f"Train: {len(train_idx)} cubes")
print(f"Val: {len(val_idx)} cubes")

In [None]:
# Create the DataLoaders.
train_dataset = CubesDataset(train_data)
val_dataset = CubesDataset(val_data)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# len() of a DataLoader is its number of batches.
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

## 5️⃣ Phase 5 : Entraînement

In [None]:
# Create the model.
# NOTE(review): `in_ch=1` matches the single leading axis CubesDataset adds to
# each cube; `base_ch` is presumably the base feature width — confirm in
# src.models.UNet3DClassifier.
model = UNet3DClassifier(in_ch=1, base_ch=32)

print(model)

In [None]:
# Loss combin√©e
def combined_loss(pred, target, alpha=0.1, n_positions=13):
    """Combine a position BCE term and a label BCE term into one scalar loss.

    The first ``n_positions`` columns of ``pred``/``target`` form the position
    part and the remaining column(s) the label part. ``pred`` must contain raw
    logits for both parts; the sigmoid is applied inside the loss.

    Args:
        pred: Tensor of logits, indexed as ``[batch, column]``.
        target: Tensor of targets laid out like ``pred``.
        alpha: Weight applied to the position term.
        n_positions: Number of leading columns that belong to the position
            part. Defaults to 13, the value previously hard-coded.

    Returns:
        Scalar tensor equal to ``alpha * loss_pos + loss_label``.
    """
    pos_logits, label_logits = pred[:, :n_positions], pred[:, n_positions:]
    pos_target, label_target = target[:, :n_positions], target[:, n_positions:]

    # Use the logits form for the position term too. Applying sigmoid first
    # and then binary_cross_entropy saturates for large-magnitude logits: the
    # log is clamped and the gradient through the sigmoid vanishes.
    loss_pos = nn.functional.binary_cross_entropy_with_logits(pos_logits, pos_target)
    loss_label = nn.functional.binary_cross_entropy_with_logits(label_logits, label_target)

    return alpha * loss_pos + loss_label

# Optimizer: Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

print("‚úÖ Loss et optimiseur d√©finis")

In [None]:
# Create the trainer from the model, the combined loss, the optimizer, the
# selected device and the checkpoint directory.
trainer = Trainer(
    model=model,
    criterion=combined_loss,
    optimizer=optimizer,
    device=DEVICE,
    checkpoint_dir=CHECKPOINTS_DIR
)

print(trainer)

In [None]:
# Start training for EPOCHS epochs with the train and validation loaders.
trainer.fit(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=EPOCHS
)

In [None]:
# Visualize the training history.
trainer.plot_history()

In [None]:
# Save the final model under MODELS_DIR; the file name carries the modality.
final_model_path = os.path.join(MODELS_DIR, f"{MODALITY}_model_final.pth")
trainer.save_checkpoint(final_model_path)

## 6️⃣ Phase 6 : Inférence (Optionnel)

In [None]:
# Create the predictor, reusing the in-memory model and the preprocessor and
# cube size from the dataset-building phase.
predictor = Predictor(
    model=model,
    preprocessor=preprocessor,
    device=DEVICE,
    cube_size=CUBE_SIZE
)

# Load the best model.
# NOTE(review): `best_model_path` is not defined in the visible cells; define
# it before uncommenting the call below.
# predictor.load_model(best_model_path)

print(predictor)

In [None]:
# Exemple d'inférence sur un nouveau volume
# test_patient_path = os.path.join(SERIES_DIR, "<SeriesInstanceUID>")
# prediction = predictor.predict_volume(test_patient_path, threshold=0.5)
# print(prediction)

## 🎉 Pipeline Terminé

Vous avez exécuté le pipeline complet :
- ✅ Analyse exploratoire
- ✅ Création du dataset
- ✅ Augmentation
- ✅ Entraînement
- ✅ Évaluation

Les résultats sont sauvegardés dans :
- `results/processed/` : Datasets
- `results/models/` : Modèles entraînés
- `results/checkpoints/` : Checkpoints d'entraînement