In [None]:
import sys
sys.path.append("../")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# Import des classes et fonctions depuis src
from src import (
    SERIES_DIR, TRAIN_CSV, TRAIN_LOCALIZERS_CSV, OUTPUT_DIR,
    print_config
)
from src.bricks import EDA, Preprocessor
from src import show_middle_slices

# Print a banner, then the environment configuration via print_config()
# (imported from src), so the active settings appear in the cell output.
print("="*60)
print("Configuration de l'environnement")
print("="*60)
print_config()

# Exploration des Données DICOM

Ce notebook utilise la classe **EDA** pour analyser les données locales de manière systématique.

In [None]:
# Load the two CSV tables whose paths are exported by src
# (TRAIN_CSV and TRAIN_LOCALIZERS_CSV).
df_train = pd.read_csv(TRAIN_CSV)
df_localizers = pd.read_csv(TRAIN_LOCALIZERS_CSV)

# Report the number of rows in each table.
print(f"üìä Donn√©es charg√©es:")
print(f"  - S√©ries totales: {len(df_train)}")
print(f"  - Localisateurs (avec an√©vrismes): {len(df_localizers)}")

In [None]:
# Build the EDA helper from the two tables and the series directory,
# then print the object itself.
eda = EDA(df_train, df_localizers, SERIES_DIR)

print(f"‚úÖ Analyseur EDA cr√©√©")
print(eda)

In [None]:
# Step 1: run the EDA modality analysis and keep its return value.
modality_counts = eda.analyze_modalities()

In [None]:
# Step 2: run the EDA aneurysm-distribution analysis and keep its return value.
aneurysm_counts = eda.analyze_aneurysm_distribution()

In [None]:
# Step 3: run the EDA anatomical-position analysis and keep its return value.
position_counts = eda.analyze_positions()

In [None]:
# Step 4: plot the distributions through the EDA helper.
eda.plot_aneurysm_distribution()

In [None]:
# Step 5: detect defective series (optional - can take a long time).
# The call below is intentionally disabled; uncomment it to run the detection.
# NOTE(review): with the call commented out, `defective_series` is never defined.

# defective_series = eda.detect_defective_series()
print("‚ö†Ô∏è D√©tection des s√©ries d√©fectueuses comment√©e (peut √™tre long)")
print("D√©commentez la cellule pour l'ex√©cuter sur vos 20 s√©ries locales")

In [None]:
# Step 6: run the EDA slice-count analysis and keep its return value.
slice_stats = eda.analyze_slice_counts()

In [None]:
# Step 7: print a banner, then let the EDA helper produce its full report.
print("\n" + "="*60)
print("RAPPORT COMPLET")
print("="*60)
eda.generate_report()

In [None]:
## Visualisation de Séries DICOM

Utilisons le **Preprocessor** pour charger et visualiser des volumes 3D.

In [None]:
# Create the preprocessor with a target spacing of 0.4 on each of the three axes
# (presumably millimetres - TODO confirm against src.bricks.Preprocessor).
preprocessor = Preprocessor(target_spacing=(0.4, 0.4, 0.4))

print("‚úÖ Preprocessor cr√©√©")
print(preprocessor)

In [None]:
# Pick one example series among those available locally.
# The FIRST entry is used (not a random one), so the listing is sorted to make
# that choice reproducible: glob.glob returns entries in arbitrary order.
import os
import glob

available_series = sorted(glob.glob(os.path.join(SERIES_DIR, '*')))

if available_series:
    # Take the first series in sorted order
    example_series = available_series[0]
    series_uid = os.path.basename(example_series)

    print(f"üìÅ S√©rie s√©lectionn√©e: {series_uid}")
    print(f"üìÇ Chemin: {example_series}")

    # Count the DICOM files (one file per slice) in that folder
    dicom_files = glob.glob(os.path.join(example_series, '*.dcm'))
    print(f"üî¢ Nombre de slices: {len(dicom_files)}")
else:
    # `example_series` stays undefined here; later cells guard on `available_series`.
    print("‚ùå Aucune s√©rie DICOM trouv√©e dans:", SERIES_DIR)

In [None]:
# Load and preprocess the example series with the Preprocessor.
# Guarded on `available_series` because `example_series` only exists when
# at least one series folder was found by the selection cell.
if available_series:
    print("üîÑ Chargement et preprocessing du volume...")
    volume = preprocessor.process_volume(example_series)
    
    # Quick sanity summary of the resulting array.
    print(f"‚úÖ Volume pr√©process√©:")
    print(f"  - Shape: {volume.shape}")
    print(f"  - Min: {volume.min():.4f}")
    print(f"  - Max: {volume.max():.4f}")
    print(f"  - Mean: {volume.mean():.4f}")
else:
    print("‚ö†Ô∏è Passez √† la cellule suivante")

In [None]:
# Display the volume with show_middle_slices (imported from src).
# `volume` only exists when the previous cell ran its `if available_series` branch.
if available_series:
    print("üìä Visualisation des coupes centrales (axiale, coronale, sagittale):")
    show_middle_slices(volume)
else:
    print("‚ö†Ô∏è Aucun volume √† visualiser")

In [None]:
## Visualisation de Plusieurs Séries

Visualisons les 3 premières séries disponibles localement.

In [None]:
# Preprocess and display up to the first 3 entries of `available_series`.
n_series_to_show = min(3, len(available_series))

for i in range(n_series_to_show):
    series_path = available_series[i]
    series_uid = os.path.basename(series_path)
    
    # Banner identifying the series being processed
    print(f"\n{'='*60}")
    print(f"S√©rie {i+1}/{n_series_to_show}: {series_uid}")
    print(f"{'='*60}")
    
    # Best-effort: a failure on one series is reported and the loop continues.
    try:
        # Preprocessing (overwrites the `volume` variable from the earlier cell)
        volume = preprocessor.process_volume(series_path)
        print(f"‚úÖ Volume shape: {volume.shape}")
        
        # Visualization
        show_middle_slices(volume)
        
    except Exception as e:
        print(f"‚ùå Erreur lors du traitement: {e}")

In [None]:
## Conclusion

Ce notebook a utilisé les composants modulaires de `src/` :

### ✅ Classe EDA
- Analyse des modalités
- Distribution des anévrismes
- Positions anatomiques
- Détection de séries défectueuses
- Statistiques sur les slices

### ✅ Classe Preprocessor
- Chargement DICOM
- Resampling à espacement cible
- Cropping du fond
- Normalisation

### ✅ Fonctions de Visualisation
- `show_middle_slices()` pour visualiser les coupes 3D

### 🎯 Prochaines étapes

Consultez le notebook [02_dataset_creation.ipynb](02_dataset_creation.ipynb) pour créer un dataset d'entraînement à partir de ces données.

In [None]:
# Number of rows per value of the 'Modality' column.
# NOTE(review): `df` is not defined in any cell visible in this notebook -
# presumably it came from a removed cell; confirm before Restart & Run All.
df['Modality'].value_counts()

In [None]:
# Split the table by modality: rows tagged 'MRA' and rows tagged 'CTA'.
# NOTE(review): relies on `df`, which no visible cell defines.
df_MRA=df[df['Modality']=='MRA']
df_CTA=df[df['Modality']=='CTA']

In [None]:
# Keep only the four columns used downstream (series id, folder path,
# slice count, label). Rebinds `df_MRA` to the narrowed frame.
df_MRA=df_MRA[['SeriesInstanceUID', 'PatientPath', 'NumSlices', 'Aneurysm Present']]

In [None]:
# Label balance within the MRA subset.
df_MRA['Aneurysm Present'].value_counts()

In [None]:
# Label balance over the whole table.
# NOTE(review): relies on `df`, which no visible cell defines.
df['Aneurysm Present'].value_counts()

In [None]:
# (rows, columns) of the MRA subset.
df_MRA.shape

In [None]:
# Histogram (100 bins) of the 'NumSlices' column for the MRA subset;
# x axis = number of slices, y axis = number of series.
df_MRA['NumSlices'].hist(bins=100)
plt.xlabel("Nombre de slices")
plt.ylabel("Nombre de s√©ries")
plt.show()

# Preprocessing des DICOM

In [None]:
# List every entry directly under SERIES_DIR.
# Note: this rebinds `series_path`, which the multi-series loop above used as
# its per-series variable. glob.glob does not guarantee any ordering.
series_path = SERIES_DIR
patient_list = glob.glob(series_path+'/*')

In [None]:
# Turn a series of DICOM images into a 3D NumPy array
def dicom_to_numpy(patient_path):
    """
    Read every ``*.dcm`` file of a series folder and stack the slices into a volume.

    Parameters
    ----------
    patient_path : str
        Folder containing the ``.dcm`` files of one series.

    Returns
    -------
    volume : numpy.ndarray
        The slices' ``pixel_array`` stacked along the LAST axis, ordered by
        ``InstanceNumber``. Slices whose 2D shape differs from the first
        slice's are dropped.
    spacing : tuple of float
        ``(dx, dy, dz)``: the two ``PixelSpacing`` values of the first slice
        and its ``SliceThickness`` (1.0 when absent or empty).

    Raises
    ------
    ValueError
        If the folder contains no ``.dcm`` file.
    """
    # Imported locally: the function uses pydicom but no notebook cell imports it.
    import pydicom

    dicom_files = sorted(glob.glob(patient_path+'/*.dcm'))
    if not dicom_files:
        # Fail with an explicit message instead of an IndexError on slices[0].
        raise ValueError(f"No .dcm file found in: {patient_path}")

    slices = [pydicom.dcmread(f) for f in dicom_files]
    # Order slices by their instance number
    slices.sort(key=lambda s: int(s.InstanceNumber))

    # Keep only slices sharing the first slice's 2D shape so np.stack succeeds
    target_shape = slices[0].pixel_array.shape
    slices = [s for s in slices if s.pixel_array.shape == target_shape]
    volume = np.stack([s.pixel_array for s in slices], axis=-1)

    # Physical spacing, converted to plain floats
    dx, dy = (float(v) for v in slices[0].PixelSpacing)
    thickness = getattr(slices[0], 'SliceThickness', None)
    # The tag may be missing or present but empty; both fall back to 1.0
    dz = float(thickness) if thickness not in (None, '') else 1.0

    return volume, (dx, dy, dz)

In [None]:
def resample(volume, spacing, target_spacing=(1.0, 1.0, 1.0)):
    """
    Resample a 3D volume from its native voxel spacing to ``target_spacing``.

    Parameters
    ----------
    volume : numpy.ndarray
        3D array to resample.
    spacing : sequence of 3 numbers
        Current voxel spacing ``(dx, dy, dz)``, one value per volume axis.
    target_spacing : sequence of 3 numbers, optional
        Desired voxel spacing. Defaults to ``(1.0, 1.0, 1.0)``, which is the
        behaviour of the original hard-coded implementation.

    Returns
    -------
    numpy.ndarray
        The volume interpolated linearly (``order=1``); each axis is scaled by
        ``spacing / target_spacing``.

    Raises
    ------
    ValueError
        If any target spacing is not strictly positive.
    """
    # Imported locally: no notebook cell imports scipy's zoom.
    from scipy.ndimage import zoom

    dx, dy, dz = (float(s) for s in spacing)
    tx, ty, tz = (float(t) for t in target_spacing)
    if min(tx, ty, tz) <= 0:
        raise ValueError(f"target_spacing must be strictly positive, got {target_spacing}")

    new_volume = zoom(volume, (dx / tx, dy / ty, dz / tz), order=1)
    return new_volume

In [None]:
def crop(volume, threshold=10):
    """
    Crop a 3D volume to the bounding box of the voxels carrying signal.

    Parameters
    ----------
    volume : numpy.ndarray
        3D array to crop.
    threshold : number, optional
        Voxels with a value ``<= threshold`` are treated as background.
        Defaults to 10, the value previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The sub-volume spanning, on every axis, the first to the last index
        holding at least one voxel ``> threshold``. When no voxel exceeds the
        threshold the input is returned unchanged.
    """
    # Foreground mask: voxels strictly above the threshold
    mask = volume > threshold
    if not mask.any():
        return volume  # nothing to crop

    # For each axis, collapse the other axes and keep the first/last hit
    box = []
    for axis in range(mask.ndim):
        other_axes = tuple(a for a in range(mask.ndim) if a != axis)
        hits = np.flatnonzero(mask.any(axis=other_axes))
        box.append(slice(hits[0], hits[-1] + 1))

    return volume[tuple(box)]

In [None]:
def resize(volume, target_shape=(128, 128, 64)):
    """
    Resize a volume to ``target_shape`` using linear interpolation.

    Parameters
    ----------
    volume : numpy.ndarray
        Array to resize; must have as many axes as ``target_shape`` has entries.
    target_shape : tuple of int, optional
        Desired output shape. Defaults to ``(128, 128, 64)``, the value
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The volume zoomed by ``target / current`` on every axis (``order=1``).

    Raises
    ------
    ValueError
        If ``target_shape`` and ``volume`` do not have the same number of axes.
    """
    # Imported locally: no notebook cell imports scipy's zoom.
    from scipy.ndimage import zoom

    if len(target_shape) != volume.ndim:
        raise ValueError(
            f"target_shape {tuple(target_shape)} does not match volume with {volume.ndim} axes"
        )

    factors = [t/s for t, s in zip(target_shape, volume.shape)]
    resized_volume = zoom(volume, factors, order=1)
    return resized_volume

In [None]:
# Normalize a 3D NumPy array
def normalization(volume):
    """
    Min-max normalize a volume to the [0, 1] range (per volume).

    Integer inputs are converted to float64 BEFORE any arithmetic: with
    fixed-width integers, ``volume - v_min`` and ``v_max - v_min`` can wrap
    around (e.g. an int16 volume spanning -32768..32767), which produced
    values outside [0, 1].

    Parameters
    ----------
    volume : numpy.ndarray
        Array to normalize.

    Returns
    -------
    numpy.ndarray
        Floating-point array with minimum 0 and maximum 1, or an all-zero
        floating-point array when the volume is constant.
    """
    volume = np.asarray(volume)
    if not np.issubdtype(volume.dtype, np.floating):
        # float64 matches the dtype integer true-division produced before
        volume = volume.astype(np.float64)

    v_min, v_max = volume.min(), volume.max()
    if v_max > v_min:  # avoid dividing by zero on a constant volume
        volume = (volume - v_min) / (v_max - v_min)
    else:
        volume = np.zeros_like(volume)
    return volume

In [None]:
def preprocessing(patient_path):
    """
    Run the whole preprocessing chain on one series folder.

    The folder is read with dicom_to_numpy, resampled using the spacing it
    reports, then passed through crop, resize and normalization in that order.
    Returns the array produced by the last stage.
    """
    raw_volume, voxel_spacing = dicom_to_numpy(patient_path)
    result = resample(raw_volume, voxel_spacing)

    # Remaining stages each take the volume and return the next one
    for stage in (crop, resize, normalization):
        result = stage(result)

    return result

# Créer un Dataset PyTorch

In [None]:
import torch
from torch.utils.data import Dataset

from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader

In [None]:
class AneurysmDataset(Dataset):
    """
    Dataset yielding ``(volume, label)`` pairs from a DataFrame.

    Each row must provide a 'PatientPath' entry (passed to ``preprocessing``)
    and an 'Aneurysm Present' entry (the label). Volumes are preprocessed
    lazily, on every access.
    """

    def __init__(self, df):
        # Rebuild a 0..n-1 index and store the result on the instance
        self.df = df.reset_index(drop=True)

    def __len__(self):
        # One sample per DataFrame row
        return self.df.shape[0]

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        # Full 3D pipeline, then a float32 tensor
        voxels = torch.tensor(preprocessing(record['PatientPath']), dtype=torch.float32)
        target = torch.tensor(record['Aneurysm Present'], dtype=torch.float32)

        # Prepend a channel axis so the sample is (C, ...) with C == 1
        return voxels.unsqueeze(0), target


In [None]:
# Small positional subsets for a quick trial run.
# NOTE(review): `df` is not defined in any visible cell, and the loaders built
# here are overwritten by the later cell that rebuilds them from df_train/df_val.
subset_train = df.iloc[:100]  # rows 0..99 (100 rows) for training
subset_val = df.iloc[100:150] # rows 100..149 (50 rows) for validation

train_dataset=AneurysmDataset(subset_train)
val_dataset=AneurysmDataset(subset_val)

# batch_size=1; only the training loader shuffles
train_loader = DataLoader(train_dataset,batch_size=1,shuffle=True)
val_loader = DataLoader(val_dataset,batch_size=1,shuffle=False)

In [None]:
# 80/20 split of the MRA subset, stratified on the label, with a fixed seed.
# Note: this rebinds `df_train`, which earlier held the table read from TRAIN_CSV.
df_train, df_val = train_test_split(df_MRA, test_size=0.2, 
                                                random_state=42, 
                                                stratify=df_MRA['Aneurysm Present'])

In [None]:
# Datasets and loaders built from the stratified split; these replace the
# trial loaders created from the positional subsets.
train_dataset = AneurysmDataset(df_train)
val_dataset = AneurysmDataset(df_val)

# batch_size=16; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# ResNet 3D
En réalité, il ne s'agit pas encore d'un ResNet, mais d'un simple CNN 3D.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import roc_auc_score

from tqdm import trange
from tqdm import tqdm

In [None]:
class SimpleResNet3D(nn.Module):
    """
    Plain 3D CNN for binary classification (no residual connections).

    Four Conv3d + BatchNorm3d + ReLU stages (1 -> 16 -> 32 -> 64 -> 128
    channels; stages 2-4 use stride 2), global average pooling, then a
    linear layer to one value passed through a sigmoid.
    """

    def __init__(self):
        super().__init__()

        # Stage 1 keeps the spatial size (stride 1)
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm3d(num_features=16)

        # Stages 2-4 downsample with stride 2
        self.conv2 = nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm3d(num_features=32)

        self.conv3 = nn.Conv3d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm3d(num_features=64)

        self.conv4 = nn.Conv3d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        self.bn4 = nn.BatchNorm3d(num_features=128)

        # Global average pooling: one value per channel
        self.avgpool = nn.AdaptiveAvgPool3d(output_size=1)

        # Binary classification head
        self.fc = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in stages:
            x = torch.relu(norm(conv(x)))

        pooled = self.avgpool(x).flatten(start_dim=1)  # (B, 128)
        logits = self.fc(pooled)                       # (B, 1)
        return logits.sigmoid()


In [None]:
# Use the GPU when PyTorch reports one, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model, Adam optimizer (lr=1e-4) and binary cross-entropy loss.
# BCELoss takes probabilities, which matches the sigmoid at the end of
# SimpleResNet3D.forward.
model = SimpleResNet3D().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCELoss()

In [None]:
def train_one_epoch(model, train_loader, optimizer, criterion, device, epoch=None):
    """
    Train ``model`` for one pass over ``train_loader`` and checkpoint periodically.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose output is fed directly to ``criterion``.
    train_loader : iterable
        Yields ``(volumes, labels)`` batches; must expose ``.dataset`` with a length.
    optimizer : torch.optim.Optimizer
    criterion : callable
        Loss taking ``(outputs, labels)``.
    device : str or torch.device
    epoch : int, optional
        Epoch number stored in the checkpoints and used in their file names.
        When omitted, the module-level ``epoch`` variable is used if it exists
        (what existing callers rely on), otherwise 0. The original code read
        the global unconditionally and raised NameError outside such a loop.

    Returns
    -------
    float
        Sum of per-batch losses weighted by batch size, divided by the dataset length.

    Side effects
    ------------
    Every 10th batch (including batch 0) writes
    ``checkpoint_epoch{epoch}_batch{batch_idx}.pth`` in the working directory.
    """
    if epoch is None:
        # Backward compatibility with callers that set `epoch` as a loop variable
        epoch = globals().get('epoch', 0)

    model.train()
    running_loss = 0.0

    for batch_idx, (volumes, labels) in enumerate(tqdm(train_loader)):
        volumes = volumes.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True).unsqueeze(1).float()  # (B,1)

        optimizer.zero_grad()
        outputs = model(volumes)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so the final division gives a per-sample mean
        running_loss += loss.item() * volumes.size(0)

        # Save a resumable checkpoint every 10 iterations
        if batch_idx % 10 == 0:
            checkpoint_path = f"checkpoint_epoch{epoch}_batch{batch_idx}.pth"
            torch.save({
                'epoch': epoch,
                'batch_idx': batch_idx,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss.item(),
            }, checkpoint_path)

    return running_loss / len(train_loader.dataset)

In [None]:
def evaluate(model, val_loader, device):
    """
    Compute the ROC AUC of ``model`` over ``val_loader``.

    The model is switched to eval mode and run without gradient tracking; its
    outputs are used as scores as-is. Returns NaN when roc_auc_score raises
    ValueError (e.g. a single class in the validation labels).
    """
    model.eval()
    targets, scores = [], []

    with torch.no_grad():
        for batch_volumes, batch_labels in tqdm(val_loader):
            batch_volumes = batch_volumes.to(device, non_blocking=True)
            batch_labels = batch_labels.to(device, non_blocking=True).unsqueeze(1).float()

            probabilities = model(batch_volumes)

            # One (1,)-shaped array per sample, as rows of the (B, 1) batches
            targets += list(batch_labels.cpu().numpy())
            scores += list(probabilities.cpu().numpy())

    try:
        return roc_auc_score(targets, scores)
    except ValueError:
        return float('nan')

In [None]:
# Load a locally saved model (disabled: the code below is commented out).
# Note: adapt the path to your own local model file.
# state_dict = torch.load(
#     "results/models/best_model_epoch3.pth",
#     map_location="cuda"
# )
# model.load_state_dict(state_dict)

# NOTE(review): the messages below say this cell loads a pre-trained model, but
# with the loading code commented out the cell only prints them.
print("‚ö†Ô∏è Cette cellule charge un mod√®le pr√©-entra√Æn√© depuis Kaggle.")
print("Commentez cette cellule si vous n'avez pas encore de mod√®le local.")

In [None]:
# Tester avec un train et val loaders r√©duits
num_epochs = 4

best_auc = 0.5605  # pour sauvegarder le meilleur mod√®le

for epoch in range(3,num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_auc = evaluate(model, val_loader, device)
    
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val AUC: {val_auc:.4f}")

    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), f"best_model_epoch{epoch+1}.pth")
        print(f"--> Mod√®le sauvegard√© √† l'epoch {epoch+1} avec AUC {val_auc:.4f}")


In [None]:
# Restore model and optimizer state from a mid-epoch checkpoint written by
# train_one_epoch.
# NOTE(review): the path is a hard-coded absolute location; torch.load
# unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load("/default/1/checkpoint_epoch1_batch140.pth", map_location=device)

model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# NOTE(review): neither variable below is read by any later visible cell.
start_epoch = checkpoint['epoch']
start_batch = checkpoint['batch_idx'] + 1  # resume right after the saved batch

In [None]:
# Tester avec un train et val loaders r√©duits
num_epochs = 3

best_auc = 0.0  # pour sauvegarder le meilleur mod√®le

for epoch in range(2,num_epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_auc = evaluate(model, val_loader, device)
    
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val AUC: {val_auc:.4f}")

    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), f"best_model_epoch{epoch+1}.pth")
        print(f"--> Mod√®le sauvegard√© √† l'epoch {epoch+1} avec AUC {val_auc:.4f}")