In [2]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from scipy.ndimage import rotate, shift,zoom , map_coordinates, gaussian_filter
import random
import os 
import glob
import ast

import matplotlib.pyplot as plt
import pydicom

from tqdm import tqdm

In [3]:
import sys
import numpy as np
sys.path.append("/kaggle/input/fonctions-u")  # adapte le nom exact

from utils import get_instance_number, coordonnee_z, get_center, get_pixelspacing, get_patient_ID, get_position
from utils import show_middle_slices,show_slice_with_point,ajouter_Modality
from utils import dicom_to_numpy,resample,crop,normalization,resample_coordonnees,preprocessing_volume_and_coords,preprocessing_volume
from utils import random_deformation, data_augmentation, dataset_augmented
from utils import extract_positives_cubes, extract_negative_cubes, get_non_overlapping_cubes, dictionnaire_patient, build_dataset_dict
from utils import train_model, combined_loss, evaluate_model, find_best_threshold

In [4]:
df_train = pd.read_csv('/kaggle/input/rsna-intracranial-aneurysm-detection/train.csv')

# **Fonction d'inférence**

In [5]:
def predict_series(dicom_series_path: str,
                   model_dict: dict,
                   cube_size=(48,48,48),
                   stride=(28,28,28),
                   device="cuda",
                   top_k=5,
                   threshold=0.75,
                   method="mean",
                   disable_tqdm=True):
    """
    Retourne un vecteur (14,) de probabilités à partir d'une série DICOM.

    model_dict : {'MRIT1post': model1, 'MRIT2': model2, 'CRA': model3, 'MRA': model4}
    """

    # 1️⃣ Déterminer le type d'IRM
    modality = detect_modality(dicom_series_path)
    model = model_dict[modality].to(device)
    model.eval()

    # 2️⃣ Charger + prétraiter le volume complet
    volume = preprocessing_volume(dicom_series_path)

    # 3️⃣ Générer les mini-cubes
    cubes = extract_cubes(volume, cube_size, stride)  # -> liste de np.array (C,D,H,W)

    all_preds = []
    with torch.no_grad():
        for cube in tqdm(cubes, desc="Inference cubes",disable=disable_tqdm):
            x = torch.from_numpy(cube).unsqueeze(0).unsqueeze(0).float().to(device)
            logits = model(x)
            probs = torch.sigmoid(logits).cpu().numpy().ravel()
            all_preds.append(probs)

    # 4️⃣ Agrégation sur tous les cubes
    all_preds = np.stack(all_preds, axis=0)        # shape = (n_cubes, 14)
    final_pred = aggregate_cubes_predictions(all_preds,top_k,threshold,method)          # max des probabilités

    return final_pred


# **1. Detecter la modalité**

In [6]:
def detect_modality(patient_path: str) -> str:
    """
    Détecte la modalité d'une série DICOM (CTA, MRA, MRI T2, MRI T1post)
    en se basant sur les tags disponibles.
    """
    # On ouvre un des fichiers DICOM de la série
    # (n'importe lequel suffit, on prend le premier)
    first_file = next(
        f for f in os.listdir(patient_path) if f.lower().endswith(".dcm")
    )
    ds = pydicom.dcmread(os.path.join(patient_path, first_file),
                         stop_before_pixels=True)

    modality = getattr(ds, "Modality", "").upper()

    # --- Cas CTA ---
    if modality == "CT":
        return "CTA"

    # --- Cas IRM : MRA ou autres ---
    if modality == "MR":
        # Ici, on n'a pas d'autre tag discriminant dans le test set,
        # donc on doit utiliser un indice externe :
        #   - si tu connais la liste de SeriesInstanceUID correspondant à MRA,
        #     ajoute un mapping dictionnaire ci-dessous.
        #   - sinon, par défaut, on renvoie "MRI T1post"
        # et tu peux ajuster avec une logique maison si tu as un indice
        return "MRA"   # <-- à raffiner selon ton organisation

    raise ValueError(f"Modalité inconnue pour la série {patient_path}")

In [7]:
patient_path='/kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10086325220791440678552106812785190149'
detect_modality(patient_path)

'CTA'

# **2. Preprocessing du volume**

In [8]:
def get_pixelspacing(path):
    """
    Récupère l'espacement voxel (mm) depuis un fichier DICOM.
    Retourne (row_spacing, col_spacing, slice_thickness)
    """
    dcm = pydicom.dcmread(path)

    # Espacement dans le plan (mm/pixel)
    if "PixelSpacing" in dcm:
        row_spacing, col_spacing = [float(x) for x in dcm.PixelSpacing]
    else:
        row_spacing, col_spacing = None, None

    # Épaisseur de coupe (mm)
    slice_thickness = float(getattr(dcm, "SliceThickness", 1.0))

    return row_spacing, col_spacing, slice_thickness


def get_patient_ID(patient_path):
    return str(os.path.basename(patient_path))

In [9]:
def dicom_to_numpy(patient_path):
    
    dicom_files = sorted(glob.glob(patient_path+'/*.dcm'))
    slices = [pydicom.dcmread(f) for f in dicom_files]

    #tri des slices par instance number
    slices.sort(key=lambda s: int(s.InstanceNumber))

    
    # On empile les pixel_array en un volume 3D NumPy (X,Y,Z)
    target_shape = slices[0].pixel_array.shape

    slices = [s for s in slices if s.pixel_array.shape == target_shape]
    volume = np.stack([s.pixel_array for s in slices], axis=-1)

    # Récupération du spacing réel
    pixel_spacing = slices[0].PixelSpacing
    dx, dy = pixel_spacing
    dz = getattr(slices[0], 'SliceThickness', 1.0)  # fallback si manquant
    
    return volume, (dx,dy,dz)

def resample(volume, spacing, target_spacing=(0.4, 0.4, 0.4)):
    zoom_factors = [s / t for s, t in zip(spacing, target_spacing)]
    new_volume = zoom(volume, zoom_factors, order=1)
    return new_volume

def crop(volume):


    # On crée un masque des voxels non nuls
    mask = volume > (volume.max() * 0.1)
    if not mask.any():
        return volume  # rien à couper
    
    # On récupère les indices min/max pour chaque dimension
    x_min, x_max = mask.any(axis=(1,2)).nonzero()[0][[0, -1]] #axe_x
    y_min, y_max = mask.any(axis=(0,2)).nonzero()[0][[0, -1]] #axe_y
    z_min, z_max = mask.any(axis=(0,1)).nonzero()[0][[0, -1]] #axe_z
    
    # Crop
    cropped = volume[x_min:x_max+1, y_min:y_max+1, z_min:z_max+1]
    return cropped, (x_min,y_min,z_min)

def normalization(volume):
    
    v_min, v_max = volume.min(), volume.max()
    if v_max > v_min:  # éviter la division par zéro
        volume = (volume - v_min) / (v_max - v_min)
    else:
        volume = np.zeros_like(volume)
    return volume

In [10]:
def preprocessing_volume(patient_path):
    
    volume,spacing = dicom_to_numpy(patient_path)
    resample_volume = resample(volume, spacing,target_spacing=(0.4, 0.4, 0.4))
    crop_volume, crop_indices = crop(resample_volume)
    new_volume = normalization(crop_volume)

    return new_volume

# **3. Extraction de cubes chevauchant**

In [11]:
def pad_if_needed(cube, cube_size):
    """Pad le cube pour qu'il ait exactement cube_size (z,y,x)."""
    dz, dy, dx = cube_size
    pad_z = max(0, dz - cube.shape[0])
    pad_y = max(0, dy - cube.shape[1])
    pad_x = max(0, dx - cube.shape[2])
    if pad_z > 0 or pad_y > 0 or pad_x > 0:
        cube = np.pad(
            cube,
            ((0, pad_z), (0, pad_y), (0, pad_x)),
            mode='constant',
            constant_values=0
        )
    return cube

def extract_cubes(volume, cube_size=(48,48,48), stride=(24,24,24)):
    """
    Découpe un volume 3D en mini-cubes avec chevauchement.

    Args:
        volume: np.array 3D (D,H,W)
        cube_size: tuple (dz, dy, dx)
        stride: tuple (sz, sy, sx)

    Returns:
        List of np.array cubes, chaque cube de taille cube_size
    """
    D, H, W = volume.shape
    dz, dy, dx = cube_size
    sz, sy, sx = stride

    cubes = []

    for z in range(0, max(D - dz + 1, 1), sz):
        for y in range(0, max(H - dy + 1, 1), sy):
            for x in range(0, max(W - dx + 1, 1), sx):
                cube = volume[z:z+dz, y:y+dy, x:x+dx]
                cube = pad_if_needed(cube, cube_size)
                cubes.append(cube)

    return cubes

# **4. Final prediction**

In [12]:
def aggregate_cubes_predictions(all_preds, top_k=5,threshold=0.75,method="mean"):
    """
    all_preds: np.array shape (n_cubes, 14)
    threshold: float, seuil pour décider anévrisme
    """
    n_cubes = all_preds.shape[0]
    global_probs = all_preds[:, 13]
    
     # 1️⃣ indices des top-k cubes
    k = min(top_k, n_cubes)
    top_k_idx = np.argsort(global_probs)[-k:]


    # 2️⃣ probabilité globale du volume (agrégation des top-k)
    if method == "max":
        volume_prob = np.max(global_probs[top_k_idx])
    elif method == "percentile":
        volume_prob = np.percentile(global_probs[top_k_idx], 90)
    else:  # "mean" par défaut
        volume_prob = global_probs[top_k_idx].mean()

    # 3️⃣ décision de présence d'anévrisme
    if volume_prob < threshold:
        final_vector = np.zeros(14, dtype=float)
    else:
        # Option : moyenne des positions locales des top-k cubes
        positions_avg = all_preds[top_k_idx, :13].mean(axis=0)
        final_vector = np.concatenate([positions_avg, [volume_prob]], axis=0)

    return final_vector

# **5. Définition des modèles**

In [13]:
class ConvBlock3D(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv3d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm3d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv3d(out_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm3d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.block(x)


class UNet3DClassifier(nn.Module):
    def __init__(self, in_ch=1, base_ch=16):
        super().__init__()
        # Encoder
        self.enc1 = ConvBlock3D(in_ch, base_ch)
        self.pool1 = nn.MaxPool3d(2)
        self.enc2 = ConvBlock3D(base_ch, base_ch*2)

        self.pool2 = nn.MaxPool3d(2)

        # Bottleneck
        self.bottleneck = ConvBlock3D(base_ch*2, base_ch*4)

        # Decoder
        self.up2 = nn.ConvTranspose3d(base_ch*4, base_ch*2, kernel_size=2, stride=2)
        self.dec2 = ConvBlock3D(base_ch*4, base_ch*2)

        self.up1 = nn.ConvTranspose3d(base_ch*2, base_ch, kernel_size=2, stride=2)
        self.dec1 = ConvBlock3D(base_ch*2, base_ch)

        # Classifier pour 14 sorties (13 positions + 1 label)
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool3d((1,1,1)),
            nn.Flatten(),
            nn.Linear(base_ch, 14)

        )


    def forward(self, x):
        # Encoder
        e1 = self.enc1(x)
        p1 = self.pool1(e1)
        e2 = self.enc2(p1)
        p2 = self.pool2(e2)

        # Bottleneck
        b = self.bottleneck(p2)

        # Decoder
        d2 = self.up2(b)
        d2 = torch.cat([d2, e2], dim=1)
        d2 = self.dec2(d2)

        d1 = self.up1(d2)
        d1 = torch.cat([d1, e1], dim=1)
        d1 = self.dec1(d1)

        out = self.classifier(d1)  # (B,14)
        return out

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_CTA = UNet3DClassifier(in_ch=1, base_ch=32).to(device)
model_CTA.load_state_dict(torch.load("/kaggle/input/modele_cta_22_09/pytorch/default/1/modele_CTA_22_09.pth", map_location=device))
model_CTA = model_CTA.to(device)   # envoyer le modèle sur GPU

model_MRA = UNet3DClassifier(in_ch=1, base_ch=32).to(device)
model_MRA.load_state_dict(torch.load("/kaggle/input/modele_mra_18_09/pytorch/default/1/modele_MRA_18_09.pth", map_location=device))
model_MRA = model_MRA.to(device)   # envoyer le modèle sur GPU

model_MRIT1 = UNet3DClassifier(in_ch=1, base_ch=32).to(device)
model_MRIT1.load_state_dict(torch.load("/kaggle/input/modele_mrit1post_20_09/pytorch/default/1/modele_MRIT1post_20_09.pth", map_location=device))
model_MRIT1 = model_MRIT1.to(device)   # envoyer le modèle sur GPU

model_MRIT2 = UNet3DClassifier(in_ch=1, base_ch=32).to(device)
model_MRIT2.load_state_dict(torch.load("/kaggle/input/modele_mrit2_21_09/pytorch/default/1/modele_MRIT2_21_09.pth", map_location=device))
model_MRIT2 = model_MRIT2.to(device)   # envoyer le modèle sur GPU

In [15]:
model_dict={'MRIT1post': model_MRIT1, 'MRIT2': model_MRIT2, 'CTA': model_CTA, 'MRA': model_MRA}

# **6. Création d'un dataset series et labels**

In [16]:
# colonnes de localisation
loc_cols = [
    'Left Infraclinoid Internal Carotid Artery',
    'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery',
    'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery',
    'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery',
    'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery',
    'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation'
]

# colonnes à garder pour le dataset
cols_to_keep = ['SeriesInstanceUID'] + loc_cols + ['Aneurysm Present']

# nouveau dataframe filtré
df_dataset = df_train[cols_to_keep].copy()

# transformer en liste de dictionnaires pour itérer facilement
dataset = [
    {
        "series_uid": row['SeriesInstanceUID'],
        "locations": row[loc_cols].to_dict(),  # dictionnaire localisation -> 0/1
        "label": row['Aneurysm Present']
    }
    for _, row in df_dataset.iterrows()
]

data_prefix = "/kaggle/input/rsna-intracranial-aneurysm-detection/series/"

dataset = [
    {
        "series_path": os.path.join(data_prefix, row['SeriesInstanceUID']),
        "locations": row[loc_cols].to_dict(),  # dictionnaire localisation -> 0/1
        "label": row['Aneurysm Present']
    }
    for _, row in df_dataset.iterrows()
]

In [17]:
dataset[1]

{'series_path': '/kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10004684224894397679901841656954650085',
 'locations': {'Left Infraclinoid Internal Carotid Artery': 0,
  'Right Infraclinoid Internal Carotid Artery': 0,
  'Left Supraclinoid Internal Carotid Artery': 0,
  'Right Supraclinoid Internal Carotid Artery': 0,
  'Left Middle Cerebral Artery': 0,
  'Right Middle Cerebral Artery': 0,
  'Anterior Communicating Artery': 0,
  'Left Anterior Cerebral Artery': 0,
  'Right Anterior Cerebral Artery': 0,
  'Left Posterior Communicating Artery': 0,
  'Right Posterior Communicating Artery': 0,
  'Basilar Tip': 0,
  'Other Posterior Circulation': 0},
 'label': 0}

# **Essai**

In [20]:
series_path=dataset[1]['series_path']

In [19]:
predict_series(dicom_series_path = dataset[1]['series_path'],
                   model_dict = model_dict,
                   cube_size=(48,48,48),
                   stride=(28,28,28),
                   device="cuda",
                   top_k=5,
                   threshold=0.93,
                   method="mean",
                   disable_tqdm=False)

Inference cubes: 100%|██████████| 3570/3570 [00:35<00:00, 100.46it/s]


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [37]:
n_per_class=50
dataset_subset = (
    [s for s in dataset if s["label"] == 0][:n_per_class] +
    [s for s in dataset if s["label"] == 1][:n_per_class]
)

In [39]:
from sklearn.metrics import roc_auc_score
top_k= 5
threshold = 0.93
methods = "mean"

y_true, y_pred = [], []

for item in tqdm(dataset_subset):
    try:
        pred_vector = predict_series(
            item["series_path"], 
            model_dict, 
            top_k=top_k, 
            threshold=threshold, 
            method=method,
            disable_tqdm=True
        )
        y_true.append(item["label"])
        y_pred.append(pred_vector[13])
    except Exception as e:
        print(f"Skipping series {item['series_path']}: {e}")
        continue

auc = roc_auc_score(y_true, y_pred)
print({"top_k": top_k, "threshold": threshold, "method": method, "AUC": auc})


  4%|▍         | 4/100 [01:53<40:01, 25.02s/it]  

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10012790035410518400400834395242853657: 'FileDataset' object has no attribute 'PixelSpacing'


  8%|▊         | 8/100 [08:06<1:20:37, 52.58s/it] 

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10030804647049037739144303822498146901: 'FileDataset' object has no attribute 'PixelSpacing'


  9%|▉         | 9/100 [08:08<55:23, 36.52s/it]  

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10035782880104673269567641444954004745: 'FileDataset' object has no attribute 'PixelSpacing'


 21%|██        | 21/100 [22:58<47:18, 35.92s/it]   

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10100852389239445465234081623205886374: 'FileDataset' object has no attribute 'PixelSpacing'


 24%|██▍       | 24/100 [23:52<28:14, 22.30s/it]

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10122841756457641138155875644216826804: 'FileDataset' object has no attribute 'PixelSpacing'


 72%|███████▏  | 72/100 [1:03:03<28:01, 60.05s/it] 

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10134365079002163886508836892471866754: 'FileDataset' object has no attribute 'PixelSpacing'


 83%|████████▎ | 83/100 [1:12:58<10:00, 35.30s/it]

Skipping series /kaggle/input/rsna-intracranial-aneurysm-detection/series/1.2.826.0.1.3680043.8.498.10207110118916220264491289532161991004: 'FileDataset' object has no attribute 'PixelSpacing'


100%|██████████| 100/100 [1:32:21<00:00, 55.41s/it]

{'top_k': 5, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.5782407407407408}





# **Bilan provisoire**
- {'top_k': 5, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.775}
- {'top_k': 5, 'threshold': 0.93, 'method': 'percentile', 'AUC': 0.6500000000000001}
- {'top_k': 5, 'threshold': 0.93, 'method': 'max', 'AUC': 0.7000000000000001}
- {'top_k': 4, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.75}
- {'top_k': 5, 'threshold': 0.94, 'method': 'mean', 'AUC': 0.7000000000000001}


Avec 40 éléments :
- {'top_k': 5, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.6852941176470588}
- {'top_k': 5, 'threshold': 0.94, 'method': 'mean', 'AUC': 0.5926470588235295}
- 

## **Bilan provisoire**

Avec les données :
- top_k_list = [3,5]
- threshold_list = [0.93,0.95]
- methods = ["mean"]
on obtient {'top_k': 5, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.775}


Avec les donées :
- top_k_list = [5,7]
- threshold_list = [0.93]
- methods = ["mean","percentile"]
on obtient 'top_k': 7, 'threshold': 0.93, 'method': 'mean', 'AUC': 0.7000000000000001}, {'top_k': 7, 'threshold': 0.93, 'method': 'percentile', 'AUC': 0.7000000000000001}




In [118]:
results = sorted(results, key=lambda x: x["AUC"], reverse=True)
print(results[:10])

[{'top_k': 3, 'threshold': 0.6, 'method': 'mean', 'AUC': 0.7000000000000001}, {'top_k': 3, 'threshold': 0.75, 'method': 'mean', 'AUC': 0.7000000000000001}, {'top_k': 5, 'threshold': 0.6, 'method': 'mean', 'AUC': 0.7000000000000001}, {'top_k': 5, 'threshold': 0.75, 'method': 'mean', 'AUC': 0.7000000000000001}]


In [119]:
print(y_true)
print(y_pred)

[0, 0, 0, 0, 1, 1, 1, 1, 1]
[0.9261791, 0.92711115, 0.92916185, 0.9450248, 0.8902877, 0.9494604, 0.93754804, 0.9430022, 0.950292]
