In [2]:
import os
import glob
import numpy as np
import nibabel as nib
from pathlib import Path


In [None]:
# Directory whose immediate sub-directories are the raw cases to process
# (relative path, resolved against the current working directory).
RAW_ROOT = Path("../raw_data/segmentation/brats2023_raw")
# Directory that receives one compressed .npz file per processed case.
OUT_ROOT = Path("../raw_data/segmentation/brats2023_preprocessed")


# Target size every volume is center-cropped / zero-padded to
TARGET_SHAPE = (160, 192, 160)  # (H, W, D)

# Fixed channel order (keep the same order later in the model):
# [T1 = t1 native, T1c, T2w, T2 FLAIR]
# Each entry is (modality name, glob pattern matched inside a case directory).
MODALITIES = [
    ("t1n", "*-t1n.nii.gz"),
    ("t1c", "*-t1c.nii.gz"),
    ("t2w", "*-t2w.nii.gz"),
    ("t2f", "*-t2f.nii.gz"),
]


In [4]:
## Normalize the images per modality
def normalize_volume(vol):
    """Z-score normalize the strictly positive voxels of a volume.

    Voxels with a value <= 0 are treated as background and left unchanged;
    the mean and standard deviation are computed over the positive voxels
    only.

    Parameters
    ----------
    vol : np.ndarray
        Raw intensities, e.g. of shape (H, W, D). The input array is never
        modified.

    Returns
    -------
    np.ndarray
        A new array with the same shape as ``vol``. Floating-point inputs
        keep their dtype; any other dtype is converted to float32 so the
        normalized values are not truncated back to integers.
    """
    vol = np.asarray(vol)
    # Work on a private floating-point copy: the caller's data stays intact and
    # integer inputs do not silently lose the fractional part of the z-scores.
    if np.issubdtype(vol.dtype, np.floating):
        out = vol.copy()
    else:
        out = vol.astype(np.float32)

    mask = out > 0
    if np.any(mask):
        foreground = out[mask]
        # The epsilon guards against division by zero on a constant foreground.
        out[mask] = (foreground - foreground.mean()) / (foreground.std() + 1e-8)
    return out


In [5]:
## Center crop/pad to reach TARGET_SHAPE
def crop_or_pad(vol, target_shape):
    """Center-crop and/or zero-pad a 3-D volume so it has ``target_shape``.

    Each axis is handled independently: an axis longer than its target is
    cropped around its center, an axis shorter than its target is centered
    inside a zero-filled output.

    Parameters
    ----------
    vol : np.ndarray
        Input volume with exactly three dimensions (H, W, D).
    target_shape : sequence of three ints
        Desired output shape (Ht, Wt, Dt).

    Returns
    -------
    np.ndarray
        New array of shape ``target_shape`` with the same dtype as ``vol``.
    """
    H, W, D = vol.shape
    Ht, Wt, Dt = target_shape

    dst_slices = []
    src_slices = []
    for have, want in ((H, Ht), (W, Wt), (D, Dt)):
        # Number of voxels along this axis that are actually copied.
        kept = min(have, want)
        # A positive (want - have) means padding: offset into the output.
        dst_from = max((want - have) // 2, 0)
        # A positive (have - want) means cropping: offset into the input.
        src_from = max((have - want) // 2, 0)
        dst_slices.append(slice(dst_from, dst_from + kept))
        src_slices.append(slice(src_from, src_from + kept))

    out = np.zeros(target_shape, dtype=vol.dtype)
    out[tuple(dst_slices)] = vol[tuple(src_slices)]
    return out


In [6]:
def process_case(case_dir: Path):
    """Load one case directory and turn it into fixed-size image/label arrays.

    For every entry of ``MODALITIES`` (in that order) the single matching
    NIfTI file is loaded, normalized with ``normalize_volume`` and resized
    with ``crop_or_pad``; the results are stacked on a trailing channel axis.
    The segmentation file ``*-seg.nii.gz`` is resized the same way and
    converted to integer labels.

    Parameters
    ----------
    case_dir : Path
        Directory containing exactly one file per modality pattern and
        exactly one ``*-seg.nii.gz`` file.

    Returns
    -------
    img : np.ndarray
        float32 array of shape ``(*TARGET_SHAPE, len(MODALITIES))``.
    seg : np.ndarray
        uint8 label array of shape ``TARGET_SHAPE``; label 4 is remapped to 3.

    Raises
    ------
    AssertionError
        If a modality or the segmentation does not match exactly one file.
    """
    vols = []

    # Load the modalities in the order defined in MODALITIES.
    for mod_name, pattern in MODALITIES:
        fpath_list = list(case_dir.glob(pattern))
        assert len(fpath_list) == 1, f"Problème pour {mod_name} dans {case_dir}"
        fpath = fpath_list[0]

        nii = nib.load(str(fpath))
        # Request float32 directly instead of loading float64 and copying.
        vol = nii.get_fdata(dtype=np.float32)
        vol = normalize_volume(vol)
        vol = crop_or_pad(vol, TARGET_SHAPE)
        vols.append(vol)

    # (H, W, D, C); copy=False skips a redundant full copy when the stack is
    # already float32.
    img = np.stack(vols, axis=-1).astype(np.float32, copy=False)

    # Load the label volume.
    seg_path_list = list(case_dir.glob("*-seg.nii.gz"))
    assert len(seg_path_list) == 1, f"Pas de seg ou multiple seg dans {case_dir}"
    seg_path = seg_path_list[0]

    seg = nib.load(str(seg_path)).get_fdata()
    seg = crop_or_pad(seg, TARGET_SHAPE)
    # get_fdata() returns floats; round before the integer cast so a value such
    # as 0.9999 does not get truncated to the wrong class.
    seg = np.rint(seg).astype(np.uint8)
    # Remap label 4 to 3 so labels are contiguous when the source uses
    # {0, 1, 2, 4}; this is a no-op when no voxel carries label 4.
    seg[seg == 4] = 3

    return img, seg


In [11]:
# Every sub-directory of RAW_ROOT is one case; sorted for a stable processing order.
case_dirs = sorted(filter(Path.is_dir, RAW_ROOT.iterdir()))

In [None]:
# Preprocess every case and store it as one compressed .npz archive holding
# the arrays "image" and "label".
OUT_ROOT.mkdir(parents=True, exist_ok=True)
for case_dir in case_dirs:
    case_id = case_dir.name  # ex: BraTS-GLI-00000-000
    out_path = OUT_ROOT / f"{case_id}.npz"
    # Write to a temporary file first and move it into place afterwards, so an
    # interrupted run never leaves a truncated archive under the final name.
    tmp_path = OUT_ROOT / f"{case_id}.npz.part"

    img, seg = process_case(case_dir)
    try:
        # Passing an open file object stops numpy from appending ".npz" to the
        # temporary file name.
        with open(tmp_path, "wb") as fh:
            np.savez_compressed(fh, image=img, label=seg)
        os.replace(tmp_path, out_path)
    finally:
        # Only still present when the write or the move did not complete.
        if tmp_path.exists():
            tmp_path.unlink()
    print("Saved", out_path)

Saved ../raw_data/segmentation/brats2023_preprocessed/BraTS-GLI-00000-000.npz
Saved ../raw_data/segmentation/brats2023_preprocessed/BraTS-GLI-00002-000.npz
Saved ../raw_data/segmentation/brats2023_preprocessed/BraTS-GLI-00003-000.npz
Saved ../raw_data/segmentation/brats2023_preprocessed/BraTS-GLI-00005-000.npz
Saved ../raw_data/segmentation/brats2023_preprocessed/BraTS-GLI-00006-000.npz


KeyboardInterrupt: 