In [16]:
!pip install monai

Collecting monai
  Downloading monai-1.5.0-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<2.7.0,>=2.4.1->monai)
 

In [17]:
import os
import nibabel as nib
import numpy as np
from glob import glob

"""
    Get data inputs, assumes CT volumes and segmentation masks have corresponding names and indices.
    Analyze the nifti datasets for MONAI parameter adjustments
    :param str in_dir: file path of data.
"""
def prepare_and_configure(in_dir):
    volume_dict = {}
    segmentation_dict = {}

    # find all .nii files under in_dir
    nii_files = glob(os.path.join(in_dir, "**", "*.nii"), recursive=True)

    for filepath in nii_files:
        filename = os.path.basename(filepath)
        if filename.startswith("volume-"):
            idx = int(filename.split("-")[1].split(".")[0])
            volume_dict[idx] = filepath
        elif filename.startswith("segmentation-"):
            idx = int(filename.split("-")[1].split(".")[0])
            segmentation_dict[idx] = filepath

    # match volume and segmentation by idx
    matched_keys = sorted(set(volume_dict.keys()) & set(segmentation_dict.keys()))
    all_files = [{"vol": volume_dict[k], "seg": segmentation_dict[k]} for k in matched_keys]

    # split 80% train / 20% test
    split_idx = int(0.8 * len(all_files))
    train_files = all_files[:split_idx]
    test_files = all_files[split_idx:]
    
    # analyze voxel sizes and shapes
    voxel_sizes = []
    shapes = []
    for k in matched_keys:
        img = nib.load(volume_dict[k])
        data = img.get_fdata()
        voxel_sizes.append(img.header.get_zooms())
        shapes.append(data.shape)

    mean_spacing = np.mean(voxel_sizes, axis=0)
    mean_shape = np.mean(shapes, axis=0)
    pixdim = tuple(round(s, 2) for s in mean_spacing)
    a_min, a_max = -200, 250

    # detect GPU memory
    try:
        import GPUtil
        gpus = GPUtil.getGPUs()
        mem_free = max([gpu.memoryFree for gpu in gpus])  # in MB
    except Exception:
        mem_free = 0  # fallback to CPU

    # adjust preprocessing resolution based on memory
    if mem_free >= 20000:
        spatial_size = [256, 256, 256]
        batch_size = 2
    elif mem_free >= 10000:
        spatial_size = [192, 192, 128]
        batch_size = 1
    elif mem_free >= 4000:
        spatial_size = [128, 128, 64]
        batch_size = 1
    else:
        spatial_size = [96, 96, 64]
        batch_size = 1

    return {
        "train_files": train_files,
        "test_files": test_files,
        "pixdim": pixdim,
        "a_min": a_min,
        "a_max": a_max,
        "spatial_size": spatial_size,
        "batch_size": batch_size,
        "mem_free_MB": mem_free
    }

In [18]:
"""
Written by Haifaa with sources:
https://github.com/Project-MONAI/tutorials
"""

import re
from glob import glob
from monai.transforms import (
    Compose,
    EnsureChannelFirstD,
    LoadImaged,
    Resized,
    ToTensord,
    Spacingd,
    Orientationd,
    ScaleIntensityRanged,
    CropForegroundd,
)
from monai.data import DataLoader, Dataset, CacheDataset
from monai.utils import set_determinism

"""
    Use MONAI transforms to prepares data for segmentation.
    Voxel: 3D grid representation of data.
    
    :param tuple pixdim: standard voxel spacing (in millimeters) for resampling the images in the x, y, and z dimensions.
    :param int a_min: intensity voxel min for CT scans (less are clipped before scaling).
    :param int a_max: intensity voxel max for CT scans (more are clipped before scaling).
    :param int array spatial_size: output size (in voxel) to which each image and label volume will be resized. AKA input size for the neural network.
    :param int batch_size: adjyst batch size, default is 1.
    :return PyTorch DataLoader objects: used to train neural network.
"""
def preprocess(pixdim, a_min, a_max, spatial_size, batch_size, train_files, test_files):

    # reproduce training results
    set_determinism(seed=0)

    # and apply transformations to them
    train_transforms = Compose([
        LoadImaged(keys=["vol", "seg"]),
        EnsureChannelFirstD(keys=["vol", "seg"]),
        Spacingd(keys=["vol", "seg"], pixdim=pixdim, mode=("bilinear", "nearest")),
        Orientationd(keys=["vol", "seg"], axcodes="RAS"),
        ScaleIntensityRanged(keys=["vol"], a_min=a_min, a_max=a_max, b_min=0.0, b_max=1.0, clip=True),
        CropForegroundd(keys=["vol", "seg"], source_key="vol"),
        Resized(keys=["vol", "seg"], spatial_size=spatial_size),
        ToTensord(keys=["vol", "seg"]),
    ])

    # transforms for test data
    test_transforms = Compose([
        LoadImaged(keys=["vol", "seg"]),
        EnsureChannelFirstD(keys=["vol", "seg"]),
        Spacingd(keys=["vol", "seg"], pixdim=pixdim, mode=("bilinear", "nearest")),
        Orientationd(keys=["vol", "seg"], axcodes="RAS"),
        ScaleIntensityRanged(keys=["vol"], a_min=a_min, a_max=a_max, b_min=0.0, b_max=1.0, clip=True),
        CropForegroundd(keys=["vol", "seg"], source_key="vol"),
        Resized(keys=["vol", "seg"], spatial_size=spatial_size),
        ToTensord(keys=["vol", "seg"]),
    ])
    
    train_ds = Dataset(data=train_files, transform=train_transforms)
    test_ds = Dataset(data=test_files, transform=test_transforms)

    train_loader = DataLoader(train_ds, batch_size=batch_size)
    test_loader = DataLoader(test_ds, batch_size=batch_size)

    return train_loader, test_loader

2025-06-20 16:34:49.351657: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750437289.537428      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750437289.595232      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [19]:
# usage flow

# 1. user input (for now, it is kaggle data set)
params = prepare_and_configure(in_dir="/kaggle/input")

# testing
print("prepare_and_configure:")
for k, v in params.items():
    print(f"{k}: {v}")

# preprocess & show reasoning
train_loader, test_loader = preprocess(pixdim=params['pixdim'], a_min=params['a_min'], a_max=params['a_max'], spatial_size=params['spatial_size'], batch_size=params['batch_size'], train_files=params['train_files'], test_files=params['test_files'])
print(train_loader)
print(test_loader)

#

prepare_and_configure:
train_files: [{'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-0.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-0.nii'}, {'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-1.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-1.nii'}, {'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-2.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-2.nii'}, {'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-3.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-3.nii'}, {'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-4.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-4.nii'}, {'vol': '/kaggle/input/liver-tumor-segmentation/volume_pt1/volume-5.nii', 'seg': '/kaggle/input/liver-tumor-segmentation/segmentations/segmentation-5.nii'}, {'vol': '/kaggle/inpu