A reference notebook showing how to train an nnU-Net v2 model.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. utils.common) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [48]:
from utils.common import get_system_info, get_gpu_info
import os
import shutil
import json

# Show the GPU and host configuration reported by the utils.common helpers.
display(get_gpu_info())
display(get_system_info())

{'available': True,
 'count': 1,
 'names': ['NVIDIA GeForce RTX 3060 Laptop GPU'],
 'memory_gb': [5.99951171875],
 'cuda_version': '12.6'}

{'platform': 'Linux',
 'python_version': '3.10.12',
 'torch_version': '2.7.0+cu126',
 'cpu_count': 8,
 'cpu_count_logical': 16,
 'memory_gb': 7.6018829345703125}

#### Setup

In [10]:
# https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/installation_instructions.md
# Run only once!
# !git clone https://github.com/MIC-DKFZ/nnUNet.git
# !cd nnUNet; pip install -e .

Cloning into 'nnUNet'...
remote: Enumerating objects: 14008, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 14008 (delta 0), reused 3 (delta 0), pack-reused 14005 (from 1)[K
Receiving objects: 100% (14008/14008), 8.61 MiB | 6.60 MiB/s, done.
Resolving deltas: 100% (10702/10702), done.
Updating files: 100% (246/246), done.
Obtaining file:///mnt/c/Users/abdal/Documents/Projects/ai-notebooks/nnUNet
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
[?25hCollecting dynamic-network-architectures<0.5,>=0.4.1
  Downloading dynamic_network_architectures-0.4.2.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting graphviz
  Using cached graphviz-0.21-py3-none-any.whl (47 kB)
Collecting tifffile
  Down

#### nnunetv2 for 3D data.

Dataset formatting.

In [40]:
# https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/setting_up_paths.md
# https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md
# All data lives under <current working directory>/data.
base_dir = os.getcwd()
data_path = os.path.join(base_dir, 'data')
nnUNet_raw = os.path.join(data_path, 'nnUNet_raw')

# Create the nnUNet_raw directory (no error if it already exists)
os.makedirs(nnUNet_raw, exist_ok=True)

# The IBSR18 (3D) dataset for brain tissue segmentation will be used as an example
# TODO: download the dataset into the data folder
ibsr18_path = os.path.join(data_path, 'IBSR18')

# nnUNet_raw: This is where you place the raw datasets. This folder will have one subfolder for each dataset, named DatasetXXX_YYY,
# where XXX is a 3-digit identifier (such as 001, 002, 043, 999, ...) and YYY is the (unique) dataset name.

In [45]:
# Create the new dataset folder inside nnUNet_raw, following the DatasetXXX_YYY naming convention
os.makedirs(os.path.join(nnUNet_raw, 'Dataset001_IBSR18'), exist_ok=True)

In [46]:
# Structuring the files inside the dataset folder
# Note that this dataset has a single input channel (one modality type, for example, one of FLAIR, T1w, T1gd or T2w), thus we use _0000

# Lay out the split folders (training images, test images, training labels)
# inside the dataset directory; existing folders are left untouched.
for _split_dir in ('imagesTr', 'imagesTs', 'labelsTr'):
    os.makedirs(os.path.join(nnUNet_raw, 'Dataset001_IBSR18', _split_dir), exist_ok=True)

# Function to handle the file copying and renaming
def handle_files(source_folder, dest_path, is_test=False):
    """Copy one source split into the nnU-Net raw dataset layout.

    ``source_folder`` is expected to contain one sub-folder per case; every
    ``.nii.gz`` file inside a case folder is copied and renamed after that
    folder:

    * files whose name contains ``_seg`` are labels and are copied to
      ``<dest_path>/labelsTr/<case>.nii.gz``;
    * all other files are images and are copied to
      ``<dest_path>/imagesTr/<case>_0000.nii.gz`` (or ``imagesTs`` when
      ``is_test`` is true). ``_0000`` is the channel suffix for a
      single-channel dataset.

    Args:
        source_folder: Directory holding one sub-folder per case.
        dest_path: Root of the target dataset folder (e.g. ``DatasetXXX_YYY``).
        is_test: When true, images go to ``imagesTs`` and segmentation files
            are skipped, so that test labels never end up in ``labelsTr``
            without a matching training image.

    Raises:
        OSError: If ``source_folder`` cannot be listed or a copy fails.
    """
    image_dir = os.path.join(dest_path, 'imagesTs' if is_test else 'imagesTr')
    label_dir = os.path.join(dest_path, 'labelsTr')

    # Create the destination folders here so the function does not depend on
    # another cell having prepared them.
    os.makedirs(image_dir, exist_ok=True)
    if not is_test:
        os.makedirs(label_dir, exist_ok=True)

    for folder_name in sorted(os.listdir(source_folder)):
        folder_path = os.path.join(source_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Sorted so that, if a case folder ever holds several candidate files,
        # the one that ends up at the destination is deterministic.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith('.nii.gz'):
                continue

            source_file = os.path.join(folder_path, file_name)
            if '_seg' in file_name:
                if is_test:
                    # There is no label folder for the test split.
                    continue
                dest_file = os.path.join(label_dir, folder_name + '.nii.gz')
            else:
                dest_file = os.path.join(image_dir, folder_name + '_0000.nii.gz')
            shutil.copy2(source_file, dest_file)

# Copy every IBSR18 split into the newly named 'Dataset001_IBSR18' folder.
# Training_Set and Validation_Set are both handled as training data; only
# Test_Set is flagged as the test split.
_dataset_dir = os.path.join(nnUNet_raw, 'Dataset001_IBSR18')
for _split_name, _is_test_split in (('Training_Set', False), ('Validation_Set', False), ('Test_Set', True)):
    handle_files(
        source_folder=os.path.join(ibsr18_path, _split_name),
        dest_path=_dataset_dir,
        is_test=_is_test_split,
    )

From the nnUNet git repo: 'For each training case, all images must have the same geometry to ensure that their pixel arrays are aligned. Also make sure that all your data is co-registered!'

In [57]:
# Creating the required json file
def create_dataset_json(parent_dir, channel_names=None, labels=None, file_ending=".nii.gz"):
    """Write the ``dataset.json`` file for a dataset folder.

    The file contains the channel names, the label map, the number of
    training cases and the file ending. The number of training cases is the
    number of files in ``<parent_dir>/labelsTr`` that end with
    ``file_ending``; it stays 0 when ``imagesTr`` or ``labelsTr`` is missing.

    Args:
        parent_dir: Dataset folder (contains ``imagesTr`` / ``labelsTr``);
            ``dataset.json`` is written directly into it.
        channel_names: Mapping of channel index (as string) to channel name.
            Defaults to a single T1 channel: ``{"0": "T1"}``.
        labels: Mapping of label name to integer value. Defaults to
            background / CSF / GM / WM (0-3).
        file_ending: File extension of the images and labels, also written to
            the JSON file. Defaults to ``".nii.gz"``.

    Returns:
        None. Success or failure of the write is reported with ``print``;
        an ``IOError`` while writing is caught and printed, not raised.
    """
    if channel_names is None:
        channel_names = {"0": "T1"}
    if labels is None:
        # "CSF" = cerebrospinal fluid (the original spelling "CFS" was a typo).
        labels = {"background": 0, "CSF": 1, "GM": 2, "WM": 3}

    training_images_path = os.path.join(parent_dir, "imagesTr")
    training_labels_path = os.path.join(parent_dir, "labelsTr")

    # One label file per training case, so counting labels gives numTraining.
    num_training = 0
    if os.path.isdir(training_images_path) and os.path.isdir(training_labels_path):
        num_training = sum(
            1 for f in os.listdir(training_labels_path) if f.endswith(file_ending)
        )

    dataset_json = {
        "channel_names": dict(channel_names),
        "labels": dict(labels),
        "numTraining": num_training,
        "file_ending": file_ending,
    }

    # Best-effort write: report the problem instead of aborting the notebook.
    try:
        with open(os.path.join(parent_dir, 'dataset.json'), 'w') as outfile:
            json.dump(dataset_json, outfile, indent=4)

        print("Dataset JSON created successfully")
    except IOError as e:
        print(f"Error creating JSON file: {e}")

# Generate dataset.json inside the Dataset001_IBSR18 folder
create_dataset_json(os.path.join(nnUNet_raw, 'Dataset001_IBSR18'))

Dataset JSON created successfully


Experiment planning and preprocessing.

In [58]:
# nnUNet_raw = ... # already created before
# The preprocessed data and results folders sit next to nnUNet_raw under data_path.
nnUNet_preprocessed = os.path.join(data_path,'nnUNet_preprocessed')
results_folder = os.path.join(data_path,'nnUNet_results')

In [59]:
# Export the three folder locations as environment variables so that the
# shell commands launched from this notebook inherit them.
os.environ.update({
    "nnUNet_raw": str(nnUNet_raw),
    "nnUNet_preprocessed": str(nnUNet_preprocessed),
    "nnUNet_results": str(results_folder),
})

In [60]:
# DATASET_ID=001, following the naming convention used above (Dataset001_IBSR18)
!nnUNetv2_plan_and_preprocess -d 001 --verify_dataset_integrity

Fingerprint extraction...
Dataset001_IBSR18
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer

####################
verify_dataset_integrity Done. 
If you didn't see any error messages then your dataset is most likely OK!
####################

Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer
100%|███████████████████████████████████████████| 15/15 [00:30<00:00,  2.05s/it]
Experiment planning...

############################
INFO: You are using the old nnU-Net default planner. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Dropping 3d_lowres config because the image size difference to 3d_fullres is too small. 3d_fullres: [115. 139. 147.], 3d_lowres: [115, 139, 147]
2D U-Net configuration:
{'data_identifier': 'nnUNetPlans_2d', 'preprocessor_name': 'DefaultPreprocessor

Model training.

In [63]:
!nnUNetv2_train -h

usage: nnUNetv2_train [-h] [-tr TR] [-p P]
                      [-pretrained_weights PRETRAINED_WEIGHTS]
                      [-num_gpus NUM_GPUS] [--npz] [--c] [--val] [--val_best]
                      [--disable_checkpointing] [-device DEVICE]
                      dataset_name_or_id configuration fold

positional arguments:
  dataset_name_or_id    Dataset name or ID to train with
  configuration         Configuration that should be trained
  fold                  Fold of the 5-fold cross-validation. Should be an int
                        between 0 and 4.

options:
  -h, --help            show this help message and exit
  -tr TR                [OPTIONAL] Use this flag to specify a custom trainer.
                        Default: nnUNetTrainer
  -p P                  [OPTIONAL] Use this flag to specify a custom plans
                        identifier. Default: nnUNetPlans
  -pretrained_weights PRETRAINED_WEIGHTS
                        [OPTIONAL] path to nnU-Net checkpoint file 