In [1]:
import json
import os
import shutil
from loguru import logger
from mbas.data.nifti import get_subject_folders
from tqdm import tqdm

This notebook follows the nnU-Net dataset format instructions:
https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/dataset_format.md

This notebook generates a dataset.json file in the output folder. The fields of that file are described below.

    channel_names:
        Channel names must map the index to the name of the channel, example:
        {
            0: 'T1',
            1: 'CT'
        }
        Note that the channel names may influence the normalization scheme!! Learn more in the documentation.

    labels:
        This will tell nnU-Net what labels to expect. Important: This will also determine whether you use region-based training or not.
        Example regular labels:
        {
            'background': 0,
            'left atrium': 1,
            'some other label': 2
        }
        Example region-based training:
        {
            'background': 0,
            'whole tumor': (1, 2, 3),
            'tumor core': (2, 3),
            'enhancing tumor': 3
        }

Remember that nnU-Net expects consecutive values for labels! nnU-Net also expects 0 to be background!

num_training_cases: used to double-check that all training cases are present (written to dataset.json as `numTraining`).

- file_ending: needed for finding the files correctly. IMPORTANT! File endings must match between images and segmentations!

- dataset_name, reference, release, license, description: self-explanatory and not used by nnU-Net. Just for completeness and as a reminder that these would be great!

- overwrite_image_reader_writer: if you need a special IO class for your dataset, derive it from BaseReaderWriter, place it in nnunetv2.imageio, and reference it here by name.

In [3]:
# Single input modality; the key is the channel index as a string.
channel_names = {"0": "LGE_MRI"}

# Label names in index order: position 0 is the background, and the indices
# produced by enumerate are consecutive by construction.
labels = {
    label_name: label_index
    for label_index, label_name in enumerate(("background", "atrium"))
}

In [4]:
# Source folders of the MBAS data (one sub-folder per subject).
MBAS_training = "/home/bryan/data/MBAS/Training"
MBAS_validation = "/home/bryan/data/MBAS/Validation"

# nnU-Net locates its raw datasets through the `nnUNet_raw` environment
# variable, so honour it when it is set; otherwise fall back to the original
# hard-coded location. This keeps the notebook writing to the same place
# nnU-Net will later read from.
nnUNet_raw = os.environ.get("nnUNet_raw", "/home/bryan/data/nnUNet_raw")
dataset_folder = os.path.join(nnUNet_raw, "Dataset104_MBAS")
dataset_json_path = os.path.join(dataset_folder, "dataset.json")

In [5]:
# Count the training subjects; this value is written to dataset.json as numTraining.
training_subject_folders = get_subject_folders(MBAS_training)
num_training_cases = len(training_subject_folders)
num_training_cases

70

In [7]:
# Description of the dataset, assembled from the values defined in the cells above.
dataset_json = {
    "name": "MBAS",
    "release": "1.0",
    "channel_names": channel_names,
    "labels": labels,
    "numTraining": num_training_cases,
    "file_ending": ".nii.gz",
    "overwrite_image_reader_writer": "SimpleITKIO",
}

# Make sure the dataset folder exists, then write the description as
# 2-space-indented JSON with keys in insertion order.
os.makedirs(dataset_folder, exist_ok=True)
with open(dataset_json_path, "w") as json_file:
    json_file.write(json.dumps(dataset_json, indent=2))

In [8]:
def copy_to_nnunet_folder(mbas_folder, nnunet_folder, training = True, dry_run=False):
    """Copy MBAS images and binary labels into an nnU-Net dataset folder.

    Every sub-directory of ``mbas_folder`` is scanned (non-directories are
    skipped). Inside each one:

    * files ending in ``gt.nii.gz`` are copied to the images folder as
      ``<case>_0000.nii.gz`` (channel 0),
    * files ending in ``binary_label.nii.gz`` are copied to the labels folder
      as ``<case>.nii.gz``,
    * every other file is ignored.

    ``<case>`` is the first ``len("MBAS_XXX")`` characters of the file name.

    Args:
        mbas_folder: Directory containing one sub-directory per subject.
        nnunet_folder: Destination dataset folder.
        training: If True, images go to ``imagesTr`` and labels to
            ``labelsTr``. If False, images go to ``imagesTs`` and any labels
            go to ``labelsTs`` so they are never mixed into the training
            labels.
        dry_run: If True, only log the planned copies; no directory is
            created and no file is copied.

    Returns:
        None.
    """
    case_id_length = len("MBAS_XXX")
    images_folder = os.path.join(nnunet_folder, "imagesTr" if training else "imagesTs")
    # Labels of non-training cases must not land in labelsTr, otherwise the
    # training label set would contain cases without a training image.
    labels_folder = os.path.join(nnunet_folder, "labelsTr" if training else "labelsTs")

    if not dry_run:
        os.makedirs(images_folder, exist_ok=True)
        if training:
            os.makedirs(labels_folder, exist_ok=True)

    # Sorted listings give a reproducible processing / logging order.
    for subject_name in tqdm(sorted(os.listdir(mbas_folder))):
        subject_path = os.path.join(mbas_folder, subject_name)
        if not os.path.isdir(subject_path):
            continue

        for p_file in sorted(os.listdir(subject_path)):
            case_identifier = p_file[:case_id_length]
            # NOTE(review): "*gt.nii.gz" is treated as the input image, not a
            # ground-truth mask -- presumably the MBAS naming for the MRI
            # volume; confirm against the dataset description.
            if p_file.endswith("gt.nii.gz"):
                nnunet_file = os.path.join(images_folder, f"{case_identifier}_0000.nii.gz")
            elif p_file.endswith("binary_label.nii.gz"):
                nnunet_file = os.path.join(labels_folder, f"{case_identifier}.nii.gz")
            else:
                continue

            p_filepath = os.path.join(subject_path, p_file)

            if dry_run:
                logger.info(f"Copying {p_filepath} -> {nnunet_file}")
                continue

            # The test-label folder is only created once a label actually needs it.
            os.makedirs(os.path.dirname(nnunet_file), exist_ok=True)
            shutil.copy(p_filepath, nnunet_file)

In [10]:
# Copy the MBAS training subjects into the dataset folder (training=True selects imagesTr).
copy_to_nnunet_folder(MBAS_training, dataset_folder, training=True, dry_run=False)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:03<00:00, 22.47it/s]


In [11]:
# Copy the MBAS validation subjects into the dataset folder (training=False selects imagesTs).
copy_to_nnunet_folder(MBAS_validation, dataset_folder, training=False, dry_run=False)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:01<00:00, 25.17it/s]
