In [1]:
import os
import sys
import shutil
import json

# nnU-Net data locations. They are assigned here, before the nnunetv2 imports that
# follow in this cell.
os.environ.update(
    {
        "nnUNet_preprocessed": "/home/bhatti_uhn/nnUNet_preprocessed",
        "nnUNet_results": "/home/bhatti_uhn/nnUNet_results",
        "nnUNet_raw": "/home/bhatti_uhn/nnUNet_raw",
    }
)

# Thread-count settings for the numeric backends (values taken from run_training.py).
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        "OPENBLAS_NUM_THREADS": "1",
    }
)

from nnunetv2.dataset_conversion import generate_dataset_json
from nnunetv2.run.run_training import run_training_entry
import logging

logging.basicConfig(level=logging.WARNING)

In [2]:
# Define paths
base_dir = '/home/bhatti_uhn/Dataset/UHN-MedImg3D-ML-quiz'
# Build the dataset folder from the nnUNet_raw environment variable (set in the first
# cell) so the two locations cannot drift apart.
nnunet_base = os.path.join(os.environ["nnUNet_raw"], 'Dataset876_UHNMedImg3D')

images_tr_dir = os.path.join(nnunet_base, 'imagesTr')
labels_tr_dir = os.path.join(nnunet_base, 'labelsTr')
images_ts_dir = os.path.join(nnunet_base, 'imagesTs')

# Create directories
os.makedirs(images_tr_dir, exist_ok=True)
os.makedirs(labels_tr_dir, exist_ok=True)
os.makedirs(images_ts_dir, exist_ok=True)

trainSrc = os.path.join(base_dir, 'train')
testSrc = os.path.join(base_dir, 'test')

# List the subdirectories of trainSrc in sorted order, skipping hidden entries such as
# .DS_Store and any stray regular files (only directories can be walked later on).
train_subdirs = sorted(
    sdir
    for sdir in os.listdir(trainSrc)
    if not sdir.startswith('.') and os.path.isdir(os.path.join(trainSrc, sdir))
)
# A bare expression in the middle of a cell is not displayed, so print explicitly.
print(f"Found {len(train_subdirs)} training subdirectories: {train_subdirs}")

def copy_files_to_raw(images_tr_dir, labels_tr_dir, trainSrc, train_subdirs):
    """Copy training images and labels into the nnU-Net raw dataset folders.

    Every non-hidden file in ``trainSrc/<subdir>`` is classified by the number of
    underscore-separated fields in its name and copied with its name unchanged:

    * 4 fields, e.g. ``quiz_2_002_0000.nii.gz`` -> image, copied to ``images_tr_dir``
    * 3 fields, e.g. ``quiz_2_002.nii.gz``      -> label, copied to ``labels_tr_dir``

    Args:
        images_tr_dir: Destination directory for image files (created if missing).
        labels_tr_dir: Destination directory for label files (created if missing).
        trainSrc: Directory that contains the training subdirectories.
        train_subdirs: Names of the subdirectories of ``trainSrc`` to process.

    Raises:
        ValueError: If a file name has neither 3 nor 4 underscore-separated fields.
            Files processed before the offending one have already been copied.
    """
    # shutil.copy() treats a missing destination as a *file* path, so every copy would
    # overwrite the same file (or fail if the parent is missing). Make sure both
    # targets really are directories first.
    os.makedirs(images_tr_dir, exist_ok=True)
    os.makedirs(labels_tr_dir, exist_ok=True)

    for subdir in train_subdirs:
        print(f"Processing {subdir}...")

        train_subdir = os.path.join(trainSrc, subdir)
        # Skip hidden entries such as .DS_Store.
        files = [s for s in sorted(os.listdir(train_subdir)) if not s.startswith('.')]

        for fileName in files:
            src_path = os.path.join(train_subdir, fileName)
            # <quiz>_<subType>_<patID> is the case id; a 4th field (0000) is the modality.
            num_fields = len(fileName.split('_'))

            if num_fields == 4:
                logging.info(f"Copying {src_path} to {images_tr_dir}")
                shutil.copy(src_path, images_tr_dir)
            elif num_fields == 3:
                logging.info(f"Copying {src_path} to {labels_tr_dir}")
                shutil.copy(src_path, labels_tr_dir)
            else:
                raise ValueError(f"File {fileName} does not match the expected format.")


def copy_testFiles_to_raw(images_ts_dir, testSrc):
    """Copy every non-hidden file from ``testSrc`` into ``images_ts_dir``.

    File names are kept unchanged and files are processed in sorted order.

    Args:
        images_ts_dir: Destination directory for the test images (created if missing).
        testSrc: Directory that contains the test image files.
    """
    # Without an existing destination directory shutil.copy() would write every file
    # to the same *file* path (or fail if the parent is missing).
    os.makedirs(images_ts_dir, exist_ok=True)

    # Skip hidden entries such as .DS_Store.
    files = [s for s in sorted(os.listdir(testSrc)) if not s.startswith('.')]

    for fileName in files:
        shutil.copy(os.path.join(testSrc, fileName), images_ts_dir)

# ## uncomment to run ###
# copy_files_to_raw(images_tr_dir, labels_tr_dir, trainSrc, train_subdirs)
# copy_testFiles_to_raw(images_ts_dir, testSrc)

In [3]:
# The nnU-Net label integrity check fails on the copied label files:
#   expected [np.int64(0), np.int64(1), np.int64(2)], found [0.        1.0000153 2.       ]
# The label files contain non-integer values, so they are fixed below.

import SimpleITK as sitk
import numpy as np

# Function to correct labels
def correct_labels(image):
    """Return a copy of a label image whose voxel values are exact integers.

    Voxel values are rounded to the nearest integer and stored as int64, so
    floating-point noise on either side of a label (e.g. 1.0000153 or 0.9999847)
    maps back to the intended class. A plain integer cast would truncate toward
    zero and turn 0.9999847 into 0.

    Args:
        image: SimpleITK image holding the (possibly noisy) label map.

    Returns:
        A new SimpleITK image with int64 labels and the spatial metadata copied
        from ``image`` via ``CopyInformation``.
    """
    array = sitk.GetArrayFromImage(image)
    # Round first, then cast: astype alone truncates toward zero.
    array = np.rint(array).astype(np.int64)
    corrected_image = sitk.GetImageFromArray(array)
    corrected_image.CopyInformation(image)
    return corrected_image

# Apply the label correction to every label file in a directory
def correct_all_type_labels(labels_tr_dir):
    """Rewrite every label image in ``labels_tr_dir`` with corrected label values.

    Each non-hidden entry of the directory is visited in sorted order, read with
    SimpleITK, passed through ``correct_labels`` and written back to the same path,
    overwriting the original file.

    Args:
        labels_tr_dir: Directory that contains the label images.
    """
    visible_entries = filter(
        lambda entry: not entry.startswith('.'),
        sorted(os.listdir(labels_tr_dir)),
    )

    for entry in visible_entries:
        label_path = os.path.join(labels_tr_dir, entry)
        original = sitk.ReadImage(label_path)
        # Overwrite the file in place with the corrected labels.
        sitk.WriteImage(correct_labels(original), label_path)


# ## uncomment to run ###
# correct_all_type_labels(labels_tr_dir)

# create dataset.json
# Single input channel: index 0 is the CT image.
channel_names = {0: "CT"}

# Label name -> integer value used in the segmentation files.
labels = {
    'background': 0,
    'pancreas': 1,
    'lesion': 2
}

file_ending = '.nii.gz'

# Count only real image files. Hidden entries (e.g. .DS_Store) and files with another
# extension must not inflate the number of training cases. With a single channel
# there is one image file per case.
num_training_cases = sum(
    1
    for name in os.listdir(images_tr_dir)
    if not name.startswith('.') and name.endswith(file_ending)
)

# ## uncomment to run ###
# generate_dataset_json.generate_dataset_json(nnunet_base, channel_names, labels, num_training_cases, file_ending)

In [4]:
def correct_spacing(image_path, seg_path):
    """Make a segmentation's voxel spacing match the spacing of its image.

    Both files are read with SimpleITK. If the two spacings already agree
    (``np.allclose`` with ``atol=1e-7``) nothing is written. Otherwise the image
    spacing is set on the segmentation and the segmentation file is overwritten
    in place.

    Args:
        image_path: Path of the reference image.
        seg_path: Path of the segmentation; overwritten when its spacing differs.
    """
    reference = sitk.ReadImage(image_path)
    segmentation = sitk.ReadImage(seg_path)

    target_spacing = reference.GetSpacing()
    current_spacing = segmentation.GetSpacing()

    # Nothing to do when the spacings already match within tolerance.
    if np.allclose(target_spacing, current_spacing, atol=1e-7):
        return

    print(f"Correcting spacing for {seg_path}")
    segmentation.SetSpacing(target_spacing)

    # Persist the corrected segmentation over the original file.
    sitk.WriteImage(segmentation, seg_path)
    print(f"Corrected segmentation saved to {seg_path}")

# Correct spacing for the segmentation files
# list all files under imagesTr

#  ## uncomment to run ###
# imageFilesInDir = [sdir for sdir in sorted(os.listdir(images_tr_dir)) if not sdir.startswith('.')]

# for imageIdx, image in enumerate(imageFilesInDir):
#     imgPath = os.path.join(images_tr_dir, image)
#     segPath = os.path.join(labels_tr_dir, image.replace('_0000', ''))

#     correct_spacing(imgPath, segPath)

In [5]:
fold = 0

# Define the necessary arguments
args = [
    "script_name",  # This is a placeholder for the script name
    "Dataset876_UHNMedImg3D",  # dataset_name_or_id
    "3d_fullres",  # configuration
    str(fold),  # fold
    # '-tr', 'nnUNetTrainer',  # optional: trainer_class_name
    # '-p', 'nnUNetPlans',  # optional: plans_identifier
]

# The entry point is called without arguments, so the command line is supplied
# through sys.argv. Put the kernel's original argv back afterwards (also on error or
# interrupt) so later cells do not inherit these training arguments.
_original_argv = sys.argv
sys.argv = args
try:
    # Run the training entry function
    run_training_entry()
finally:
    sys.argv = _original_argv


############################
INFO: You are using the old nnU-Net default plans. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################

2024-08-02 21:13:48.724400: do_dummy_2d_data_aug: False
2024-08-02 21:13:48.727896: Using splits from existing split file: /home/bhatti_uhn/nnUNet_preprocessed/Dataset876_UHNMedImg3D/splits_final.json
2024-08-02 21:13:48.728107: The split file contains 5 splits.
2024-08-02 21:13:48.728178: Desir

  self.grad_scaler = GradScaler() if self.device.type == 'cuda' else None


using pin_memory on device 0


KeyboardInterrupt: 

In [4]:
# NOTE(review): the output recorded below reports nnUNet_raw, nnUNet_preprocessed and
# nnUNet_results as "not defined". Presumably this cell was run in a kernel where the
# first cell (which sets them via os.environ) had not been executed -- confirm, and
# run that cell first.
!nnUNetv2_plan_and_preprocess -d 876 --verify_dataset_integrity

nnUNet_raw is not defined and nnU-Net can only be used on data for which preprocessed files are already present on your system. nnU-Net cannot be used for experiment planning and preprocessing like this. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up properly.
nnUNet_preprocessed is not defined and nnU-Net can not be used for preprocessing or training. If this is not intended, please read documentation/setting_up_paths.md for information on how to set this up.
nnUNet_results is not defined and nnU-Net cannot be used for training or inference. If this is not intended behavior, please read documentation/setting_up_paths.md for information on how to set this up.
Fingerprint extraction...
Traceback (most recent call last):
  File "/home/uhn_venv/bin/nnUNetv2_plan_and_preprocess", line 8, in <module>
    sys.exit(plan_and_preprocess_entry())
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/bhatti_uhn/nnU-net/nnunetv2/experi

In [None]:
# # Assuming your dataset has 'images' and 'labels' subdirectories
# move_and_rename_files(os.path.join(base_dir, 'images'), images_tr_dir, 'patient')
# move_and_rename_files(os.path.join(base_dir, 'labels'), labels_tr_dir, 'patient', is_label=True)

# # Create dataset.json
# dataset_json = {
#     "name": "UHNMedImg3D",
#     "description": "UHN Medical Imaging 3D Machine Learning Quiz Dataset",
#     "tensorImageSize": "4D",
#     "reference": "",
#     "licence": "",
#     "release": "0.0",
#     "modality": {
#         "0": "CT"
#     },
#     "labels": {
#         "0": "background",
#         "1": "label1",
#         "2": "label2"
#     },
#     "numTraining": len(os.listdir(images_tr_dir)),
#     "numTest": 0,  # Update this if you have test images
#     "training": [
#         {
#             "image": f"./imagesTr/patient_{i+1:03d}_0000.nii.gz",
#             "label": f"./labelsTr/patient_{i+1:03d}.nii.gz"
#         } for i in range(len(os.listdir(images_tr_dir)))
#     ],
#     "test": []  # Update this if you have test images
# }

# # Save dataset.json
# with open(os.path.join(nnunet_base, 'dataset.json'), 'w') as f:
#     json.dump(dataset_json, f, indent=4)

# print("Dataset arranged successfully.")