# Data Preparation for Prostate Gland Segmentation

In [None]:
import os
from pathlib import Path
import shutil
import json
import SimpleITK as sitk
from picai_prep import MHA2nnUNetConverter

We need the T2w, ADC and DWI (HBV) modalities of each patient. We select them and place them in a new directory, renaming the files according to the convention that nnU-Net uses in its pretrained prostate model. The voxel spacing is set to [0.5, 0.5, 3] and the volumes are cropped to (320, 320, 20) so that all data has a fixed shape. Note that the settings written by the code below list both in reverse axis order: `[3.0, 0.5, 0.5]` and `[20, 320, 320]`.

E.g. `xxx_xxx_0000.nii.gz` for T2w, `xxx_xxx_0001.nii.gz` for ADC and `xxx_xxx_0002.nii.gz` for HBV.

In [None]:
def prepare_test_data_with_picai_pipeline(source_dir, base_dir, json_path, preprocessed_path, is_resample):
    """Stage the private scans as a picai_prep archive and convert them to nnU-Net format.

    For every entry of ``source_dir`` that contains all three required scans,
    the scans are copied to ``base_dir/<id>/<id>_<id>_<modality>.mha``, where
    ``<id>`` is the last three characters of the entry name. The mha2nnunet
    settings are then generated at ``json_path``, their ``"preprocessing"``
    section is overwritten, and ``MHA2nnUNetConverter`` is run on ``base_dir``.

    Args:
        source_dir: Directory whose entries are the per-subject scan folders.
        base_dir: Directory that receives the renamed copies.
        json_path: Path of the settings JSON (written, patched and re-read).
        preprocessed_path: Output directory passed to the converter.
        is_resample: If true, request spacing ``[3.0, 0.5, 0.5]`` and a centre
            crop to ``[20, 320, 320]``; otherwise the preprocessing section is
            left empty.
    """
    from picai_prep.examples.mha2nnunet.picai_archive_inference import generate_mha2nnunet_settings

    # Source file name -> modality abbreviation used in the archive file name.
    # This is also the single list of scans a subject must provide.
    mapping = {"Pelvis_t2_spc_rst_tra_p2_iso.mha": "t2w", "Pelvis_ep2d_diff_tra_ADC.mha": "adc", "Pelvis_ep2d_diff_tra.mha": "hbv"}

    for subject in os.listdir(source_dir):
        study_dir = os.path.join(source_dir, subject)

        # Subjects missing any required scan are skipped entirely.
        if not all(os.path.exists(os.path.join(study_dir, scan_name)) for scan_name in mapping):
            continue

        # NOTE(review): only the last three characters identify the subject, so two
        # entries sharing that suffix would overwrite each other - confirm naming.
        subject_id = subject[-3:]
        subject_out_dir = os.path.join(base_dir, subject_id)
        os.makedirs(subject_out_dir, exist_ok=True)

        for sequence_name, abbreviation in mapping.items():
            filename = f'{subject_id}_{subject_id}_{abbreviation}.mha'
            shutil.copy(os.path.join(study_dir, sequence_name), os.path.join(subject_out_dir, filename))

    generate_mha2nnunet_settings(
        archive_dir=base_dir,
        output_path=json_path,
        task="Private_Dataset",
    )

    # Context manager so the handle is released even if the JSON is malformed.
    with open(json_path) as infile:
        data = json.load(infile)

    if is_resample:
        # Resample and perform centre crop.
        data["preprocessing"] = {
            "matrix_size": [20, 320, 320],
            "spacing": [3.0, 0.5, 0.5],
        }
    else:
        data["preprocessing"] = {}

    with open(json_path, "w") as outfile:
        json.dump(data, outfile, indent=4)

    archive = MHA2nnUNetConverter(
        scans_dir=base_dir,
        output_dir=preprocessed_path,
        mha2nnunet_settings=json_path,
    )
    archive.convert()

Registered data will be processed.

In [None]:
# Registered private dataset, resampled and centre-cropped.
source_dir = "/local_ssd/practical_wise24/prostate_cancer/clean_dataset"
base_dir = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Private_Dataset_Registered"
preprocessed_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Private_Dataset_Registered_Preprocessed"
json_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/mha2nnunet_settings_private_dataset_registered.json"
prepare_test_data_with_picai_pipeline(
    source_dir=source_dir,
    base_dir=base_dir,
    json_path=json_path,
    preprocessed_path=preprocessed_path,
    is_resample=True,
)

Unregistered data will be processed and voxel spacing and resolution will be changed.

In [None]:
# Unregistered (raw) private dataset, resampled and centre-cropped.
source_dir = "/local_ssd/practical_wise24/prostate_cancer/mha_raw_images"
base_dir = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset"
preprocessed_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed_2"
json_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/mha2nnunet_settings_private_dataset.json"
prepare_test_data_with_picai_pipeline(
    source_dir=source_dir,
    base_dir=base_dir,
    json_path=json_path,
    preprocessed_path=preprocessed_path,
    is_resample=True,
)

Unregistered data will be processed but voxel spacing and resolution will not be changed.

In [None]:
# Unregistered (raw) private dataset, kept at its native spacing and size.
source_dir = "/local_ssd/practical_wise24/prostate_cancer/mha_raw_images"
base_dir = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset"
preprocessed_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed"
json_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/mha2nnunet_settings_private_dataset_no_resize.json"
prepare_test_data_with_picai_pipeline(
    source_dir=source_dir,
    base_dir=base_dir,
    json_path=json_path,
    preprocessed_path=preprocessed_path,
    is_resample=False,
)

In [None]:
def copy_and_create_dataset_json(source_directory,destination_directory,json_file):
    """Copy test images into ``destination_directory`` and refresh the test list in ``json_file``.

    Every regular file directly inside ``source_directory`` is copied, except
    files ending in ``0002.nii.gz``. Afterwards the ``numTest`` and ``test``
    entries of ``json_file`` are rebuilt from the files present in
    ``destination_directory``; all other keys of the JSON are kept.

    Only files named ``<case>_<4 digits>.nii.gz`` contribute a case id, so
    unrelated files in the destination do not produce bogus entries. The
    ``test`` list is sorted so that repeated runs write an identical file.

    Args:
        source_directory: Folder holding the preprocessed images. If it does
            not exist a message is printed and nothing is copied; the JSON is
            still refreshed from the destination folder.
        destination_directory: Folder that receives the copies (created if
            missing when the source exists).
        json_file: Path of the dataset JSON that is updated in place.
    """
    if os.path.isdir(source_directory):
        os.makedirs(destination_directory, exist_ok=True)

        for name in os.listdir(source_directory):
            source_file_path = os.path.join(source_directory, name)
            # Skip sub-directories and the third channel (suffix 0002).
            if name.endswith("0002.nii.gz") or not os.path.isfile(source_file_path):
                continue
            # shutil.copy2 preserves metadata
            shutil.copy2(source_file_path, os.path.join(destination_directory, name))

        print("Files copied successfully.")
    else:
        print("Source directory does not exist.")

    # Context manager so the handle is released even if the JSON is malformed.
    with open(json_file) as infile:
        data = json.load(infile)

    # A channel file is "<case>_NNNN.nii.gz"; the case id is what precedes that suffix.
    channel_suffix_length = len("_0000.nii.gz")
    case_ids = set()
    for name in os.listdir(destination_directory):
        case_id = name[:-channel_suffix_length]
        suffix = name[-channel_suffix_length:]
        is_channel_file = suffix.startswith("_") and suffix[1:5].isdigit() and suffix.endswith(".nii.gz")
        if case_id and is_channel_file:
            case_ids.add(case_id)

    ordered_case_ids = sorted(case_ids)
    data['numTest'] = len(ordered_case_ids)
    data['test'] = [f"./imagesTs/{case_id}.nii.gz" for case_id in ordered_case_ids]

    with open(json_file, "w") as outfile:
        json.dump(data, outfile, indent=4)

The preprocessed data will be transferred to the prostate gland segmentation folder for inference, and the `dataset.json` file will be updated.

In [None]:
# Preprocessed registered private dataset -> Task005_Prostate test images.
source_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Private_Dataset_Registered_Preprocessed/Private_Dataset/imagesTr"
destination_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs"
json_file = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/dataset.json"
copy_and_create_dataset_json(
    source_directory=source_directory,
    destination_directory=destination_directory,
    json_file=json_file,
)

The PI-CAI dataset annotated by AI will be preprocessed. The voxel spacing and resolution will be set to the provided values.

In [None]:
def convert_picai_ai_annotated_data_to_nnunet_raw_format(
    scans_dir="/local_ssd/practical_wise24/prostate_cancer/PICAIDataset/input/images",
    annotations_dir="/local_ssd/practical_wise24/prostate_cancer/PICAIDataset/input/picai_labels/csPCa_lesion_delineations/AI/Bosma22a",
    output_dir="/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw",
    settings_path="/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/mha2nnunet_settings_ai.json",
    task="Dataset950_ProstateLesion",
):
    """Convert the AI-annotated archive to nnU-Net raw format.

    Generates the mha2nnunet settings at ``settings_path``, overwrites their
    ``"preprocessing"`` section (spacing ``[3.0, 0.5, 0.5]``, matrix size
    ``[20, 320, 320]``), runs ``MHA2nnUNetConverter`` and finally asks it to
    create the dataset JSON.

    Calling the function without arguments uses the original hard-coded
    locations; each path is named once so the generator and the converter
    cannot be pointed at different files by accident.

    Args:
        scans_dir: Folder with the input images.
        annotations_dir: Folder with the lesion delineations.
        output_dir: Folder that receives the converted dataset.
        settings_path: Path of the settings JSON that is written and patched.
        task: Task/dataset name passed to the settings generator.
    """
    from picai_prep.examples.mha2nnunet.picai_archive import generate_mha2nnunet_settings

    generate_mha2nnunet_settings(
        archive_dir=scans_dir,
        annotations_dir=annotations_dir,
        output_path=settings_path,
        task=task,
    )

    # Context manager so the handle is released even if the JSON is malformed.
    with open(settings_path) as infile:
        data = json.load(infile)

    # Resample and perform centre crop.
    data["preprocessing"] = {
        "matrix_size": [20, 320, 320],
        "spacing": [3.0, 0.5, 0.5],
    }

    with open(settings_path, "w") as outfile:
        json.dump(data, outfile, indent=4)

    archive = MHA2nnUNetConverter(
        scans_dir=scans_dir,
        annotations_dir=annotations_dir,
        output_dir=output_dir,
        mha2nnunet_settings=settings_path,
    )
    archive.convert()
    archive.create_dataset_json()

In [None]:
# Generate the settings file and convert the AI-annotated archive (default paths).
convert_picai_ai_annotated_data_to_nnunet_raw_format()

The PI-CAI dataset annotated by human experts will be preprocessed. The voxel spacing and resolution will be set to the provided values.

In [None]:
def convert_picai_human_annotated_data_to_nnunet_raw_format(
    scans_dir="/local_ssd/practical_wise24/prostate_cancer/PICAIDataset/input/images",
    annotations_dir="/local_ssd/practical_wise24/prostate_cancer/PICAIDataset/input/picai_labels/csPCa_lesion_delineations/human_expert/resampled",
    output_dir="/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw",
    settings_path="/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/mha2nnunet_settings_human.json",
    task="Dataset999_ProstateLesion",
):
    """Convert the human-expert-annotated archive to nnU-Net raw format.

    Generates the mha2nnunet settings at ``settings_path``, overwrites their
    ``"preprocessing"`` section (spacing ``[3.0, 0.5, 0.5]``, matrix size
    ``[20, 320, 320]``), runs ``MHA2nnUNetConverter`` and finally asks it to
    create the dataset JSON.

    Calling the function without arguments uses the original hard-coded
    locations; each path is named once so the generator and the converter
    cannot be pointed at different files by accident.

    Args:
        scans_dir: Folder with the input images.
        annotations_dir: Folder with the lesion delineations.
        output_dir: Folder that receives the converted dataset.
        settings_path: Path of the settings JSON that is written and patched.
        task: Task/dataset name passed to the settings generator.
    """
    from picai_prep.examples.mha2nnunet.picai_archive import generate_mha2nnunet_settings

    generate_mha2nnunet_settings(
        archive_dir=scans_dir,
        annotations_dir=annotations_dir,
        output_path=settings_path,
        task=task,
    )

    # Context manager so the handle is released even if the JSON is malformed.
    with open(settings_path) as infile:
        data = json.load(infile)

    # Resample and perform centre crop.
    data["preprocessing"] = {
        "matrix_size": [20, 320, 320],
        "spacing": [3.0, 0.5, 0.5],
    }

    with open(settings_path, "w") as outfile:
        json.dump(data, outfile, indent=4)

    archive = MHA2nnUNetConverter(
        scans_dir=scans_dir,
        annotations_dir=annotations_dir,
        output_dir=output_dir,
        mha2nnunet_settings=settings_path,
    )
    archive.convert()
    archive.create_dataset_json()

In [None]:
# Generate the settings file and convert the human-expert-annotated archive (default paths).
convert_picai_human_annotated_data_to_nnunet_raw_format()

Patients whose labels contain a non-empty mask are selected.

In [None]:
def get_file_names(path):
    """Return the patient prefix of every label file in ``path`` that has a non-zero voxel.

    Each ``.nii.gz`` file in ``path`` is read with SimpleITK; if its voxel
    array contains any non-zero value, the part of the file name before the
    first underscore is collected. Files are visited in sorted order so the
    result does not depend on directory listing order. The number of collected
    entries is printed.

    Args:
        path: Folder containing the label images.

    Returns:
        list[str]: One prefix per qualifying file (a prefix appears more than
        once if several of its files qualify).
    """
    patient_list = []
    for file in sorted(os.listdir(path)):
        if not file.endswith('.nii.gz'):
            continue

        image_array = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path, file)))

        # ndarray.any() checks all voxels in compiled code; the builtin any()
        # over a flattened copy would iterate voxel by voxel in Python.
        if image_array.any():
            patient_list.append(file.split('_')[0])

    print(len(patient_list))
    return patient_list

# Human-expert labels: patients with at least one non-empty mask.
source_path_human = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset999_ProstateLesion/labelsTr'
patients_with_mask_human = get_file_names(source_path_human)

# AI labels: same selection, then drop patients already covered by a human mask.
source_path_ai = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset950_ProstateLesion/labelsTr'
patients_with_mask_ai = get_file_names(source_path_ai)
filtered_ai_list = [
    patient
    for patient in patients_with_mask_ai
    if patient not in patients_with_mask_human
]


The selected patients' data is transferred to the training folder.

In [None]:
import shutil

def copy_human_or_ai(source_path,output_path,dirs):
    """Copy the images and labels of the selected cases between two dataset folders.

    For every identifier in ``dirs``, all files in ``source_path/imagesTr`` and
    ``source_path/labelsTr`` whose name starts with that identifier are copied
    to the same sub-folder of ``output_path``.

    An identifier only matches when it is followed by ``_`` or ``.`` (or is the
    whole file name), so ``"100"`` does not also select ``"1000_..."``.

    Args:
        source_path: Dataset folder containing ``imagesTr`` and ``labelsTr``.
        output_path: Dataset folder receiving the copies; the two sub-folders
            are created if they do not exist.
        dirs: Iterable of identifier strings (file-name prefixes). Duplicates
            are processed once.
    """
    def _belongs_to(filename, identifier):
        # Require a delimiter right after the identifier to avoid prefix over-matching.
        next_char = filename[len(identifier):len(identifier) + 1]
        return filename.startswith(identifier) and next_char in ("", "_", ".")

    subfolders = ("imagesTr", "labelsTr")

    # List both source folders up front so a missing folder fails before anything is copied.
    listings = {name: os.listdir(os.path.join(source_path, name)) for name in subfolders}

    # Drop repeated identifiers while keeping their order; copying twice is wasted work.
    identifiers = list(dict.fromkeys(dirs))

    for subfolder in subfolders:
        src_dir = os.path.join(source_path, subfolder)
        dst_dir = os.path.join(output_path, subfolder)
        os.makedirs(dst_dir, exist_ok=True)

        for identifier in identifiers:
            for file in listings[subfolder]:
                if _belongs_to(file, identifier):
                    shutil.copy(os.path.join(src_dir, file), os.path.join(dst_dir, file))

# Source datasets (human-expert and AI annotations) and the merged target dataset.
human_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset999_ProstateLesion"
ai_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset950_ProstateLesion"
output_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI"

# Make sure the target folder layout exists before copying.
(Path(output_path) / "imagesTr").mkdir(parents=True, exist_ok=True)
(Path(output_path) / "imagesTs").mkdir(parents=True, exist_ok=True)
(Path(output_path) / "labelsTr").mkdir(parents=True, exist_ok=True)

# Human-annotated patients first, then the AI-annotated patients without a human mask.
copy_human_or_ai(source_path=human_path, output_path=output_path, dirs=patients_with_mask_human)
copy_human_or_ai(source_path=ai_path, output_path=output_path, dirs=filtered_ai_list)

The patient files are copied and the `dataset.json` file is updated.

In [None]:
# Merged human/AI dataset -> Task005_Prostate test images.
source_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI/imagesTr"
destination_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs"
json_file = "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/dataset.json"
copy_and_create_dataset_json(
    source_directory=source_directory,
    destination_directory=destination_directory,
    json_file=json_file,
)

Now you can run the prostate gland segmentation model.

Change the working directory to `/prostate-cancer-aggressiveness-prediction/src`.

Then run `sbatch run_prediction_nnunet.sh`.