# Data Preparation for Lesion Segmentation

In [None]:
import os
import shutil
import json

The PI-CAI data and the private data have already been preprocessed. We still need to copy them into place for training and testing, and to create the `dataset.json` file.

In [None]:
def copy_and_create_dataset_json(source_directory, destination_directory, task):
    """Copy test images into ``imagesTs`` and write ``dataset.json``.

    Every regular file directly inside ``source_directory`` is copied to
    ``<destination_directory>/imagesTs``. Afterwards a ``dataset.json`` file is
    written to ``destination_directory`` that lists the training cases found
    in ``labelsTr`` and the test cases found in ``imagesTs``.

    Args:
        source_directory: Folder holding the images to copy into ``imagesTs``.
            If it does not exist, a message is printed, nothing is copied and
            ``dataset.json`` is still written from whatever is already there.
        destination_directory: Dataset folder; must already contain
            ``labelsTr``.
        task: Value stored under the ``"task"`` key of ``dataset.json``.

    Raises:
        FileNotFoundError: If ``<destination_directory>/labelsTr`` is missing.
    """
    file_ending = ".nii.gz"
    test_dir = os.path.join(destination_directory, "imagesTs")

    if os.path.isdir(source_directory):
        os.makedirs(test_dir, exist_ok=True)
        for entry in sorted(os.listdir(source_directory)):
            source_file_path = os.path.join(source_directory, entry)
            # Only plain files are copied; sub-directories are ignored.
            if os.path.isfile(source_file_path):
                # shutil.copy2 preserves metadata
                shutil.copy2(source_file_path, os.path.join(test_dir, entry))
        print("Files copied successfully.")
    else:
        print("Source directory does not exist.")

    context = {
        "task": task,
        "description": "bpMRI scans from PI-CAI dataset to train nnUNet baseline",
        "tensorImageSize": "4D",
        "reference": "",
        "licence": "",
        "release": "1.0",
        "channel_names": {
            "0": "T2W",
            "1": "ADC",
            "2": "HBV"
        },
        "labels": {
            "background": 0,
            "gg1": 1,
            "gg2": 2,
            "gg3": 3,
            "gg4": 4,
            "gg5": 5
        },
        "name": "Hum_AI",
        "numTraining": 0,
        "training": [],
        "numTest": 0,
        "test": [],
        "file_ending": file_ending
    }

    # Sorted so that dataset.json is identical from run to run; stray files
    # that do not carry the dataset's file ending are not training cases.
    labels = sorted(
        name
        for name in os.listdir(os.path.join(destination_directory, "labelsTr"))
        if name.endswith(file_ending)
    )
    context["numTraining"] = len(labels)
    context["training"] = [
        {"image": f"./imagesTr/{label}", "label": f"./labelsTr/{label}"}
        for label in labels
    ]

    # imagesTs may be absent when the source directory was missing; treat that
    # as "no test cases" instead of failing after the message printed above.
    test_files = os.listdir(test_dir) if os.path.isdir(test_dir) else []

    # Test images are named "<case>_<4-digit channel>.nii.gz"; several channel
    # files collapse into one case id. Names that do not follow this pattern
    # are skipped rather than blindly truncated by 12 characters.
    patients = set()
    for name in test_files:
        if not name.endswith(file_ending):
            continue
        case_id, sep, channel = name[:-len(file_ending)].rpartition("_")
        if sep and case_id and len(channel) == 4 and channel.isdigit():
            patients.add(case_id)

    context["numTest"] = len(patients)
    context["test"] = [f"./imagesTs/{case_id}{file_ending}" for case_id in sorted(patients)]

    with open(os.path.join(destination_directory, "dataset.json"), "w", encoding="utf-8") as outfile:
        json.dump(context, outfile, indent=4)

In [None]:
# Build the raw dataset folder for Dataset600_Hum_AI.
task = "Dataset600_Hum_AI"
destination_directory = f"/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/{task}"

# The preprocessed private images live in a folder called "imagesTr", but the
# function below copies them into the destination's "imagesTs" folder.
source_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed_2/Private_Dataset/imagesTr"

copy_and_create_dataset_json(
    source_directory=source_directory,
    destination_directory=destination_directory,
    task=task,
)

In [None]:
def copy_and_create_dataset_json_777(source_directory, destination_directory, task):
    """Copy test images into ``imagesTs`` and write the ProstateLesion ``dataset.json``.

    Every regular file directly inside ``source_directory`` is copied to
    ``<destination_directory>/imagesTs``. Afterwards a ``dataset.json`` file is
    written to ``destination_directory`` that lists the training cases found
    in ``labelsTr`` and the test cases found in ``imagesTs``.

    Args:
        source_directory: Folder holding the images to copy into ``imagesTs``.
            If it does not exist, a message is printed, nothing is copied and
            ``dataset.json`` is still written from whatever is already there.
        destination_directory: Dataset folder; must already contain
            ``labelsTr``.
        task: Value stored under the ``"task"`` key of ``dataset.json``.

    Raises:
        FileNotFoundError: If ``<destination_directory>/labelsTr`` is missing.
    """
    file_ending = ".nii.gz"
    test_dir = os.path.join(destination_directory, "imagesTs")

    if os.path.isdir(source_directory):
        os.makedirs(test_dir, exist_ok=True)
        for entry in sorted(os.listdir(source_directory)):
            source_file_path = os.path.join(source_directory, entry)
            # Only plain files are copied; sub-directories are ignored.
            if os.path.isfile(source_file_path):
                # shutil.copy2 preserves metadata
                shutil.copy2(source_file_path, os.path.join(test_dir, entry))
        print("Files copied successfully.")
    else:
        print("Source directory does not exist.")

    context = {
        "task": task,
        "description": "bpMRI scans from PI-CAI dataset to train nnUNet baseline",
        "tensorImageSize": "4D",
        "reference": "",
        "licence": "",
        "release": "1.0",
        # NOTE(review): channel 1 is named "CT" here, while the sibling
        # copy_and_create_dataset_json uses "ADC". nnU-Net selects its
        # intensity normalization from the channel name, so this is presumably
        # deliberate — confirm before changing.
        "channel_names": {
            "0": "T2W",
            "1": "CT",
            "2": "HBV"
        },
        "labels": {
            "background": 0,
            "gg1": 1,
            "gg2": 2,
            "gg3": 3,
            "gg4": 4,
            "gg5": 5
        },
        "name": "ProstateLesion",
        "numTraining": 0,
        "training": [],
        "numTest": 0,
        "test": [],
        "file_ending": file_ending
    }

    # Sorted so that dataset.json is identical from run to run; stray files
    # that do not carry the dataset's file ending are not training cases.
    labels = sorted(
        name
        for name in os.listdir(os.path.join(destination_directory, "labelsTr"))
        if name.endswith(file_ending)
    )
    context["numTraining"] = len(labels)
    context["training"] = [
        {"image": f"./imagesTr/{label}", "label": f"./labelsTr/{label}"}
        for label in labels
    ]

    # imagesTs may be absent when the source directory was missing; treat that
    # as "no test cases" instead of failing after the message printed above.
    test_files = os.listdir(test_dir) if os.path.isdir(test_dir) else []

    # Test images are named "<case>_<4-digit channel>.nii.gz"; several channel
    # files collapse into one case id. Names that do not follow this pattern
    # are skipped rather than blindly truncated by 12 characters.
    patients = set()
    for name in test_files:
        if not name.endswith(file_ending):
            continue
        case_id, sep, channel = name[:-len(file_ending)].rpartition("_")
        if sep and case_id and len(channel) == 4 and channel.isdigit():
            patients.add(case_id)

    context["numTest"] = len(patients)
    context["test"] = [f"./imagesTs/{case_id}{file_ending}" for case_id in sorted(patients)]

    with open(os.path.join(destination_directory, "dataset.json"), "w", encoding="utf-8") as outfile:
        json.dump(context, outfile, indent=4)

In [None]:
# Build the raw dataset folder for Dataset777_ProstateLesion.
task = "Dataset777_ProstateLesion"
destination_directory = f"/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/{task}"

# The preprocessed private images live in a folder called "imagesTr", but the
# function below copies them into the destination's "imagesTs" folder.
source_directory = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed_2/Private_Dataset/imagesTr"

copy_and_create_dataset_json_777(
    source_directory=source_directory,
    destination_directory=destination_directory,
    task=task,
)

Now it's ready to run lesion training and testing. For training, go to `/src` and run `sbatch run_training_picai.sh`. This will take around 5 days.

For inference, run `sbatch run_lesion_inference.sh`. This will take 10-20 minutes.