# Prostate Gland Extraction

The segmented prostate gland region is extracted from all modalities.

In [None]:
import os
import numpy as np
import nibabel as nib
from pathlib import Path

First, find the patient and the slice with the largest prostate area across all masks, and measure the width, height and depth of that prostate mask. Then, for each patient, find the slice with the largest prostate area, centre the crop window on it, and extract the region with the provided width, height and depth.

In [None]:
def find_max_tumor_slice(mask_path):
    """Find the slice of a mask volume whose voxel values sum highest.

    Args:
        mask_path: Path of a mask file that ``nib.load`` can open.

    Returns:
        ``((slice_index, slice_sum), mask_data)`` where ``mask_data`` is the
        array returned by ``get_fdata()`` and ``slice_index`` counts along
        its third axis.
    """
    volume = nib.load(mask_path).get_fdata()

    # One sum per slice along the third axis.
    slice_sums = [volume[:, :, index].sum() for index in range(volume.shape[2])]

    # max() keeps the first maximal element, so ties go to the lowest index.
    best_index = max(range(len(slice_sums)), key=slice_sums.__getitem__)
    return (best_index, slice_sums[best_index]), volume

def get_tumor_coordinates(mask_data, threshold=0):
    """Return the bounding-box corners of all voxels above ``threshold``.

    Args:
        mask_data: Array (or array-like) holding the mask values.
        threshold: Voxels strictly greater than this value count as
            foreground.

    Returns:
        ``(min_coords, max_coords)``: two integer arrays with one entry per
        axis of ``mask_data``, giving the smallest and the largest foreground
        index along that axis (both inclusive).

    Raises:
        ValueError: If no voxel is greater than ``threshold``.
    """
    foreground = np.argwhere(np.asarray(mask_data) > threshold)

    # Without this guard NumPy fails with an opaque "zero-size array to
    # reduction operation" error when the mask is empty.
    if foreground.size == 0:
        raise ValueError(f"mask contains no voxels above threshold {threshold}")

    return foreground.min(axis=0), foreground.max(axis=0)

def get_tumor_dimensions(min_coords, max_coords):
    """Turn inclusive bounding-box corners into per-axis extents.

    Args:
        min_coords: Smallest index of the box on axes 0, 1 and 2.
        max_coords: Largest index of the box on axes 0, 1 and 2.

    Returns:
        ``(width, height, depth)``: the extents along axes 0, 1 and 2
        respectively, counting both end points.
    """
    def _extent(axis):
        # Both corners are inclusive, hence the +1.
        return max_coords[axis] - min_coords[axis] + 1

    return _extent(0), _extent(1), _extent(2)

def _centered_window(center, size, limit):
    """Return integer ``(start, stop)`` bounds of a crop window on one axis.

    The window spans ``size`` voxels centred on ``center``. A window that
    would cross index 0 or ``limit`` is shifted back inside the axis, and a
    window larger than the axis is clipped to ``[0, limit]``.
    """
    half = size / 2
    lower, upper = center - half, center + half
    if lower < 0:
        lower, upper = 0, size
    if upper > limit:
        lower, upper = limit - size, limit
    # When size > limit the shift above yields a negative start, which a
    # slice would read as "count from the end"; clip it instead.
    return max(0, int(lower)), min(limit, int(upper))


def extract_tumor_region(original_path, mask_path, output_path, patient_number, width, height, depth, width_slack=0, height_slack=0):
    """Crop one patient's mask and images to a fixed-size window and save them.

    The window is centred in-plane on the mean position of the voxels > 0 in
    the mask slice with the largest sum, and along the third axis on that
    slice. In every cropped image, voxels where the cropped mask equals zero
    are set to zero. Results are written to ``output_path/patient_number``
    under the original file names.

    Args:
        original_path: Directory containing the image files.
        mask_path: Directory containing the mask files.
        output_path: Root output directory.
        patient_number: Filename prefix identifying the patient.
        width: Window size along the second array axis.
        height: Window size along the first array axis.
        depth: Window size along the third array axis.
        width_slack: Extra margin added on each side along the second axis.
        height_slack: Extra margin added on each side along the first axis.

    Raises:
        IndexError: If no mask file in ``mask_path`` matches the patient.
        ValueError: If the selected mask slice has no voxel greater than 0.
    """
    # NOTE(review): prefix matching means patient "1" would also pick up
    # files of patient "10"; confirm patient IDs are fixed-width.
    mask_files = [name for name in os.listdir(mask_path)
                  if name.endswith('nii.gz') and name.startswith(patient_number)]
    mask_name = mask_files[0]
    mask = nib.load(os.path.join(mask_path, mask_name))
    mask_data = mask.get_fdata()

    # Slice with the largest mask sum along the third axis.
    largest_slice = int(np.argmax(np.sum(mask_data, axis=(0, 1))))

    foreground = np.argwhere(mask_data[:, :, largest_slice] > 0)
    if foreground.size == 0:
        # Otherwise the centre becomes NaN and fails later with a cryptic
        # "cannot convert float NaN to integer".
        raise ValueError(f"Mask {mask_name} contains no foreground voxels.")
    center = foreground.mean(axis=0)

    # The slack applies on both sides, so the full window is size + 2 * slack
    # whether or not it had to be shifted away from a border.
    rows, cols, slices = mask_data.shape[:3]
    h_s, h_e = _centered_window(center[0], height + 2 * height_slack, rows)
    w_s, w_e = _centered_window(center[1], width + 2 * width_slack, cols)
    start, end = _centered_window(largest_slice, depth, slices)
    crop = (slice(h_s, h_e), slice(w_s, w_e), slice(start, end))

    extracted_mask = mask_data[crop]

    patient_dir = os.path.join(output_path, f'{patient_number}')
    os.makedirs(patient_dir, exist_ok=True)

    # NOTE(review): the affine is passed through unchanged although the
    # array was cropped; confirm downstream code does not rely on it.
    nib.save(nib.Nifti1Image(extracted_mask, mask.affine),
             os.path.join(patient_dir, mask_name))

    image_names = [name for name in os.listdir(original_path)
                   if name.startswith(patient_number)]
    for image_name in image_names:
        img = nib.load(os.path.join(original_path, image_name))

        # Same window as the mask, so both arrays stay aligned.
        extracted_image = img.get_fdata()[crop]

        # Keep image values only where the mask is non-zero.
        modified_data = np.where(extracted_mask == 0, 0, extracted_image)

        nib.save(nib.Nifti1Image(modified_data, img.affine),
                 os.path.join(patient_dir, image_name))

In [None]:
def extract_prostate(original_path, mask_path, output_path, width=120, height=120, depth=8, width_slack=0, height_slack=0):
    """Report the largest mask slice of the cohort and crop every patient.

    First scans all ``.nii.gz`` masks in ``mask_path`` for the slice with the
    largest sum and prints the bounding box and dimensions of that mask.
    These measurements are only printed; the crop window itself comes from
    the ``width``/``height``/``depth`` arguments. Then every patient found in
    ``mask_path`` is cropped with ``extract_tumor_region``.

    Args:
        original_path: Directory containing the image files.
        mask_path: Directory containing the mask files; the text before the
            first ``_`` of a file name is used as the patient number.
        output_path: Root output directory; one sub-folder per patient.
        width: Crop width passed to ``extract_tumor_region``.
        height: Crop height passed to ``extract_tumor_region``.
        depth: Crop depth passed to ``extract_tumor_region``.
        width_slack: Width margin passed to ``extract_tumor_region``.
        height_slack: Height margin passed to ``extract_tumor_region``.

    Raises:
        FileNotFoundError: If ``mask_path`` holds no ``.nii.gz`` file.
    """
    mask_listing = os.listdir(mask_path)

    # Best slice so far as (filename, slice index, slice sum, mask volume).
    global_max_tumor_region = None
    for mask_filename in mask_listing:
        if not mask_filename.endswith(".nii.gz"):
            continue
        (slice_index, slice_sum), mask_data = find_max_tumor_slice(
            os.path.join(mask_path, mask_filename))

        # Compare sum with sum (entry 2); entry 1 is the slice index.
        if global_max_tumor_region is None or slice_sum > global_max_tumor_region[2]:
            global_max_tumor_region = (mask_filename, slice_index, slice_sum, mask_data)

    if global_max_tumor_region is None:
        raise FileNotFoundError(f"No '.nii.gz' mask files found in {mask_path}")

    best_name, best_slice, best_sum, best_volume = global_max_tumor_region

    # Bounding box of the whole mask volume that owns the best slice.
    min_coords, max_coords = get_tumor_coordinates(best_volume)
    measured_width, measured_height, measured_depth = get_tumor_dimensions(min_coords, max_coords)

    print(f"The global maximum tumor region is in {best_name} on slice {best_slice} with a tumor pixel sum of {best_sum}")
    print(f"Tumor coordinates in bounding box: Min = {min_coords}, Max = {max_coords}")
    print(f"Tumor dimensions: Width = {measured_width}, Height = {measured_height}, Depth = {measured_depth}")

    # One entry per patient, in listing order, even if a patient has
    # several mask files.
    patient_numbers = list(dict.fromkeys(
        name.split('_')[0] for name in mask_listing if name.endswith('nii.gz')))

    for patient_number in patient_numbers:
        Path(os.path.join(output_path, f'{patient_number}')).mkdir(parents=True, exist_ok=True)
        extract_tumor_region(original_path, mask_path, output_path, patient_number,
                             width, height, depth, width_slack, height_slack)
        print(f"Extraction of Patient {patient_number} completed.")


In [None]:
# Image folder, gland-mask folder and destination folder for the crops.
original_path, mask_path, output_path = (
    '/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/nnUNet_raw_data_base/nnUNet_raw_data/Task005_Prostate/imagesTs',
    "/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Gland_Segmentation/Results",
    '/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Extracted_Prostate_Gland',
)

print("Extraction Started")
extract_prostate(
    original_path=original_path,
    mask_path=mask_path,
    output_path=output_path,
)
print("Extraction Completed")

Convert the NIfTI files to `.npy` files so that they can be loaded easily later.

In [None]:
def convert_numpy_format(original_path, output_directory):
    """Save every ``.nii.gz`` file of every patient folder as a ``.npy`` array.

    Args:
        original_path: Directory whose sub-directories (one per patient)
            contain the ``.nii.gz`` files.
        output_directory: Directory that receives one sub-directory per
            patient holding the converted ``.npy`` files.
    """
    # Only sub-directories are treated as patients; plain files are skipped.
    patient_dirs = [
        entry for entry in os.listdir(original_path)
        if os.path.isdir(os.path.join(original_path, entry))
    ]

    for patient_number in patient_dirs:
        source_dir = os.path.join(original_path, f'{patient_number}')
        target_dir = os.path.join(output_directory, f'{patient_number}')
        Path(target_dir).mkdir(parents=True, exist_ok=True)

        for file_name in os.listdir(source_dir):
            if not file_name.endswith('.nii.gz'):
                continue

            volume = nib.load(os.path.join(source_dir, file_name)).get_fdata()

            # Two splitext passes drop ".gz" first and ".nii" second.
            stem = os.path.splitext(os.path.splitext(file_name)[0])[0]
            np.save(os.path.join(target_dir, f'{stem}.npy'), volume)

# Source folder with the cropped NIfTI volumes and target folder for the
# NumPy copies.
original_path, output_path = (
    '/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Extracted_Prostate_Gland',
    '/local_ssd/practical_wise24/prostate_cancer/NNUNetModel/Extracted_Prostate_Gland_Numpy',
)
convert_numpy_format(original_path=original_path, output_directory=output_path)