# Lesion Extraction

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import nibabel as nib
from ipywidgets import interactive, widgets
from IPython.display import display
from pathlib import Path

For each patient, the slice in which the lesion has the largest area is located, and a region of the provided width, height and depth around it is extracted.

In [None]:
def find_max_tumor_slice(mask_path):
    """Load a mask volume and find the slice with the largest voxel sum.

    Args:
        mask_path: Path of the mask file; it is opened with ``nib.load``.

    Returns:
        A tuple ``(max_tumor_slice, mask_data, contain_mask)``:

        * ``max_tumor_slice`` is ``(slice_index, slice_sum)`` for the slice
          along the third axis whose sum is largest (the first one on ties).
        * ``mask_data`` is the full array returned by ``get_fdata()``.
        * ``contain_mask`` is ``True`` when that largest sum is non-zero.
    """
    mask_data = nib.load(mask_path).get_fdata()

    # One sum per slice along the third axis.
    slice_sums = [mask_data[:, :, index].sum() for index in range(mask_data.shape[2])]

    # max() keeps the first index among equal sums.
    best_index = max(range(len(slice_sums)), key=slice_sums.__getitem__)
    max_tumor_slice = (best_index, slice_sums[best_index])

    contain_mask = bool(max_tumor_slice[1] != 0)
    return max_tumor_slice, mask_data, contain_mask

def get_tumor_coordinates(mask_data, threshold=0):
    """Return the corners of the bounding box around all voxels above ``threshold``.

    Args:
        mask_data: Array (or array-like) to search.
        threshold: Voxels strictly greater than this value are selected.

    Returns:
        ``(min_coords, max_coords)``: arrays holding, per axis, the smallest
        and largest index (both inclusive) of the selected voxels.

    Raises:
        ValueError: If no voxel exceeds ``threshold``, so there is no
            bounding box to report.
    """
    tumor_coordinates = np.argwhere(np.asarray(mask_data) > threshold)

    # Fail with an explicit message instead of NumPy's "zero-size array"
    # reduction error when nothing was selected.
    if tumor_coordinates.shape[0] == 0:
        raise ValueError(
            f"No voxel in the mask exceeds threshold {threshold}; cannot compute a bounding box."
        )

    min_coords = tumor_coordinates.min(axis=0)
    max_coords = tumor_coordinates.max(axis=0)
    return min_coords, max_coords

def get_tumor_dimensions(min_coords, max_coords):
    """Compute the inclusive extent of a bounding box along its first three axes.

    Args:
        min_coords: Indexable holding the minimum index per axis.
        max_coords: Indexable holding the maximum index per axis.

    Returns:
        ``(width, height, depth)``: the extents along axes 0, 1 and 2, each
        computed as ``max - min + 1``.
    """
    def extent(axis):
        # Both corners are inclusive, hence the + 1.
        return max_coords[axis] - min_coords[axis] + 1

    return extent(0), extent(1), extent(2)

def _is_patient_file(file_name, patient_number):
    """Return True when ``file_name`` is ``patient_number`` followed by ``_`` or ``.``."""
    if not file_name.startswith(patient_number):
        return False
    # Require a delimiter after the id so that patient "10" does not also
    # match files of patient "100".
    return file_name[len(patient_number):len(patient_number) + 1] in ('_', '.')


def _crop_bounds(center, size, limit):
    """Return integer ``(start, stop)`` of a ``size``-long window around ``center`` kept inside ``[0, limit]``."""
    size = int(size)
    half = size / 2
    if center - half < 0:
        # Window would cross the lower edge: anchor it at 0.
        start = 0
    elif center + half > limit:
        # Window would cross the upper edge: anchor it at the end.
        start = limit - size
    else:
        start = int(center - half)
    # A window larger than the axis would give a negative start, which NumPy
    # interprets as "count from the end"; clamp instead.
    start = max(0, start)
    return start, min(limit, start + size)


def extract_tumor_region(original_path, mask_path, output_path, patient_number, width, height, depth, width_slack=0, height_slack=0):
    """Crop a fixed-size region around a patient's lesion from the mask and image series.

    The slice (third axis) with the largest mask sum is located, the centre of
    the mask voxels in that slice is computed, and a window of ``depth``
    slices, ``height + 2 * height_slack`` voxels along axis 0 and
    ``width + 2 * width_slack`` voxels along axis 1 is cut out around it. The
    window is shifted to stay inside the volume and is truncated if it is
    larger than the volume. Image voxels where the cropped mask is 0 are set
    to 0. Results are written to ``output_path/<patient_number>/`` under the
    original file names.

    Args:
        original_path: Folder with the image series; files of the patient
            ending in ``0000.nii.gz``, ``0001.nii.gz`` or ``0002.nii.gz`` are
            processed.
        mask_path: Folder with the mask files. The alphabetically first file
            belonging to the patient is used.
        output_path: Root folder for the cropped files.
        patient_number: Patient identifier (string). A file belongs to the
            patient when its name is this identifier followed by ``_`` or ``.``.
        width: Window size along axis 1.
        height: Window size along axis 0.
        depth: Number of slices along axis 2.
        width_slack: Extra margin added on both sides along axis 1.
        height_slack: Extra margin added on both sides along axis 0.

    Returns:
        None. Nothing is written when the mask has no voxel above 0 in its
        largest slice.

    Raises:
        FileNotFoundError: If ``mask_path`` holds no mask file for the patient.
    """
    series_suffixes = ('0000.nii.gz', '0001.nii.gz', '0002.nii.gz')
    original_series_files = sorted(
        file for file in os.listdir(original_path)
        if _is_patient_file(file, patient_number) and file.endswith(series_suffixes)
    )

    # Sorted so the chosen mask does not depend on directory listing order.
    mask_files = sorted(
        file for file in os.listdir(mask_path) if _is_patient_file(file, patient_number)
    )
    if not mask_files:
        raise FileNotFoundError(
            f"No mask file for patient {patient_number} found in {mask_path}"
        )
    mask_name = mask_files[0]
    mask = nib.load(os.path.join(mask_path, mask_name))
    mask_data = mask.get_fdata()

    # Slice with the largest tumor region.
    largest_slice = int(np.argmax(np.sum(mask_data, axis=(0, 1))))

    # Centre of the tumor voxels in that slice; skip patients without any.
    tumor_indices = np.argwhere(mask_data[:, :, largest_slice] > 0)
    if tumor_indices.size == 0:
        return
    center = tumor_indices.mean(axis=0)

    # Depth window: `depth` slices around the largest slice, shifted to stay
    # inside the volume and truncated if the volume has fewer slices.
    n_slices = mask_data.shape[2]
    start = (2 * largest_slice - depth) // 2
    if start < 0:
        start = 0
    elif start + depth > n_slices:
        start = max(0, n_slices - depth)
    end = min(n_slices, start + depth)

    # In-plane window; the slack is applied on both sides in every case so
    # the crop has the same size whether or not it had to be shifted.
    h_s, h_e = _crop_bounds(center[0], height + 2 * height_slack, mask_data.shape[0])
    w_s, w_e = _crop_bounds(center[1], width + 2 * width_slack, mask_data.shape[1])

    extracted_mask = mask_data[h_s:h_e, w_s:w_e, start:end]

    patient_dir = os.path.join(output_path, f'{patient_number}')
    Path(patient_dir).mkdir(parents=True, exist_ok=True)
    # NOTE: the source affine is reused unchanged, so the crop offset is not
    # reflected in the saved header.
    new_mask = nib.Nifti1Image(extracted_mask, mask.affine)
    nib.save(new_mask, os.path.join(patient_dir, mask_name))

    for file in original_series_files:
        img = nib.load(os.path.join(original_path, file))
        image_data = img.get_fdata()

        # Same window as the mask.
        extracted_image = image_data[h_s:h_e, w_s:w_e, start:end]

        # Zero out every voxel that lies outside the mask.
        modified_data = np.where(extracted_mask == 0, 0, extracted_image)

        new_img = nib.Nifti1Image(modified_data, img.affine)
        nib.save(new_img, os.path.join(patient_dir, file))

In [None]:
def extract_prostate(original_path, mask_path, output_path,width,height,depth):
    """Extract the lesion region of every patient whose mask contains a tumor.

    Every ``.nii.gz`` file in ``mask_path`` is scanned with
    ``find_max_tumor_slice``. The mask with the largest single-slice tumor sum
    is reported together with its bounding box (printed for information only),
    then ``extract_tumor_region`` is called once per patient with a non-empty
    mask, using the ``width``, ``height`` and ``depth`` given here.

    Args:
        original_path: Folder with the image series.
        mask_path: Folder with the ``.nii.gz`` mask files.
        output_path: Root folder for the extracted regions.
        width: Crop width forwarded to ``extract_tumor_region``.
        height: Crop height forwarded to ``extract_tumor_region``.
        depth: Crop depth forwarded to ``extract_tumor_region``.

    Raises:
        FileNotFoundError: If ``mask_path`` contains no ``.nii.gz`` file.
    """
    # (file name, slice index, slice sum, mask array) of the best mask so far.
    global_max_tumor_region = None
    patients_with_mask = []

    # Sorted so that processing order and tie-breaking are reproducible.
    for mask_filename in sorted(os.listdir(mask_path)):
        if not mask_filename.endswith(".nii.gz"):
            continue
        mask_file_path = os.path.join(mask_path, mask_filename)
        max_tumor_slice, mask_data, contain_mask = find_max_tumor_slice(mask_file_path)

        if contain_mask:
            # Strip the ".nii.gz" suffix.
            patients_with_mask.append(mask_filename[:-7])

        # Compare sums with sums: index 2 of the stored tuple is the slice
        # sum, index 1 is the slice number.
        if global_max_tumor_region is None or max_tumor_slice[1] > global_max_tumor_region[2]:
            global_max_tumor_region = (mask_filename, max_tumor_slice[0], max_tumor_slice[1], mask_data)

    if global_max_tumor_region is None:
        raise FileNotFoundError(f"No '.nii.gz' mask files found in {mask_path}")

    # The bounding box only exists when at least one mask contains a tumor.
    if patients_with_mask:
        # Get tumor coordinates in a bounding box
        min_coords, max_coords = get_tumor_coordinates(global_max_tumor_region[3])

        # Get tumor dimensions (width, height, depth)
        p_width, p_height, p_depth = get_tumor_dimensions(min_coords, max_coords)

        print(f"The global maximum tumor region is in {global_max_tumor_region[0]} on slice {global_max_tumor_region[1]} with a tumor pixel sum of {global_max_tumor_region[2]}")
        print(f"Tumor coordinates in bounding box: Min = {min_coords}, Max = {max_coords}")
        print(f"Tumor dimensions: Width = {p_width}, Height = {p_height}, Depth = {p_depth}")
    print(f"Total Number of patients with Tumor Mask: {len(patients_with_mask)}")

    width_slack = 0
    height_slack = 0

    # The patient id is the part of the file name before the first "_";
    # dict.fromkeys drops repeated ids while keeping their order, so a patient
    # with several mask files is extracted only once.
    directories = list(dict.fromkeys(d.split('_')[0] for d in patients_with_mask))

    for patient_number in directories:
        extract_tumor_region(original_path, mask_path, output_path, patient_number, width, height, depth, width_slack, height_slack)
        print(f"Extraction of Patient {patient_number} completed.")

The extracted regions are then converted to `.npy` arrays so that they can be loaded easily in the classification part.

In [None]:
def convert_numpy_format(original_path, output_directory):
    """Convert every ``.nii.gz`` file in the per-patient subfolders to ``.npy``.

    For each subdirectory of ``original_path`` a directory with the same name
    is created under ``output_directory`` (also when it holds no NIfTI file).
    Each ``.nii.gz`` file inside is loaded with nibabel and its data array is
    saved with ``np.save`` under the same base name with a ``.npy`` extension.

    Args:
        original_path: Folder containing one subfolder per patient.
        output_directory: Root folder for the converted arrays.
    """
    # Only subdirectories are treated as patients; loose files are ignored.
    patient_dirs = list(filter(
        lambda entry: os.path.isdir(os.path.join(original_path, entry)),
        os.listdir(original_path),
    ))

    for patient_number in patient_dirs:
        source_dir = os.path.join(original_path, f'{patient_number}')
        target_dir = os.path.join(output_directory, f'{patient_number}')
        Path(target_dir).mkdir(parents=True, exist_ok=True)

        for file_name in os.listdir(source_dir):
            if not file_name.endswith('.nii.gz'):
                continue

            image_data = nib.load(os.path.join(source_dir, file_name)).get_fdata()

            # Strip ".gz" and then ".nii" to obtain the base name.
            without_gz, _ = os.path.splitext(file_name)
            base_name, _ = os.path.splitext(without_gz)
            np.save(os.path.join(target_dir, f'{base_name}.npy'), image_data)

Tumor regions of the PI-CAI dataset are extracted with the provided shape, using a depth of 8 slices.

In [None]:
# Image folder, mask folder and destination folder for this extraction run.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI/imagesTr'
mask_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI/labelsTr"
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_Extracted_Regions_AI'

# Size of the region to extract, passed to extract_prostate below.
width = 60
height = 60
depth = 8

print("Extraction Started")
extract_prostate(original_path, mask_path, output_path, width, height, depth)
print("Extraction Completed")

In [None]:
# Convert the regions written to Picai_Extracted_Regions_AI into .npy files
# stored in a parallel "_Numpy" folder.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_Extracted_Regions_AI'
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_Extracted_Regions_AI_Numpy'
convert_numpy_format(original_path, output_path)

Tumor regions of the PI-CAI dataset are extracted with the provided shape, using a depth of 3 slices.

In [None]:
# Image folder, mask folder and destination folder for this extraction run.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI/imagesTr'
mask_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/nnUNet_raw/Dataset600_Hum_AI/labelsTr"
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_AI_Extracted_3_Slice'

# Size of the region to extract, passed to extract_prostate below.
width = 60
height = 60
depth = 3

print("Extraction Started")
extract_prostate(original_path, mask_path, output_path, width, height, depth)
print("Extraction Completed")

In [None]:
# Convert the regions written to Picai_AI_Extracted_3_Slice into .npy files
# stored in a parallel "_Numpy" folder.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_AI_Extracted_3_Slice'
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Picai_AI_Extracted_3_Slice_Numpy'
convert_numpy_format(original_path, output_path)

Tumor regions of the private dataset that is neither registered nor resized are extracted with the provided shape.

In [None]:
# Image folder, mask folder and destination folder for this extraction run.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed/Private_Dataset/imagesTr'
mask_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/3d_fullres_Picai_Pipeline_Results_fold0"
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Extracted_Tumor_Regions_Margin_Cropped'

# Size of the region to extract, passed to extract_prostate below.
width = 99
height = 92
depth = 30

print("Extraction Started")
extract_prostate(original_path, mask_path, output_path, width, height, depth)
print("Extraction Completed")

In [None]:
# Convert the regions written to Extracted_Tumor_Regions_Margin_Cropped into
# .npy files stored in a parallel "_Numpy" folder.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Extracted_Tumor_Regions_Margin_Cropped'
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Extracted_Tumor_Regions_Margin_Cropped_Numpy'
convert_numpy_format(original_path, output_path)

Tumor regions of the private dataset that is not registered but has been resized are extracted with the provided shape.

In [None]:
# Image folder, mask folder and destination folder for this extraction run.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Preprocessed_2/Private_Dataset/imagesTr'
mask_path = "/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Private_Dataset_Segmentation_Results"
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Voxel_Results_Cropped'

# Size of the region to extract, passed to extract_prostate below.
width = 60
height = 60
depth = 8

print("Extraction Started")
extract_prostate(original_path, mask_path, output_path, width, height, depth)
print("Extraction Completed")

In [None]:
# Convert the regions written to Voxel_Results_Cropped into .npy files stored
# in a parallel "_Numpy" folder.
original_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Voxel_Results_Cropped'
output_path = '/local_ssd/practical_wise24/prostate_cancer/NNUNet_Lesion/Voxel_Results_Cropped_Numpy'
convert_numpy_format(original_path, output_path)