In [23]:
import SimpleITK as sitk
import os
import shutil
import numpy as np
import nibabel as nib
from scipy.ndimage import zoom
import pydicom
import cv2
import matplotlib.pyplot as plt
import scipy.interpolate as spi


# Standardize Spacing and Dimensions


In [None]:

# Standardize the spacing

def clone_directory_structure(src, dst):
    """
    Clone the directory structure from src to dst (directories only, no files).

    Args:
        src: Root of the directory tree to mirror.
        dst: Root under which the same sub-directory layout is created.
            Directories that already exist are left untouched.
    """
    for root, _dirs, _files in os.walk(src):
        # Map through the path *relative* to src rather than a textual
        # str.replace: with a trailing separator on src, replace() would glue
        # names together ("src/" + "a" -> "dsta").
        rel_dir = os.path.relpath(root, src)
        dst_dir = os.path.normpath(os.path.join(dst, rel_dir))
        os.makedirs(dst_dir, exist_ok=True)

def monotonic_zoom_interpolate(image_np, resize_factor):
    """
    Resample an array axis by axis with monotonic (PCHIP) interpolation.

    Args:
        image_np: Input array. It is not modified.
        resize_factor: Per-axis zoom factors in *reverse* axis order, i.e. the
            last entry applies to axis 0 (the callers in this file pass
            factors derived from ``image.GetSize()``).

    Returns:
        A float64 array whose length along each processed axis is
        ``round(old_length * factor)`` (at least one sample). The first and
        last samples of every axis coincide with those of the input.
    """
    # PCHIP always yields floating point, so convert once up front; this also
    # keeps the output dtype the same when every axis ends up being skipped.
    result = np.array(image_np, dtype=np.float64)

    for axis, factor in enumerate(resize_factor[::-1]):
        old_length = result.shape[axis]
        # Round instead of truncating: factors are usually computed as
        # new_size / old_size, and old_size * factor can land just below the
        # intended integer (e.g. 100 * 0.29 == 28.999999999999996).
        new_length = max(1, int(round(old_length * factor)))

        if new_length == old_length:
            # Nothing to do; skipping also avoids feeding PCHIP a length-1 axis.
            continue

        if old_length < 2:
            # PCHIP needs at least two samples; a single sample can only be
            # replicated along the axis.
            result = np.repeat(result, new_length, axis=axis)
            continue

        x_old = np.arange(old_length)
        x_new = np.linspace(0, old_length - 1, new_length)

        # Perform monotonic interpolation along the current axis
        result = spi.PchipInterpolator(x_old, result, axis=axis)(x_new)

    return result

def process_single_slice_dicom(dicom_file, target_resolution, output_dir):
    """
    Resample one image file to the target in-plane spacing and save it.

    Despite the name, the file is read with ``sitk.ReadImage``; the callers in
    this file pass ``.nii`` files. Only the first two (x, y) axes are
    resampled, the remaining axes keep their size and spacing.

    Args:
        dicom_file: Path of the image to read.
        target_resolution: Sequence whose first two entries are the desired
            x and y spacing.
        output_dir: Directory for the result; created if missing. The output
            keeps the base name of the input file.
    """
    # Load the image
    image = sitk.ReadImage(dicom_file)
    current_spacing = np.array(image.GetSpacing())
    current_size = np.array(image.GetSize())
    target_xy = np.asarray(target_resolution, dtype=float)[:2]

    # Zoom only in-plane; every further axis gets a factor of 1.
    resize_factor = np.ones(len(current_spacing))
    resize_factor[:2] = current_spacing[:2] / target_xy

    # Snap to an integer voxel grid and derive the factor actually applied.
    new_shape = np.round(current_size * resize_factor).astype(int)
    real_resize_factor = new_shape / current_size

    image_np = sitk.GetArrayFromImage(image)
    image_resampled_np = monotonic_zoom_interpolate(image_np, real_resize_factor)

    image_resampled = sitk.GetImageFromArray(image_resampled_np)

    new_spacing = current_spacing.copy()
    new_spacing[:2] = target_xy
    image_resampled.SetSpacing([float(s) for s in new_spacing])
    # GetImageFromArray resets the geometry to defaults; carry over origin and
    # direction so the resampled image stays aligned with the original.
    image_resampled.SetOrigin(image.GetOrigin())
    image_resampled.SetDirection(image.GetDirection())

    # The writer does not create missing folders.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, os.path.basename(dicom_file))
    sitk.WriteImage(image_resampled, output_file)

    print(f"Saved: {output_file}")
    print(f"Original shape: {image_np.shape}, Resampled shape: {image_resampled_np.shape}")
    print(f"Original spacing: {current_spacing}, New spacing: {image_resampled.GetSpacing()}")

def process_mask_dicom(dicom_file, target_resolution, output_dir):
    """
    Resample one mask file to the target in-plane spacing and save it.

    Uses nearest-neighbour interpolation (``zoom(..., order=0)``) so that no
    new label values are introduced. Only the first two (x, y) axes are
    resampled, the remaining axes keep their size and spacing.

    Args:
        dicom_file: Path of the mask to read (read with ``sitk.ReadImage``;
            the callers in this file pass ``.nii`` files).
        target_resolution: Sequence whose first two entries are the desired
            x and y spacing.
        output_dir: Directory for the result; created if missing. The output
            keeps the base name of the input file.
    """
    # Load the mask
    image = sitk.ReadImage(dicom_file)
    current_spacing = np.array(image.GetSpacing())
    current_size = np.array(image.GetSize())
    target_xy = np.asarray(target_resolution, dtype=float)[:2]

    # Zoom only in-plane; every further axis gets a factor of 1.
    resize_factor = np.ones(len(current_spacing))
    resize_factor[:2] = current_spacing[:2] / target_xy

    # Snap to an integer voxel grid and derive the factor actually applied.
    new_shape = np.round(current_size * resize_factor).astype(int)
    real_resize_factor = new_shape / current_size

    image_np = sitk.GetArrayFromImage(image)

    # The factors are in SimpleITK axis order; the array axes are reversed.
    image_resampled_np = zoom(image_np, real_resize_factor[::-1], order=0)

    image_resampled = sitk.GetImageFromArray(image_resampled_np)

    new_spacing = current_spacing.copy()
    new_spacing[:2] = target_xy
    image_resampled.SetSpacing([float(s) for s in new_spacing])
    # GetImageFromArray resets the geometry to defaults; carry over origin and
    # direction so the mask stays aligned with its image.
    image_resampled.SetOrigin(image.GetOrigin())
    image_resampled.SetDirection(image.GetDirection())

    # The writer does not create missing folders.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, os.path.basename(dicom_file))
    sitk.WriteImage(image_resampled, output_file)

    print(f"Saved: {output_file}")
    print(f"Original shape: {image_np.shape}, Resampled shape: {image_resampled_np.shape}")
    print(f"Original spacing: {current_spacing}, New spacing: {image_resampled.GetSpacing()}")


def loop_and_standardize(dataset_path, target_resolution, output_path, search_term="4d"):
    """
    Walk the dataset and resample every eligible ``.nii`` file to the target spacing.

    Selection rules:
      * directories and files whose name contains ``search_term`` are skipped;
      * in a directory whose name does not contain "gt", files without "gt"
        in their name are resampled as images;
      * in a directory whose name contains "gt", files with "gt" in their
        name are resampled as masks (nearest neighbour).

    Args:
        dataset_path: Root of the input dataset.
        target_resolution: Target spacing forwarded to the processing functions.
        output_path: Root of the output tree; the sub-directory layout of
            ``dataset_path`` is mirrored beneath it.
        search_term: Substring marking directories/files to ignore.
    """
    for root, _dirs, files in os.walk(dataset_path):
        dir_name = os.path.basename(root)
        if search_term in dir_name:
            continue

        # The directory name decides which kind of file is handled here.
        # NOTE(review): masks stored next to their images (in a directory
        # without "gt" in its name) are skipped by this rule - confirm that
        # this matches the dataset layout.
        in_mask_dir = "gt" in dir_name
        process = process_mask_dicom if in_mask_dir else process_single_slice_dicom

        # Mirror the directory via its relative path; a textual replace
        # breaks when dataset_path carries a trailing separator.
        rel_dir = os.path.relpath(root, dataset_path)
        output_dir = os.path.normpath(os.path.join(output_path, rel_dir))

        for file_name in files:
            if search_term in file_name or not file_name.endswith(".nii"):
                continue
            if ("gt" in file_name) != in_mask_dir:
                continue

            dicom_file_path = os.path.join(root, file_name)
            print(f"Processing: {dicom_file_path}")

            # Created lazily so that no empty output folders are left behind;
            # the image writer itself does not create missing directories.
            os.makedirs(output_dir, exist_ok=True)
            process(dicom_file_path, target_resolution, output_dir)


# Example usage: resample the training set to 1.0 in-plane spacing.
dataset_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/training"  # Source dataset root
output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"  # Destination root for the standardized data
target_spacing = np.ones(3)  # Target (x, y, z) spacing of 1.0 on every axis

loop_and_standardize(
    dataset_path=dataset_path,
    target_resolution=target_spacing,
    output_path=output_path,
)


In [25]:

def check_unified_resolution(output_path):
    """
    Report whether all ``.nii`` images below output_path share one spacing.

    Every readable file contributes its ``GetSpacing()`` tuple; unreadable
    files are reported and skipped. The result is printed, nothing is returned.

    Args:
        output_path: Root directory that is searched recursively.
    """
    resolutions = set()  # Unique spacing tuples seen so far

    for root, _dirs, files in os.walk(output_path):
        for file_name in files:
            if not file_name.endswith(".nii"):  # Only NIfTI files are checked
                continue
            nifti_file_path = os.path.join(root, file_name)
            try:
                image = sitk.ReadImage(nifti_file_path)
                resolutions.add(image.GetSpacing())
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Failed to read {nifti_file_path}: {str(e)}")

    if not resolutions:
        # Without this branch an empty scan was reported as "multiple resolutions".
        print("No readable .nii images were found.")
    elif len(resolutions) == 1:
        print("All images have a unified resolution.")
    else:
        print("There are multiple resolutions in the dataset:")
        for res in resolutions:
            print(res)

# Example usage: verify the spacing of the standardized output tree.
output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"  # Root of the standardized images
check_unified_resolution(output_path=output_path)


There are multiple resolutions in the dataset:
(1.0, 1.0, 10.0)
(1.0, 1.0, 6.5)
(1.0, 1.0, 5.0)
(1.0, 1.0, 1.0)
(1.0, 1.0, 7.0)


# Normalize

In [None]:
def normalize_file_by_file(dicom_file, output_dir):
    """
    Load one image file, z-score normalize its intensities and save it.

    The mean and standard deviation are computed over all voxels of the file.
    Spacing, origin and direction of the input are copied to the output.

    Args:
        dicom_file: Path of the image to read (read with ``sitk.ReadImage``).
        output_dir: Directory for the result; created if missing. The output
            keeps the base name of the input file.
    """
    image = sitk.ReadImage(dicom_file)
    image_np = sitk.GetArrayFromImage(image)
    mean = image_np.mean()
    std = image_np.std()

    centered = image_np - mean
    # A constant image has std == 0; dividing would fill the result with
    # NaN/inf, so such an image is only centred (i.e. becomes all zeros).
    normalized_np = centered / std if std > 0 else centered

    normalized = sitk.GetImageFromArray(normalized_np)
    # GetImageFromArray resets the geometry to defaults; without this the
    # previously standardized spacing would be lost on write.
    normalized.CopyInformation(image)

    # The writer does not create missing folders.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, os.path.basename(dicom_file))
    sitk.WriteImage(normalized, output_file)
    # Report the written file (not just its folder), like the other steps do.
    print(f"Saved: {output_file}")
    print(f"Mean: {mean}, Std: {std}")
    

def loop_and_normalize(dataset_path, output_path, search_term="4d"):
    """
    Walk the dataset and z-score normalize every eligible ``.nii`` image.

    Directories and files whose name contains ``search_term`` or "gt" are
    skipped, so masks are left untouched.

    Args:
        dataset_path: Root of the input dataset.
        output_path: Root of the output tree; the sub-directory layout of
            ``dataset_path`` is mirrored beneath it.
        search_term: Substring marking directories/files to ignore.
    """
    for root, _dirs, files in os.walk(dataset_path):
        dir_name = os.path.basename(root)
        if search_term in dir_name or "gt" in dir_name:
            continue

        # Mirror the directory via its relative path; a textual replace
        # breaks when dataset_path carries a trailing separator.
        rel_dir = os.path.relpath(root, dataset_path)
        output_dir = os.path.normpath(os.path.join(output_path, rel_dir))

        for file_name in files:
            if search_term in file_name or "gt" in file_name:
                continue
            if not file_name.endswith(".nii"):
                continue

            dicom_file_path = os.path.join(root, file_name)
            print(f"Processing: {dicom_file_path}")

            os.makedirs(output_dir, exist_ok=True)
            # This previously called the resampling routine with a name
            # (target_resolution) that is not defined in this function;
            # normalization is what this loop is meant to apply.
            normalize_file_by_file(dicom_file_path, output_dir)
# Example usage: normalize the standardized dataset in place (input root == output root).
dataset_path = output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"
loop_and_normalize(dataset_path=dataset_path, output_path=output_path)

In [None]:
def check_image_values(output_path):
    """
    Print the smallest and largest voxel value of each ``.nii`` file below output_path.

    Files that cannot be read are reported and skipped.
    """
    for folder, _subdirs, names in os.walk(output_path):
        for name in names:
            # Only NIfTI files are inspected
            if not name.endswith(".nii"):
                continue
            nifti_file_path = os.path.join(folder, name)
            try:
                voxels = sitk.GetArrayFromImage(sitk.ReadImage(nifti_file_path))
                print(f"Min: {voxels.min()}, Max: {voxels.max()} for: {nifti_file_path}")
            except Exception as err:
                print(f"Failed to read {nifti_file_path}: {str(err)}")

# Inspect the intensity range of the images in the standardized output tree.
output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"  # Root of the standardized images
check_image_values(output_path=output_path)

In [None]:
# standardize the Dimension by padding
def process_Dimension_by_padding(dicom_file, target_resolution, output_dir):
    """
    Pad one image file symmetrically to the target in-plane size and save it.

    The image is padded along its first two (x, y) axes with its own minimum
    value; when the padding is odd, the extra voxel goes to the upper side.
    Further axes are not padded.

    Args:
        dicom_file: Path of the image to read (read with ``sitk.ReadImage``).
        target_resolution: Sequence whose first two entries are the desired
            x and y size in voxels (despite the name, not a spacing).
        output_dir: Directory for the result; created if missing. The output
            keeps the base name of the input file.

    Raises:
        ValueError: If the image is already larger than the target along x or
            y; padding cannot shrink an image.
    """
    image = sitk.ReadImage(dicom_file)
    # Keep the fill value as a float: int() truncates toward zero, which for
    # normalized (negative, fractional) intensities is not the image minimum.
    constant_val = float(sitk.GetArrayFromImage(image).min())

    current_size = image.GetSize()
    pad_total = [int(target_resolution[axis] - current_size[axis]) for axis in range(2)]
    if min(pad_total) < 0:
        raise ValueError(
            f"Image {dicom_file} has size {tuple(current_size[:2])}, which exceeds "
            f"the target size ({target_resolution[0]}, {target_resolution[1]}); "
            "it cannot be padded to the target."
        )

    # One bound per image dimension; only x and y receive padding.
    dimension = image.GetDimension()
    pad_lower = [0] * dimension
    pad_upper = [0] * dimension
    for axis, total in enumerate(pad_total):
        pad_lower[axis] = total // 2
        pad_upper[axis] = total - pad_lower[axis]

    transformed = sitk.ConstantPad(image, pad_lower, pad_upper, constant_val)

    # The writer does not create missing folders.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, os.path.basename(dicom_file))
    sitk.WriteImage(transformed, output_file)

def loop_and_standardize(dataset_path, target_resolution, output_path, search_term="4d"):
    """
    Walk the dataset and pad every eligible ``.nii`` file to the target size.

    Directories and files whose name contains ``search_term`` are skipped;
    every other ``.nii`` file is passed to ``process_Dimension_by_padding``.

    NOTE: this definition reuses the name of the spacing-standardization loop
    defined earlier in this file and replaces it once executed.

    Args:
        dataset_path: Root of the input dataset.
        target_resolution: Target size forwarded to the padding routine.
        output_path: Root of the output tree; the sub-directory layout of
            ``dataset_path`` is mirrored beneath it.
        search_term: Substring marking directories/files to ignore.
    """
    for root, _dirs, files in os.walk(dataset_path):
        if search_term in os.path.basename(root):
            continue

        # Mirror the directory via its relative path; a textual replace
        # breaks when dataset_path carries a trailing separator.
        rel_dir = os.path.relpath(root, dataset_path)
        output_dir = os.path.normpath(os.path.join(output_path, rel_dir))

        for file_name in files:
            if search_term in file_name or not file_name.endswith(".nii"):
                continue

            dicom_file_path = os.path.join(root, file_name)
            print(f"Processing: {dicom_file_path}")

            # The image writer does not create missing directories.
            os.makedirs(output_dir, exist_ok=True)
            process_Dimension_by_padding(dicom_file_path, target_resolution, output_dir)

# Example usage: pad the standardized dataset in place (input root == output root).
dataset_path = output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"
target_spacing = np.array([512.0, 512.0, 1.0])  # Target size passed to the padding step (first two entries: x and y)

loop_and_standardize(
    dataset_path=dataset_path,
    target_resolution=target_spacing,
    output_path=output_path,
)

In [None]:
# Check whether there is any image larger than 512x512
def check_Dimension(output_path, target_size=(512, 512)):
    """
    Report, for each ``.nii`` image below output_path, how its in-plane size
    compares with the target size.

    For every readable file one line is printed stating whether its (x, y)
    size equals the target, exceeds it on at least one axis, or is lower.
    Unreadable files are reported and skipped.

    Args:
        output_path: Root directory that is searched recursively.
        target_size: Expected (x, y) size in voxels; defaults to 512x512.
    """
    target_w, target_h = target_size
    label = f"{target_w}x{target_h}"

    for root, _dirs, files in os.walk(output_path):
        for file_name in files:
            if not file_name.endswith(".nii"):  # Only NIfTI files are checked
                continue
            nifti_file_path = os.path.join(root, file_name)
            try:
                image = sitk.ReadImage(nifti_file_path)
                size = image.GetSize()
                width, height = size[0], size[1]
                # Both axes must match; the previous "or" accepted images
                # where only one axis had the target size.
                if width == target_w and height == target_h:
                    print(f"Resolution is {label} for: {nifti_file_path}")
                elif width > target_w or height > target_h:
                    # Previously reported as "lower" although it is larger.
                    print(f"Resolution exceeds {label} for: {nifti_file_path}")
                else:
                    print(f"Resolution lower {label} for: {nifti_file_path}")
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Failed to read {nifti_file_path}: {str(e)}")

# Report the in-plane size of every image in the standardized output tree.
output_path = "/Users/ahmed_ali/Documents/GitHub/GP-2025-Strain/Data/ACDC/database/train_standardized"  # Root of the standardized images
check_Dimension(output_path=output_path)