# Automated Cropping and Saving of Medical Imaging Data

## Overview
This notebook processes 3D medical images (stored as `.nii.gz` files) by:
1. Reading slice selection information from a CSV file.
2. Cropping the images based on specified slice ranges.
3. Saving the cropped images as new compressed `.nii.gz` files that reuse the voxel spacing, direction, and origin of the original image.

The workflow uses `SimpleITK` for handling medical imaging data and `pandas` for managing metadata in the CSV file.

## Prerequisites
- Input data:
  - A directory containing `.nii.gz` files.
  - A CSV file with columns:
    - `pid`: Patient identifier matching the image file prefix.
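    - `slice_start` and `slice_stop`: Integer slice indices for cropping along the first array axis; the range follows Python slicing, so `slice_start` is included and `slice_stop` is excluded.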
- Python libraries: `SimpleITK`, `pandas`, and `numpy` (third-party), plus the standard-library modules `os` and `glob`.

## Output
The cropped images are saved in a specified output directory with filenames formatted as `<pid>_img_L2-L5.nii.gz`.


In [3]:
import SimpleITK as sitk

def save_array_to_nii(mask_array, nii_template_path, nii_save_path):
    """Save a NumPy array as a compressed NIfTI file using a template's geometry.

    The voxel spacing, direction and origin are copied unchanged from the
    template file onto the image built from ``mask_array``. The origin is NOT
    adjusted for any cropping applied to the array beforehand.

    Parameters:
    - mask_array: NumPy array to save; converted with ``sitk.GetImageFromArray``.
    - nii_template_path: Path to an existing image file whose header supplies
      the spacing, direction and origin.
    - nii_save_path: Output path of the new file. Missing parent directories
      are created.
    """
    import os  # local import: the cell defining this function only imports SimpleITK

    # Read just the header of the template; loading the full voxel data is
    # unnecessary when only three metadata fields are needed.
    template_reader = sitk.ImageFileReader()
    template_reader.SetFileName(nii_template_path)
    template_reader.ReadImageInformation()
    output_spacing = template_reader.GetSpacing()       # Voxel spacing
    output_direction = template_reader.GetDirection()   # Orientation of the image
    output_origin = template_reader.GetOrigin()         # Position in physical space

    # Convert the NumPy array to a SimpleITK image and attach the template metadata
    output_image = sitk.GetImageFromArray(mask_array)
    output_image.SetSpacing(output_spacing)
    output_image.SetDirection(output_direction)
    output_image.SetOrigin(output_origin)

    # The writer does not create folders, so make sure the target one exists
    output_dir = os.path.dirname(nii_save_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the NIfTI file with compression enabled
    nii_writer = sitk.ImageFileWriter()
    nii_writer.SetFileName(nii_save_path)
    nii_writer.SetUseCompression(True)
    nii_writer.Execute(output_image)

    # Log the saved file path
    print(nii_save_path, 'saved')


In [None]:
import os
import glob
import pandas as pd
import numpy as np

# Paths to input files and directories
csv_path = 'csv/MyoSegmenTUM_54_water_fat_L2-L5.csv'  # Path to the CSV file with slice indices
img_dir = 'nnUNet_raw_data_base/nnUNet_train_data_raw/img_in_nii/'  # Directory with original NIfTI files
img_L25_dir = 'nnUNet_raw_data_base/nnUNet_train_data_raw/img_in_nii_L2-L5/'  # Directory for cropped output files

# Create the output directory once up front so saving cannot fail on a missing folder
os.makedirs(img_L25_dir, exist_ok=True)

# Load the CSV file into a DataFrame. `pid` is read as text so numeric-looking
# identifiers keep any leading zeros and can be concatenated into the filename pattern.
df = pd.read_csv(csv_path, dtype={'pid': str})

# Iterate over each row in the CSV file
for index, row in df.iterrows():
    # A row without an identifier or slice bound cannot be processed
    if row[['pid', 'slice_start', 'slice_stop']].isna().any():
        print(f"WARNING: CSV row {index} has missing values, skipped")
        continue

    # Locate the image whose filename starts with the patient ID
    # (glob.escape keeps special characters in the ID from acting as wildcards)
    pid = row['pid']
    matches = sorted(glob.glob(os.path.join(img_dir, glob.escape(pid) + '*.nii.gz')))
    if not matches:
        print(f"WARNING: no image found for pid {pid!r} in {img_dir}, skipped")
        continue
    if len(matches) > 1:
        # Prefix matching can be ambiguous (e.g. "1" also matches "10..."); make it visible
        print(f"WARNING: {len(matches)} images match pid {pid!r}; using {matches[0]}")
    img_path = matches[0]

    print(img_path)  # Debug: Print the path of the found image file

    # Define the target path for the cropped image
    targ_path = os.path.join(img_L25_dir, f"{pid}_img_L2-L5.nii.gz")

    # Load the image as a SimpleITK object and convert it to a NumPy array
    img_sitk = sitk.ReadImage(img_path)
    img_array = sitk.GetArrayFromImage(img_sitk)
    print(img_array.shape)  # Debug: Print the shape of the full 3D image array

    # Extract the slice range from the CSV file (slice_stop is exclusive, as in Python slicing)
    slice_start, slice_stop = int(row['slice_start']), int(row['slice_stop'])
    n_slices = img_array.shape[0]
    if not (0 <= slice_start <= slice_stop <= n_slices):
        print(f"WARNING: slice range [{slice_start}, {slice_stop}) is not a valid range "
              f"within the {n_slices} slices of {img_path}")

    img_crop_array = img_array[slice_start:slice_stop, :, :]  # Crop the array along the first axis (slices)
    print(slice_start, slice_stop, img_crop_array.shape)  # Debug: Print slice range and cropped shape

    if img_crop_array.shape[0] == 0:
        print(f"WARNING: empty crop for pid {pid!r}, nothing saved")
        continue

    # NOTE(review): save_array_to_nii copies the origin of the uncropped image, so for
    # slice_start > 0 the cropped volume is not shifted to the physical position of its
    # first slice. Left as-is here; confirm whether downstream data (e.g. labels cropped
    # the same way) relies on this before changing it.
    save_array_to_nii(img_crop_array, img_path, targ_path)
