# Automated Cropping and Saving of Medical Imaging Data

## Overview
This notebook processes 3D medical images (stored as `.nii.gz` files) by:
1. Reading slice selection information from a CSV file.
2. Cropping the images based on specified slice ranges.
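3. Saving the cropped images as new `.nii.gz` files that reuse the original image's spacing, direction, and origin (the origin is copied unchanged; it is not shifted to the first retained slice).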

The workflow uses `SimpleITK` for handling medical imaging data and `pandas` for managing metadata in the CSV file.

## Prerequisites
- Input data:
  - A directory containing `.nii.gz` files.
  - A CSV file with columns:
    - `pid`: Patient identifier matching the image file prefix (a trailing `.nii.gz` in the value is ignored).
    - `L3_L4` and `L5_S1`: Slice indices used as the start (inclusive) and stop (exclusive) of the crop.
- Python libraries: `SimpleITK`, `pandas`, `numpy`, `os`, `glob`.

## Output
The cropped images are saved in a specified output directory with filenames formatted as `<pid>_img_L4-L5.nii.gz`.


In [3]:
import SimpleITK as sitk

def save_array_to_nii(mask_array, nii_template_path, nii_save_path):
    """Write a NumPy array to disk as a compressed image file.

    The spacing, direction, and origin of the written image are copied
    verbatim from the image stored at ``nii_template_path``; the voxel data
    comes from ``mask_array``. The saved path is printed once the write has
    finished.

    Args:
        mask_array: NumPy array holding the voxel data to save.
        nii_template_path: Path of an existing image file whose spacing,
            direction, and origin are applied to the output.
        nii_save_path: Destination path of the new file (used in this
            notebook for ``.nii.gz`` output).
    """
    # The template is consulted only for its geometric metadata.
    template = sitk.ReadImage(nii_template_path)
    spacing, direction, origin = (
        template.GetSpacing(),     # voxel spacing
        template.GetDirection(),   # orientation of the image
        template.GetOrigin(),      # position in physical space
    )

    # Wrap the array in a SimpleITK image and stamp the template geometry on it.
    image = sitk.GetImageFromArray(mask_array)
    image.SetSpacing(spacing)
    image.SetDirection(direction)
    image.SetOrigin(origin)

    # Write the file with compression switched on.
    writer = sitk.ImageFileWriter()
    writer.SetUseCompression(True)
    writer.SetFileName(nii_save_path)
    writer.Execute(image)

    # Report the path that was written.
    print(nii_save_path, 'saved')

In [None]:
import os
import glob
import pandas as pd
import numpy as np

# Paths to input files and directories
csv_path = 'csv_and_figures/word_img_size_100_to_L2L5.csv'  # Path to the CSV file with slice indices
img_dir = 'nnUNet_raw_data_base/nnUNet_train_data_raw/img_in_nii/'  # Directory with original NIfTI files
img_L4L5_dir = 'nnUNet_raw_data_base/nnUNet_train_data_raw/img_in_nii_L4-L5/'  # Directory for cropped output files

# Make sure the output directory exists so the first save does not fail on a
# missing path.
os.makedirs(img_L4L5_dir, exist_ok=True)

# === Load CSV ===
# Columns read below: 'pid', 'L3_L4' (crop start) and 'L5_S1' (crop stop).
df = pd.read_csv(csv_path)

# === Iterate and crop ===
for _, row in df.iterrows():
    # str() guards against pandas having parsed purely numeric IDs as numbers,
    # which would make .replace() fail.
    pid = str(row['pid']).replace('.nii.gz', '')

    # glob returns matches in arbitrary order; sort so the file that gets
    # picked is the same on every run.
    img_path_list = sorted(glob.glob(os.path.join(img_dir, pid + '*.nii.gz')))

    if not img_path_list:
        print(f"Image not found for: {pid}")
        continue

    if len(img_path_list) > 1:
        # The pattern is a prefix match, so one pid can hit several files.
        print(f"Warning: {len(img_path_list)} files match {pid}; using {img_path_list[0]}")

    img_path = img_path_list[0]
    output_path = os.path.join(img_L4L5_dir, f"{pid}_img_L4-L5.nii.gz")

    # Rows without slice indices cannot be cropped; skip them instead of
    # letting int(NaN) abort the whole run.
    if pd.isna(row['L3_L4']) or pd.isna(row['L5_S1']):
        print(f"Skipping {pid}: missing L3_L4 or L5_S1 slice index")
        continue

    # Slice range along the first array axis: slice_start is included,
    # slice_stop is excluded (standard Python slicing).
    slice_start = int(row['L3_L4'])
    slice_stop = int(row['L5_S1'])

    # Read full image
    img_sitk = sitk.ReadImage(img_path)
    img_array = sitk.GetArrayFromImage(img_sitk)
    depth = img_array.shape[0]

    if slice_stop > depth:
        print(f"Warning: slice_stop {slice_stop} exceeds image depth {depth} for {pid}")
        slice_stop = depth

    # A negative start would silently wrap around via NumPy indexing, and an
    # empty or inverted range would produce a zero-depth volume; skip both.
    if slice_start < 0 or slice_start >= slice_stop:
        print(f"Skipping {pid}: invalid slice range {slice_start} to {slice_stop}")
        continue

    cropped_array = img_array[slice_start:slice_stop, :, :]
    print(f"{pid}: Cropped {slice_start} to {slice_stop} → shape: {cropped_array.shape}")

    # Save cropped region
    # NOTE(review): save_array_to_nii copies the full image's origin unchanged,
    # so for slice_start > 0 the cropped volume is not placed at its true
    # physical position. Confirm downstream steps do not rely on that position.
    save_array_to_nii(cropped_array, img_path, output_path)
