In [8]:
import re
from pathlib import Path
import numpy as np
import tifffile


# ============================================================================
# CONFIGURATION
# ============================================================================

# Input/Output paths
# INPUT_DIR is scanned (direct children only) for files whose names end in
# "slice<5 digits>.tif". OUTPUT_DIR is created if it does not exist and
# receives one cropped TIFF per retained slice.
INPUT_DIR = r"/content/drive/MyDrive/soil_microCT_images/3-16mm_diam_5.85um/3-16mm_diam_5.85um"
OUTPUT_DIR = r"/content/drive/MyDrive/soil_microCT_images/ROI/mishmar_hanegev_maoz_ROI_16bit"

# ROI parameters
# The ROI is centered on the image and must fit inside it, otherwise the
# extraction raises ValueError.
ROI_SIZE = 650  # Square ROI size in pixels (width = height)

# Slice trimming parameters
# Trimming is applied to the list of slices after sorting by slice index.
TRIM_START = 75  # Number of slices to discard from the beginning
TRIM_END = 75    # Number of slices to discard from the end

# ============================================================================
# MAIN SCRIPT
# ============================================================================

def find_valid_slices(input_dir):
    """
    Collect the slice files in a directory, ordered by slice number.

    A file qualifies when its name ends with ``slice`` followed by exactly
    five digits and the ``.tif`` extension (e.g. ``slice00042.tif``). Only
    direct children of the directory are examined; sub-directories are
    ignored.

    Args:
        input_dir: Directory holding the TIFF slices (str or Path)

    Returns:
        List of (slice_index, file_path) tuples in ascending slice_index
        order, where slice_index is the integer value of the five digits
    """
    name_re = re.compile(r'slice(\d{5})\.tif$')

    # Pair every regular file with the result of matching its name
    matched = (
        (name_re.search(entry.name), entry)
        for entry in Path(input_dir).iterdir()
        if entry.is_file()
    )

    # Keep the hits only, converting the captured digits to an int
    indexed = [(int(hit.group(1)), entry) for hit, entry in matched if hit]

    # Order by slice number alone (paths never take part in the comparison)
    return sorted(indexed, key=lambda pair: pair[0])


def compute_roi_coordinates(image_height, image_width, roi_size):
    """
    Compute the boundaries of a square ROI centered on the image.

    The center is taken as (image_height // 2, image_width // 2) and the ROI
    starts roi_size // 2 pixels before it on each axis, so the returned
    window is always exactly roi_size pixels wide and high.

    Args:
        image_height: Height of the image in pixels
        image_width: Width of the image in pixels
        roi_size: Side length of the square ROI in pixels (must be > 0)

    Returns:
        Tuple (y_start, y_end, x_start, x_end) defining ROI boundaries as
        half-open ranges, suitable for image[y_start:y_end, x_start:x_end]

    Raises:
        ValueError: If roi_size is not positive, or if the ROI exceeds the
            image boundaries
    """
    # A zero or negative size would slip through the bounds check below and
    # yield an empty or inverted window, so reject it explicitly.
    if roi_size <= 0:
        raise ValueError(
            f"ROI size must be a positive number of pixels, got {roi_size}"
        )

    # Compute image center
    center_y = image_height // 2
    center_x = image_width // 2

    # Compute ROI boundaries
    half_roi = roi_size // 2
    y_start = center_y - half_roi
    y_end = y_start + roi_size
    x_start = center_x - half_roi
    x_end = x_start + roi_size

    # Check boundaries
    if y_start < 0 or y_end > image_height or x_start < 0 or x_end > image_width:
        raise ValueError(
            f"ROI size {roi_size} exceeds image dimensions ({image_height}x{image_width}). "
            f"Computed ROI: y=[{y_start}:{y_end}], x=[{x_start}:{x_end}]"
        )

    return y_start, y_end, x_start, x_end


def extract_roi_from_dataset(input_dir, output_dir, roi_size, trim_start, trim_end):
    """
    Crop the same centered square ROI out of every retained slice and save it.

    Slices are discovered with find_valid_slices, the first trim_start and
    last trim_end of them are dropped, and the ROI window is computed once
    from the first retained slice. Each retained slice is then cropped with
    that window and written to output_dir as
    ``roi_<position>_slice<index>.tif``. Progress is printed to stdout.

    Args:
        input_dir: Path to input directory with TIFF slices
        output_dir: Path to output directory for TIFF slices (created,
            including parents, if it does not exist)
        roi_size: Size of square ROI in pixels
        trim_start: Number of slices to discard from start (>= 0)
        trim_end: Number of slices to discard from end (>= 0)

    Raises:
        ValueError: If a trim value is negative, if there are fewer slices
            than trim_start + trim_end, if no slice remains after trimming,
            if the ROI does not fit in the image, or if a slice's dimensions
            differ from those of the first retained slice
    """
    # A negative trim_start would become from-the-end slicing and a negative
    # trim_end would be treated as 0 while still lowering the minimum-slice
    # check, so reject both before doing any work.
    if trim_start < 0 or trim_end < 0:
        raise ValueError(
            f"Trim values must be non-negative "
            f"(trim_start={trim_start}, trim_end={trim_end})"
        )

    print(f"Scanning input directory: {input_dir}")

    # Find and sort valid slices
    valid_slices = find_valid_slices(input_dir)
    total_slices = len(valid_slices)

    print(f"Found {total_slices} valid slices")

    # Check minimum slice requirement
    min_required = trim_start + trim_end
    if total_slices < min_required:
        raise ValueError(
            f"Insufficient slices: found {total_slices}, but need at least "
            f"{min_required} (trim_start={trim_start} + trim_end={trim_end})"
        )

    # Apply trimming. An explicit stop index avoids the "-0" slicing pitfall
    # when trim_end is 0.
    selected_slices = valid_slices[trim_start:total_slices - trim_end]
    num_selected = len(selected_slices)

    print(f"After trimming: {num_selected} slices selected (discarded first {trim_start} and last {trim_end})")

    if num_selected == 0:
        raise ValueError("No slices remaining after trimming")

    # Read first slice to determine ROI coordinates
    print("Reading first slice to determine ROI coordinates...")
    first_slice_path = selected_slices[0][1]
    first_image = tifffile.imread(first_slice_path)
    image_height, image_width = first_image.shape[:2]

    print(f"Image dimensions: {image_height}x{image_width}")

    # Compute ROI coordinates (same for all slices)
    y_start, y_end, x_start, x_end = compute_roi_coordinates(image_height, image_width, roi_size)

    print(f"ROI coordinates: y=[{y_start}:{y_end}], x=[{x_start}:{x_end}]")
    print(f"ROI size: {roi_size}x{roi_size}")

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    print(f"Output directory: {output_dir}")
    print("\nProcessing slices...")

    # Process each slice
    for i, (slice_index, slice_path) in enumerate(selected_slices):
        # Read slice
        image = tifffile.imread(slice_path)

        # The ROI window was derived from the first slice only. Array slicing
        # silently clips out-of-range bounds, so a differently sized slice
        # would otherwise produce a truncated or off-center ROI.
        if image.shape[:2] != (image_height, image_width):
            raise ValueError(
                f"Slice {slice_path.name} has shape {image.shape}, "
                f"expected {image_height}x{image_width} like the first slice"
            )

        # Extract ROI
        roi = image[y_start:y_end, x_start:x_end]

        # Generate output filename preserving order
        output_filename = f"roi_{i:04d}_slice{slice_index:04d}.tif"
        output_file = output_path / output_filename

        # Save the crop with the dtype it was read with (no conversion here).
        # NOTE(review): the output folder name suggests 16-bit data; confirm
        # the input slices are uint16 if that is a requirement.
        tifffile.imwrite(output_file, roi, photometric='minisblack')

        # Progress update
        if (i + 1) % 50 == 0 or (i + 1) == num_selected:
            print(f"  Processed {i + 1}/{num_selected} slices")

    print("\nROI extraction complete!")
    print(f"Output saved to: {output_dir}")


def main():
    """
    Run the ROI extraction using the module-level configuration constants.

    Any exception is reported on stdout and then re-raised unchanged so the
    full traceback is still shown.
    """
    try:
        settings = {
            "input_dir": INPUT_DIR,
            "output_dir": OUTPUT_DIR,
            "roi_size": ROI_SIZE,
            "trim_start": TRIM_START,
            "trim_end": TRIM_END,
        }
        extract_roi_from_dataset(**settings)
    except Exception as exc:
        print(f"\nError: {exc}")
        raise


# Entry point: run the extraction with the configuration constants defined
# at the top of this cell when it is executed directly.
if __name__ == "__main__":
    main()

Scanning input directory: /content/drive/MyDrive/soil_microCT_images/3-16mm_diam_5.85um/3-16mm_diam_5.85um
Found 1353 valid slices
After trimming: 1203 slices selected (discarded first 75 and last 75)
Reading first slice to determine ROI coordinates...
Image dimensions: 1845x1845
ROI coordinates: y=[597:1247], x=[597:1247]
ROI size: 650x650
Output directory: /content/drive/MyDrive/soil_microCT_images/ROI/mishmar_hanegev_maoz_ROI_16bit

Processing slices...
  Processed 50/1203 slices
  Processed 100/1203 slices
  Processed 150/1203 slices
  Processed 200/1203 slices
  Processed 250/1203 slices
  Processed 300/1203 slices
  Processed 350/1203 slices
  Processed 400/1203 slices
  Processed 450/1203 slices
  Processed 500/1203 slices
  Processed 550/1203 slices
  Processed 600/1203 slices
  Processed 650/1203 slices
  Processed 700/1203 slices
  Processed 750/1203 slices
  Processed 800/1203 slices
  Processed 850/1203 slices
  Processed 900/1203 slices
  Processed 950/1203 slices
  Proces

In [3]:
# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# paths used by INPUT_DIR and OUTPUT_DIR resolve.
# NOTE(review): this cell appears after the extraction cell that reads those
# paths; on a fresh runtime it must be run first (ideally move it to the top
# of the notebook).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Now that Google Drive is mounted, please update the `INPUT_DIR` in the cell below to the path of your TIFF slices within Google Drive. For example, if your slices are in a folder named `my_tiff_data` in your Google Drive, the path would be `/content/drive/MyDrive/my_tiff_data`.