In [1]:
import openslide
from openslide import open_slide
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import os
import glob

### Split into Patches and then save as .png images. 

#### This takes in a magnification rescaled image based on the BIT microscope image and Hamamatsu slide scanner, and then splits the image into n x n patches. 

In [None]:
import os
import random
import shutil
from glob import glob

def split_image_into_patches(image, patch_size, overlap, output_folder, handle="patch", extension = "png"):
    """
    Splits an image into n x n patches with overlap, and saves them to a folder.
    Only saves complete patches of the specified size.
    Args:
    - image: PIL image to split
    - patch_size: Size of the patches (e.g., (256, 256))
    - overlap: Overlap between patches (e.g., 50 for 50-pixel overlap)
    - output_folder: Folder to save the patches
    - handle: Prefix for the patch file name
    Returns:
    - None
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)
    # Convert the image to a NumPy array
    np_image = np.array(image)
    # Get the dimensions of the image
    height, width = np_image.shape[0], np_image.shape[1]
    # Calculate the step size (patch size minus overlap)
    step = patch_size[0] - overlap
    patch_number = 1  # Counter for patches
    # Iterate through the image and extract patches
    for y in range(0, height - patch_size[0] + 1, step):
        for x in range(0, width - patch_size[1] + 1, step):
            # Check if the patch is within the image boundaries
            if y + patch_size[0] <= height and x + patch_size[1] <= width:
                # Extract the patch
                patch = np_image[y:y + patch_size[0], x:x + patch_size[1]]
                # Convert the patch to a PIL image
                patch_pil = Image.fromarray(patch)
                # Save the patch
                formatted = f"{patch_number:05d}"
                patch_filename = f"{handle}_{formatted}.{extension}"
                patch_pil.save(os.path.join(output_folder, patch_filename))
                patch_number += 1


def split_images_train_test(source_folder, output_root, train_ratio=0.99, seed=42):
    """
    Randomly splits .tif images in source_folder into train and test folders.

    Args:
        source_folder (str): Path to folder containing .tif images.
        output_root (str): Root folder where 'train/' and 'test/' will be created.
        train_ratio (float): Ratio of images to put in train set.
        seed (int): Random seed for reproducibility.
    """
    os.makedirs(os.path.join(output_root, 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_root, 'test'), exist_ok=True)

    tif_images = glob(os.path.join(source_folder, "*.tif"))
    random.seed(seed)
    random.shuffle(tif_images)

    split_index = int(len(tif_images) * train_ratio)
    train_images = tif_images[:split_index]
    test_images = tif_images[split_index:]

    print(f"Total images: {len(tif_images)}")
    print(f"Train: {len(train_images)}")
    print(f"Test: {len(test_images)}")

    for img_path in train_images:
        shutil.move(img_path, os.path.join(output_root, 'train', os.path.basename(img_path)))

    for img_path in test_images:
        shutil.move(img_path, os.path.join(output_root, 'test', os.path.basename(img_path)))

# Example usage:
# split_images_train_test("/path/to/images", "/path/to/split_output")


In [8]:
# Magnification Information of the microscope and slide scanner. 

mpp_x = float(0.22034197073858627)
mpp_y = float(0.22034197073858627)
print(f"MPP X: {mpp_x} µm/pixel")
print(f"MPP Y: {mpp_y} µm/pixel")
PCO_pixel_size = 6.5 # microns
BIT_magnification = 40
reference_scale = PCO_pixel_size / BIT_magnification
scale_factor_x = mpp_x / reference_scale
scale_factor_y = mpp_x / reference_scale

print(f"x ratio: {scale_factor_x}")
print(f"y ratio: {scale_factor_y}")

MPP X: 0.22034197073858627 µm/pixel
MPP Y: 0.22034197073858627 µm/pixel
x ratio: 1.3559505891605308
y ratio: 1.3559505891605308


In [18]:
slide_paths = r'C:\Users\durrlab-asong\Desktop\virtual_staining_training_data\duodenum\FFPE-HE\OTS-25-25256 - 2025-08-01 13.59.31_duodenum\submucosa'
output_slide_path = os.path.join(slide_paths, "patches")
tif_imgs = glob(os.path.join(slide_paths, "*.tif"))
print(len(tif_imgs))
patch_size = (512, 512)

patch_handle = "duodenum_submucosa_OTS-25-25256_2025-08-01_13-59-31"
overlap = 50 # number of pixel overlap
img_ext = "tif"

for i, tif_img in enumerate(tif_imgs):
    img_roi = Image.open(tif_img)

    # Rescale image. 
    image_roi = img_roi.convert('RGB')
    image_roi = np.array(image_roi)

    # rescale H&E Image.
    rescaled_image_roi = cv.resize(image_roi, None, fx=scale_factor_x, fy=scale_factor_y, interpolation=cv.INTER_CUBIC)
    rescaled_image_roi = Image.fromarray(rescaled_image_roi)
    patch_handle_temp = f"{patch_handle}_roi_{i}"

    split_image_into_patches(img_roi, patch_size, overlap, output_slide_path, handle=patch_handle_temp, extension=img_ext)

17




In [19]:
split_images_train_test(output_slide_path, output_slide_path, train_ratio=0.90, seed=42)

Total images: 4556
Train: 4100
Test: 456
