In [1]:
import os

# Get the current working directory. Every path below is resolved relative to
# it, so the notebook is expected to be started from the project root.
current_path = os.getcwd()

# Define the relative paths to your data and .venv folders
data_path = os.path.join(current_path, 'data')
venv_path = os.path.join(current_path, '.venv')

# Location of the OpenSlide Windows binaries inside the virtual environment
OPENSLIDE_PATH = os.path.join(venv_path, 'Lib', 'site-packages', 'openslide-win64-20230414', 'bin')

# os.add_dll_directory only exists on Windows (Python 3.8+) and fails when the
# directory is missing. Register the DLL folder only when it is really there;
# otherwise fall back to a plain import so an OpenSlide installed some other
# way (system library, different wheel) still gets a chance to load.
if hasattr(os, 'add_dll_directory') and os.path.isdir(OPENSLIDE_PATH):
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    import openslide

from openslide import open_slide
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
import xml.etree.ElementTree as ET
import random
from shapely.geometry import Point, Polygon, box
import cv2

True

In [None]:
def efficient_patch_generator_with_padding(slide, patch_size=256, level=0):
    """
    A generator function to yield non-overlapping patches from a slide at a specified level.
    Patches are read directly with read_region; patches on the right/bottom border that
    extend past the image are padded with white so every patch has the same shape.

    Parameters:
    - slide: The OpenSlide slide object (needs level_dimensions, level_downsamples, read_region).
    - patch_size: Desired size of each patch (patch will be of size patch_size x patch_size).
    - level: Desired level or magnification.

    Yields:
    - RGB patches of shape (patch_size, patch_size, 3), row by row, left to right.
    """
    # Dimensions at the desired level
    level_w, level_h = slide.level_dimensions[level]

    # read_region expects its location in level-0 pixels, while the grid below is
    # laid out in pixels of the requested level, so origins must be scaled up.
    downsample = slide.level_downsamples[level]

    # Ceiling division: the last row/column may only be partially covered.
    num_patches_width = -(-level_w // patch_size)
    num_patches_height = -(-level_h // patch_size)

    for row in range(num_patches_height):
        start_y = row * patch_size
        # Number of rows of this patch that actually lie inside the image
        valid_h = min(patch_size, level_h - start_y)

        for col in range(num_patches_width):
            start_x = col * patch_size
            valid_w = min(patch_size, level_w - start_x)

            location = (int(round(start_x * downsample)), int(round(start_y * downsample)))

            # Read only the in-bounds part; asking for a full tile here would return
            # out-of-bounds pixels that must not be mistaken for tissue.
            region = slide.read_region(location, level, (valid_w, valid_h))
            patch = np.array(region)[:, :, :3]  # Exclude the alpha channel

            # Fill the missing bottom/right part with white so the result is
            # exactly patch_size x patch_size.
            if valid_h < patch_size or valid_w < patch_size:
                patch = np.pad(
                    patch,
                    ((0, patch_size - valid_h), (0, patch_size - valid_w), (0, 0)),
                    mode='constant',
                    constant_values=255,
                )

            yield patch

# Note: We can't test this function here due to the absence of the OpenSlide library in this environment. 
# But when run on your machine, this function should efficiently extract patches, including padding when necessary.


In [None]:
def determine_batch_size(patch_size, level_dimensions, max_gpu_pixels=4096 * 4096):
    """
    Determine how many square patches fit into a fixed pixel budget.

    Parameters:
    - patch_size: Size of the patch (patches are square). Must be positive.
    - level_dimensions: Tuple of (width, height) representing dimensions of the image at the desired level.
      Accepted for interface compatibility; the current heuristic does not use it.
    - max_gpu_pixels: Pixel budget of one batch. Defaults to a 4096x4096 image, which is a
      placeholder assumption and should be tuned to the actual GPU capacity.

    Returns:
    - Batch size (always at least 1, even if a single patch exceeds the budget).

    Raises:
    - ValueError: if patch_size is not positive.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")

    patches_in_budget = max_gpu_pixels // (patch_size * patch_size)

    # A batch size of 0 would make any "collect until the batch is full" loop
    # never emit a batch, so never go below one patch per batch.
    return max(1, patches_in_budget)

In [None]:
def batched_patch_generator(slide, patch_size=256, level=0):
    """
    A generator function to yield batches of non-overlapping patches from a slide at a specified level.

    Parameters:
    - slide: The OpenSlide slide object.
    - patch_size: Desired size of each patch (patch will be of size patch_size x patch_size).
    - level: Desired level or magnification.

    Yields:
    - Batches of patches stacked along a new first axis. The final batch may be smaller
      than the others when the patch count is not a multiple of the batch size.
    """
    # Guard against a batch size of 0 (or less): with it the "batch is full" check
    # below would never fire and the whole slide would be buffered in memory.
    batch_size = max(1, determine_batch_size(patch_size, slide.level_dimensions[level]))

    pending = []
    for patch in efficient_patch_generator_with_padding(slide, patch_size, level):
        pending.append(patch)

        if len(pending) >= batch_size:
            yield np.stack(pending, axis=0)
            pending = []

    # If there are remaining patches that didn't form a complete batch, yield them
    if pending:
        yield np.stack(pending, axis=0)

In [None]:
def sample_patches(slide, polygons, patch_size, num_patches, inside=True):
    """
    Randomly sample level-0 patches that either touch or completely avoid the annotated regions.

    Candidate origins are drawn uniformly over the slide and rejected until one matches.

    Parameters:
    - slide: The OpenSlide slide object.
    - polygons: Iterable of polygons, each given as a sequence of (x, y) vertices.
    - patch_size: Side length of the square patches, in level-0 pixels.
    - num_patches: Number of patches to sample.
    - inside: If True a patch must intersect at least one polygon (partial overlap is enough);
      if False it must intersect none of them.

    Returns:
    - (patches, patch_origins): list of arrays as returned by read_region (alpha channel
      included) and the list of their (x, y) top-left corners.

    Raises:
    - ValueError: if inside is True but no polygon was given, or if the patch is larger
      than the slide.

    Note: rejection sampling has no attempt limit, so the call can take very long (or never
    finish) when almost no location satisfies the requested condition.
    """
    patches = []
    patch_origins = []  # To keep track of the top-left corner of each patch for labeling purposes

    if num_patches <= 0:
        return patches, patch_origins

    # Build the shapely geometries once instead of once per sampling attempt.
    roi_shapes = [Polygon(p) for p in polygons]
    if inside and not roi_shapes:
        # Without polygons nothing can ever intersect, so the loop would never end.
        raise ValueError("inside=True requires at least one polygon to sample from")

    max_x = slide.dimensions[0] - patch_size
    max_y = slide.dimensions[1] - patch_size
    if max_x < 0 or max_y < 0:
        raise ValueError(
            f"patch_size {patch_size} is larger than the slide dimensions {tuple(slide.dimensions)}"
        )

    for _ in range(num_patches):
        while True:
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            if not roi_shapes:
                # Only reachable with inside=False: there is nothing to avoid.
                break

            patch_box = box(x, y, x + patch_size, y + patch_size)
            touches_roi = any(patch_box.intersects(shape) for shape in roi_shapes)
            if touches_roi == bool(inside):
                break

        patch = np.array(slide.read_region((x, y), 0, (patch_size, patch_size)))
        patches.append(patch)
        patch_origins.append((x, y))
    return patches, patch_origins

In [None]:
def efficient_sample_patches(slide, polygons, patch_size, num_patches, inside=True, max_attempts=10000):
    """
    Sample level-0 patches lying entirely inside one annotated polygon, or entirely outside all of them.

    For inside sampling a polygon is picked at random and origins are drawn only within its
    bounding box, which needs far fewer rejections than sampling over the whole slide.

    Parameters:
    - slide: The OpenSlide slide object.
    - polygons: Sequence of polygons, each given as a sequence of (x, y) vertices.
    - patch_size: Side length of the square patches, in level-0 pixels.
    - num_patches: Number of patches requested.
    - inside: If True the patch must be fully within a polygon; if False it must not
      intersect any polygon.
    - max_attempts: Maximum number of random origins tried per requested patch.

    Returns:
    - (patches, patch_origins): list of arrays as returned by read_region (alpha channel
      included) and the list of their integer (x, y) top-left corners. Fewer than
      num_patches entries are returned when no polygon can hold a patch or when
      max_attempts is exhausted for a patch.
    """
    patches = []
    patch_origins = []

    if num_patches <= 0:
        return patches, patch_origins

    # Build the shapely geometries once instead of once per sampling attempt.
    roi_shapes = [Polygon(p) for p in polygons]

    if inside:
        patch_area = patch_size * patch_size
        # Keep only polygons that can possibly contain a patch: enough area AND a
        # bounding box with room for at least one integer origin on each axis.
        candidates = []
        for shape in roi_shapes:
            if shape.area < patch_area:
                continue
            minx, miny, maxx, maxy = shape.bounds
            lo_x, hi_x = int(np.ceil(minx)), int(np.floor(maxx - patch_size))
            lo_y, hi_y = int(np.ceil(miny)), int(np.floor(maxy - patch_size))
            if lo_x <= hi_x and lo_y <= hi_y:
                candidates.append((shape, lo_x, hi_x, lo_y, hi_y))
        if not candidates:
            return patches, patch_origins
    else:
        # Clamp so a patch larger than the slide is still read from the origin.
        max_x = max(0, slide.dimensions[0] - patch_size)
        max_y = max(0, slide.dimensions[1] - patch_size)

    for _ in range(num_patches):
        if inside:
            shape, lo_x, hi_x, lo_y, hi_y = random.choice(candidates)

        origin = None
        for _attempt in range(max_attempts):
            # Origins are integers because read_region addresses whole pixels; the
            # geometric test is done on exactly the region that will be read.
            if inside:
                x = random.randint(lo_x, hi_x)
                y = random.randint(lo_y, hi_y)
                accepted = box(x, y, x + patch_size, y + patch_size).within(shape)
            else:
                x = random.randint(0, max_x)
                y = random.randint(0, max_y)
                if roi_shapes:
                    patch_box = box(x, y, x + patch_size, y + patch_size)
                    accepted = not any(patch_box.intersects(s) for s in roi_shapes)
                else:
                    # No annotated region at all: every location is outside.
                    accepted = True

            if accepted:
                origin = (x, y)
                break

        if origin is None:
            # No valid location found within the attempt budget; skip this patch
            # instead of looping forever.
            continue

        patch = np.array(slide.read_region(origin, 0, (patch_size, patch_size)))
        patches.append(patch)
        patch_origins.append(origin)

    return patches, patch_origins

In [None]:
def modified_sample_patches(slide, polygons, patch_size, num_patches, inside=True, max_attempts=10000):
    """
    Sample level-0 patches fully inside one annotated polygon, or fully outside all of them,
    choosing inside patches by their center point.

    Parameters:
    - slide: The OpenSlide slide object.
    - polygons: Sequence of polygons, each given as a sequence of (x, y) vertices.
    - patch_size: Side length of the square patches, in level-0 pixels.
    - num_patches: Number of patches requested.
    - inside: If True the patch must be fully within a polygon; if False it must not
      intersect any polygon.
    - max_attempts: Maximum number of random locations tried per requested patch.

    Returns:
    - (patches, patch_origins): list of arrays as returned by read_region (alpha channel
      included) and the list of the integer (x, y) top-left corners that were actually read.
      Fewer than num_patches entries are returned when no polygon can hold a patch or when
      max_attempts is exhausted for a patch.
    """
    patches = []
    patch_origins = []

    if num_patches <= 0:
        return patches, patch_origins

    half_patch_size = patch_size // 2

    # Build the shapely geometries once instead of once per sampling attempt.
    roi_shapes = [Polygon(p) for p in polygons]

    if inside:
        patch_area = patch_size * patch_size
        # For every usable polygon store the integer range of patch centers for which
        # the patch stays inside the polygon's bounding box.
        candidates = []
        for shape in roi_shapes:
            if shape.area < patch_area:
                continue
            minx, miny, maxx, maxy = shape.bounds
            lo_cx = int(np.ceil(minx)) + half_patch_size
            hi_cx = int(np.floor(maxx)) - (patch_size - half_patch_size)
            lo_cy = int(np.ceil(miny)) + half_patch_size
            hi_cy = int(np.floor(maxy)) - (patch_size - half_patch_size)
            if lo_cx <= hi_cx and lo_cy <= hi_cy:
                candidates.append((shape, lo_cx, hi_cx, lo_cy, hi_cy))
        if not candidates:
            return patches, patch_origins
    else:
        # Clamp so a patch larger than the slide is still read from the origin.
        max_x = max(0, slide.dimensions[0] - patch_size)
        max_y = max(0, slide.dimensions[1] - patch_size)

    for _ in range(num_patches):
        if inside:
            shape, lo_cx, hi_cx, lo_cy, hi_cy = random.choice(candidates)

        origin = None
        for _attempt in range(max_attempts):
            # Work with integer origins throughout so that the region checked against
            # the polygons, the region read, and the recorded origin are all the same.
            if inside:
                x = random.randint(lo_cx, hi_cx) - half_patch_size
                y = random.randint(lo_cy, hi_cy) - half_patch_size
                accepted = box(x, y, x + patch_size, y + patch_size).within(shape)
            else:
                x = random.randint(0, max_x)
                y = random.randint(0, max_y)
                if roi_shapes:
                    patch_box = box(x, y, x + patch_size, y + patch_size)
                    accepted = not any(patch_box.intersects(s) for s in roi_shapes)
                else:
                    # No annotated region at all: every location is outside.
                    accepted = True

            if accepted:
                origin = (x, y)
                break

        if origin is None:
            # No valid location found within the attempt budget; skip this patch
            # instead of looping forever.
            continue

        patch = np.array(slide.read_region(origin, 0, (patch_size, patch_size)))
        patches.append(patch)
        patch_origins.append(origin)

    return patches, patch_origins