In [None]:
# Generate patches (256x256 pixels) from microscopic images (1024x768 pixels)
#Author: Dr Sourendra Nath Basu

In [None]:
import math
import os
import numpy as np
import openslide
from PIL import Image
from openslide import OpenSlideError
from openslide.deepzoom import DeepZoomGenerator
import pandas as pd
from scipy.ndimage import binary_fill_holes  # Updated import statement
from skimage.color import rgb2gray
from skimage.feature import canny  # Correct import for canny
from skimage.morphology import binary_closing, binary_dilation, disk  # Removed binary_fill_holes
import h5py
from skimage.transform import resize
import cv2

In [None]:
#To run in the selected virtual environment, NOT HERE
!pip install pillow scikit-image scipy

/bin/bash: /home/drsourendra/anaconda3/envs/newenv/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
# Lift Pillow's image-size guard (Image.MAX_IMAGE_PIXELS) so that very large images
# can be loaded without triggering a DecompressionBombWarning.
# Only do this for trusted images: the guard exists to protect against
# decompression-bomb style denial-of-service attacks.
Image.MAX_IMAGE_PIXELS = None  # None removes the limit entirely; assign an integer pixel count to keep a (higher) limit instead

In [None]:
def open_slide(filename):
    """Open ``filename`` as a slide, or return ``None`` if it cannot be opened.

    Args:
        filename: Path of the slide/image file to open.

    Returns:
        The object returned by ``openslide.open_slide``, or ``None`` when that
        call raises ``openslide.OpenSlideError`` or ``FileNotFoundError``.
        Any other exception propagates to the caller.
    """
    try:
        return openslide.open_slide(filename)
    except (openslide.OpenSlideError, FileNotFoundError):
        # Unreadable or missing file: signal failure with None rather than raising.
        return None

def create_tile_generator(slide, tile_size, overlap):
    """Build a Deep Zoom tile generator over ``slide``.

    Args:
        slide: An opened slide object (as returned by ``open_slide``).
        tile_size: Value forwarded as the generator's ``tile_size``.
        overlap: Value forwarded as the generator's ``overlap``.

    Returns:
        An ``openslide.deepzoom.DeepZoomGenerator``; ``limit_bounds`` is always
        passed as ``False``.
    """
    deepzoom_options = {
        "tile_size": tile_size,
        "overlap": overlap,
        "limit_bounds": False,
    }
    return openslide.deepzoom.DeepZoomGenerator(slide, **deepzoom_options)

def get_highest_zoom_level(generator):
    """Return the index of the last zoom level exposed by ``generator``.

    Args:
        generator: Any object with an integer ``level_count`` attribute.

    Returns:
        ``generator.level_count - 1`` (levels are indexed from zero).
    """
    total_levels = generator.level_count
    last_level_index = total_levels - 1
    return last_level_index

def keep_tile(tile, tissue_threshold):
    """Decide whether ``tile`` contains enough tissue to be kept.

    A mask is built by OR-ing the Canny edge maps of every channel, then
    closing, dilating (both with a radius-10 disk) and hole-filling it. The
    tile is kept when the fraction of mask pixels reaches the threshold.

    Args:
        tile: Array indexed as ``tile[:, :, channel]``; the number of channels
            is taken from ``tile.shape[2]``.
        tissue_threshold: Minimum fraction (0 to 1) of mask pixels required.

    Returns:
        Truthy when the mask fraction is ``>= tissue_threshold``.
    """
    channel_count = tile.shape[2]
    edge_mask = np.zeros(tile.shape[:2], dtype=bool)
    for channel_index in range(channel_count):
        # Accumulate edges found in any single channel.
        edge_mask = edge_mask | canny(tile[:, :, channel_index])

    footprint = disk(10)  # same structuring element for closing and dilation
    tissue_mask = binary_fill_holes(
        binary_dilation(binary_closing(edge_mask, footprint), footprint)
    )
    return tissue_mask.mean() >= tissue_threshold

def save_coordinates_and_tile(coords_dataset, tiles_dataset, col, row, tile):
    """Append one tile and its grid position to two resizable datasets.

    Each dataset is grown by one entry along axis 0 and the new last slot is
    filled: ``[col, row]`` in ``coords_dataset`` and ``tile`` in
    ``tiles_dataset``.

    Args:
        coords_dataset: Resizable dataset receiving ``[col, row]``.
        tiles_dataset: Resizable dataset receiving the tile data.
        col: Tile column index.
        row: Tile row index.
        tile: Tile data to store.
    """
    # Index of the slot each dataset gains once it has been extended by one.
    coord_slot = coords_dataset.shape[0]
    tile_slot = tiles_dataset.shape[0]

    coords_dataset.resize(coord_slot + 1, axis=0)
    tiles_dataset.resize(tile_slot + 1, axis=0)

    coords_dataset[coord_slot] = [col, row]
    tiles_dataset[tile_slot] = tile

def save_tile(tile, filename, col, row, slide_folder):
    """Save a tile as a TIFF image in a slide-specific folder.

    The output file is named ``<base>_tile_<col>_<row>.tiff``, where ``<base>``
    is the base name of ``filename`` with a trailing ``.tiff`` extension
    removed (only the extension is stripped, not other occurrences of
    ``.tiff`` inside the name).

    Args:
        tile: RGB image array; it is converted to BGR before being handed to
            OpenCV for writing.
        filename: Path of the source slide, used only to derive the tile name.
        col: Tile column index.
        row: Tile row index.
        slide_folder: Destination directory; created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(slide_folder, exist_ok=True)

    base_name = os.path.basename(filename)
    if base_name.endswith('.tiff'):
        base_name = base_name[:-len('.tiff')]

    tile_name = f"{base_name}_tile_{col}_{row}.tiff"
    save_path = os.path.join(slide_folder, tile_name)

    # cv2.imwrite reports failure through its boolean return value, so only
    # announce success once the file has actually been written.
    written = cv2.imwrite(save_path, cv2.cvtColor(tile, cv2.COLOR_RGB2BGR))
    if written:
        print(tile_name + " saved")
    else:
        print(f"Failed to write tile: {save_path}")

def process_slide(filename, tile_size, overlap, tissue_threshold, output_path):
    """Tile one slide and persist every tile that contains enough tissue.

    Tiles are taken from the highest zoom level of a Deep Zoom generator built
    over the slide. Tiles smaller than ``tile_size`` in either dimension are
    skipped; the remaining ones are kept when ``keep_tile`` accepts them.

    Outputs, all under ``output_path``:
      * ``h5_files/<slide_name>.h5`` with two datasets: ``tile_coordinates``
        (int32, one ``[col, row]`` per kept tile) and ``tile_data``
        (uint8, ``tile_size x tile_size x 3`` per kept tile).
      * ``patches/<slide_name>/`` with one TIFF image per kept tile.

    Args:
        filename: Path of the slide file to process.
        tile_size: Tile edge length in pixels.
        overlap: Overlap forwarded to the tile generator.
        tissue_threshold: Minimum tissue fraction forwarded to ``keep_tile``.
        output_path: Root directory for the HDF5 file and the patch images.

    Returns:
        ``{'slide_name': ..., 'num_patches_saved': ...}`` on success, or
        ``None`` when the slide cannot be opened or any error occurs (the
        error is printed, not raised).
    """
    slide = None
    try:
        slide = open_slide(filename)
        if slide is None:
            print(f"Failed to open slide: {filename}")
            return None

        generator = create_tile_generator(slide, tile_size, overlap)
        zoom_level = get_highest_zoom_level(generator)
        cols, rows = generator.level_tiles[zoom_level]

        slide_name = os.path.splitext(os.path.basename(filename))[0]

        # Separate directories for the HDF5 files and the patch images.
        h5_save_folder = os.path.join(output_path, 'h5_files')
        patches_save_folder = os.path.join(output_path, 'patches', slide_name)
        os.makedirs(h5_save_folder, exist_ok=True)
        os.makedirs(patches_save_folder, exist_ok=True)

        save_path = os.path.join(h5_save_folder, f"{slide_name}.h5")

        with h5py.File(save_path, 'w') as h5_file:
            # Both datasets start empty and grow by one entry per kept tile.
            coords_dataset = h5_file.create_dataset('tile_coordinates', (0, 2), maxshape=(None, 2), dtype='int32')
            tiles_dataset = h5_file.create_dataset('tile_data', (0, tile_size, tile_size, 3), maxshape=(None, tile_size, tile_size, 3), dtype='uint8')
            num_patches_saved = 0

            for col in range(cols):
                for row in range(rows):
                    tile = np.array(generator.get_tile(zoom_level, (col, row)))
                    if tile.shape[0] < tile_size or tile.shape[1] < tile_size:
                        continue  # Skip tiles smaller than the expected size
                    if keep_tile(tile, tissue_threshold):
                        # Force the stored shape to (tile_size, tile_size, 3); tiles may
                        # be larger than tile_size (presumably when overlap > 0).
                        resized_tile = resize(tile, (tile_size, tile_size, 3), preserve_range=True).astype('uint8')
                        save_coordinates_and_tile(coords_dataset, tiles_dataset, col, row, resized_tile)
                        save_tile(resized_tile, filename, col, row, patches_save_folder)
                        num_patches_saved += 1

            print(f"Slide {slide_name}: Total tiles saved: {num_patches_saved}")

        return {'slide_name': slide_name, 'num_patches_saved': num_patches_saved}
    except Exception as e:
        # Best-effort batch processing: report the problem and move on.
        print(f"An error occurred: {e}")
        return None
    finally:
        # Release the slide handle on every exit path so that processing many
        # slides in one session does not leak open handles.
        if slide is not None:
            slide.close()


# # Example usage


In [None]:
# Example usage
if __name__ == "__main__":
    folder = "/home/drsourendra/Documents/Research Projects/LN_MET_Detect_AI/SLide-01_Images"
    save_folder = '/home/drsourendra/Documents/Research Projects/LN_MET_Detect_AI/SLide-01_Patches'
    tile_size = 256  # Tile edge length in pixels
    overlap = 0  # Overlap passed to the tile generator
    # Minimum fraction of a tile that keep_tile must flag as tissue
    # (the mask is built from Canny edges, not an Otsu threshold).
    tissue_threshold = 0.70

    # Process each .tiff slide in the folder; sort the listing because
    # os.listdir returns entries in arbitrary order.
    slide_files = sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.tiff'))

    # Keep the per-slide summaries instead of discarding them; process_slide
    # returns None for slides that could not be processed.
    slide_summaries = []
    for slide_file in slide_files:
        summary = process_slide(slide_file, tile_size, overlap, tissue_threshold, save_folder)
        if summary is not None:
            slide_summaries.append(summary)