In [2]:
import os
import numpy as np
from PIL import Image
import cv2
from histolab.slide import Slide
from histolab.tiler import GridTiler
from histolab.handlers import GridPatchHandler
from skimage.color import rgb2hed
from skimage.filters import threshold_otsu, gaussian
from skimage.morphology import remove_small_objects, remove_small_holes
from skimage.measure import label, regionprops
import matplotlib.pyplot as plt

# ======================
# Tissue Mask Function
# ======================
def get_tissue_mask_basic(rgb_image, deconvolve_first=True, sigma=1.5, min_size=300, plot=False):
    if deconvolve_first:
        hed = rgb2hed(rgb_image)
        hema = -hed[:, :, 0]
        hema_norm = (hema - np.min(hema)) / (np.max(hema) - np.min(hema))
        gray_image = hema_norm
    else:
        gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) / 255.0

    smooth = gaussian(gray_image, sigma=sigma)
    thresh = threshold_otsu(smooth)
    binary_mask = smooth > thresh
    cleaned = remove_small_objects(binary_mask, min_size=min_size)
    cleaned = remove_small_holes(cleaned, area_threshold=min_size)
    labeled = label(cleaned)

    if plot:
        fig, axs = plt.subplots(1, 5, figsize=(20, 4))
        axs[0].imshow(rgb_image)
        axs[0].set_title('Original RGB')
        axs[1].imshow(gray_image, cmap='gray')
        axs[1].set_title('Hematoxylin / Grayscale')
        axs[2].imshow(smooth, cmap='gray')
        axs[2].set_title('Gaussian Smoothed')
        axs[3].imshow(binary_mask, cmap='gray')
        axs[3].set_title('Otsu Threshold')
        axs[4].imshow(cleaned, cmap='gray')
        axs[4].set_title('Cleaned Tissue Mask')
        for ax in axs:
            ax.axis('off')
        plt.tight_layout()
        plt.show()

    return labeled, cleaned.astype(np.uint8)

# ======================
# Pen Mark Removal
# ======================
def detect_pen_mask(rgb_img):
    bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    lower_blue = np.array([100, 50, 50])
    upper_blue = np.array([130, 255, 255])
    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    kernel = np.ones((3, 3), np.uint8)
    return cv2.dilate(mask, kernel, iterations=1)

def inpaint_patch(patch):
    pen_mask = detect_pen_mask(patch)
    bgr = cv2.cvtColor(patch, cv2.COLOR_RGB2BGR)
    inpainted = cv2.inpaint(bgr, pen_mask, 3, cv2.INPAINT_TELEA)
    return cv2.cvtColor(inpainted, cv2.COLOR_BGR2RGB)

# ======================
# Main Pipeline Function
# ======================
def process_wsi(wsi_path, output_root, patch_size=224):
    slide = Slide(wsi_path, processed_path="processed")
    slide_name = slide.name
    print(f"Processing: {slide_name}")

    # Step 1: Compute tissue mask on thumbnail
    thumb = np.array(slide.thumbnail)
    labeled, tissue_mask = get_tissue_mask_basic(thumb, plot=True)

    # Step 2: Get bounding box of largest tissue area
    props = regionprops(labeled)
    if not props:
        print("❌ No tissue found.")
        return
    largest = max(props, key=lambda r: r.area)
    minr, minc, maxr, maxc = largest.bbox

    full_w, full_h = slide.dimensions
    scale_x = full_w / thumb.shape[1]
    scale_y = full_h / thumb.shape[0]

    x_min = int(minc * scale_x)
    y_min = int(minr * scale_y)
    x_max = int(maxc * scale_x)
    y_max = int(maxr * scale_y)

    print(f"🧠 Tissue Bounding Box (level 0): ({x_min}, {y_min}) to ({x_max}, {y_max})")

    # Step 3: Set up tiler and handler
    tiler = GridTiler(
        tile_size=(patch_size, patch_size),
        level=0,
        check_tissue=True,
        tissue_percent=70,
        pixel_overlap=0
    )

    class CleanedPatchHandler(GridPatchHandler):
        def process(self, patch, location, slide):
            patch_np = np.array(patch)
            if np.mean(patch_np) > 240:
                return  # Skip mostly white patches
            cleaned = inpaint_patch(patch_np)
            cleaned_img = Image.fromarray(cleaned)
            save_name = f"{location[0]}_{location[1]}.png"
            cleaned_img.save(os.path.join(self.output_path, save_name))

    patch_output_dir = os.path.join(output_root, slide_name)
    os.makedirs(patch_output_dir, exist_ok=True)

    handler = CleanedPatchHandler(output_path=patch_output_dir)

    # Step 4: Extract patches inside bounding box
    tiler.extract(slide, handler=handler, regions=[(x_min, y_min, x_max, y_max)])




ModuleNotFoundError: No module named 'histolab.handlers'

In [2]:
import sys
print(sys.executable)


/cluster/home/srivash/miniforge3/envs/histolab/bin/python3.10


In [3]:
pip show histolab


Name: histolab
Version: 0.7.0
Summary: Python library for Digital Pathology Image Processing
Home-page: https://github.com/histolab/histolab
Author: E. Arbitrio, N. Bussola, A. Marcolini
Author-email: 
License: Apache-2.0
Location: /cluster/home/srivash/miniforge3/envs/histolab/lib/python3.10/site-packages
Requires: certifi, numpy, openslide-python, Pillow, scikit-image, scipy
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
data_path = '/lab/deasylab3/Jung/tiger/'
dir_TIFF_images = data_path + "/wsirois/wsi-level-annotations/images/"


imgs_names = os.listdir(dir_TIFF_images)
imgs_names.sort()
imgs_names = [i for i in imgs_names if i.startswith('TCGA')]  
wsi_path = dir_TIFF_images + imgs_names[0]
wsi_path

In [None]:
# ======================
# Run Pipeline
# ======================
if __name__ == "__main__":
    data_path = '/lab/deasylab3/Jung/tiger/wsirois/wsi-level-annotations/images/'
    output_path = 'cleaned_patches_grid/'
    os.makedirs(output_path, exist_ok=True)

    process_wsi_with_grid(os.path.join(data_path, file), output_path)