In [1]:
# header files
import math, sys, time, glob, os
from openslide import open_slide, deepzoom
from PIL import ImageStat, Image
import numpy as np
print("Header files loaded...")

Header files loaded...


In [2]:
# function for extracting patches
def patch_extraction(wsi_path, output_path, tile_size=3000):
    # read slide
    slide = open_slide(wsi_path)
    
    # using deepzoom read non-empty regions
    dz = deepzoom.DeepZoomGenerator(slide, tile_size=tile_size, overlap=0, limit_bounds=True)
    mask_tile_size = tile_size*slide.level_downsamples[0] // slide.level_downsamples[0]
    dz_level = dz.level_count-1
    
    # get filename
    filename = wsi_path.split("/")[-1]
    filename = filename.split(".")[0]
    print(filename)
    
    # read entire slide
    mask = slide.read_region((0, 0), 0, slide.level_dimensions[0]).convert("L")
    fn = lambda x : 0 if x > 200 or x < 50 else 1
    mask = mask.point(fn, mode='1')
    
    # loop through each patch of the slide of size=(tile_size, tile_size)
    for i in range(dz.level_tiles[dz_level][0]):
        for j in range(dz.level_tiles[dz_level][1]):
            coord = dz.get_tile_coordinates(dz_level, (i, j))
            print(coord)
            if coord[2] != (tile_size, tile_size):
                continue
            else:
                coord = coord[0]
            cenX = (coord[0] + tile_size * slide.level_downsamples[0]//2) // slide.level_downsamples[0]
            cenY = (coord[1] + tile_size * slide.level_downsamples[0]//2) // slide.level_downsamples[0]
            mask_region = mask.crop((cenX-(mask_tile_size//2), cenY-(mask_tile_size//2), cenX+(mask_tile_size//2), cenY+(mask_tile_size//2)))
            if ImageStat.Stat(mask_region).mean[0] > 0.5:
                tile = dz.get_tile(dz_level, (i, j)).convert("RGB")
                tile_output_path = os.path.join(output_path, filename + "_" + str(coord[0]) + '_' + str(coord[1]) + '.png')
                tile.save(tile_output_path)

In [3]:
patch_extraction(wsi_path="../../sample_ovarian_cancer_data/TCGA-25-2401.svs", output_path="../../sample_patches_5000_ovarian_cancer/")

TCGA-25-2401
((0, 0), 0, (5000, 5000))
((0, 5000), 0, (5000, 5000))
((0, 10000), 0, (5000, 5000))
((0, 15000), 0, (5000, 5000))
((0, 20000), 0, (5000, 5000))
((0, 25000), 0, (5000, 5000))
((0, 30000), 0, (5000, 5000))
((0, 35000), 0, (5000, 5000))
((0, 40000), 0, (5000, 5000))
((0, 45000), 0, (5000, 5000))
((0, 50000), 0, (5000, 5000))
((0, 55000), 0, (5000, 5000))
((0, 60000), 0, (5000, 726))
((5000, 0), 0, (5000, 5000))
((5000, 5000), 0, (5000, 5000))
((5000, 10000), 0, (5000, 5000))
((5000, 15000), 0, (5000, 5000))
((5000, 20000), 0, (5000, 5000))
((5000, 25000), 0, (5000, 5000))
((5000, 30000), 0, (5000, 5000))
((5000, 35000), 0, (5000, 5000))
((5000, 40000), 0, (5000, 5000))
((5000, 45000), 0, (5000, 5000))
((5000, 50000), 0, (5000, 5000))
((5000, 55000), 0, (5000, 5000))
((5000, 60000), 0, (5000, 726))
((10000, 0), 0, (5000, 5000))
((10000, 5000), 0, (5000, 5000))
((10000, 10000), 0, (5000, 5000))
((10000, 15000), 0, (5000, 5000))
((10000, 20000), 0, (5000, 5000))
((10000, 25000)

KeyboardInterrupt: 