In [None]:
import os, sys, pdb, math
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
import openslide
from numba import jit
from cluster import overlay_image

In [None]:
# Distinguish background by measuring each pixel's colour distance from white
def remove_background(slide_img, x_tile_size, y_tile_size, color_delta=40):
    """Count the near-white (background) pixels in the top-left region of a tile.

    A pixel is counted as background when its weighted colour distance from
    pure white (255, 255, 255) is strictly smaller than ``color_delta``. The
    distance weights the red and blue differences by the mean red level and
    the green difference by a constant factor of 4.

    The computation uses NumPy array operations on horizontal strips, so it
    needs no JIT compiler and keeps temporary arrays small even for large tiles.

    Parameters
    ----------
    slide_img : array-like, shape (rows, cols, channels)
        Image whose channels 0, 1, 2 are read as red, green and blue.
        Any further channels are ignored.
    x_tile_size : int
        Number of columns to examine, starting at column 0.
    y_tile_size : int
        Number of rows to examine, starting at row 0.
    color_delta : float, optional
        Maximum distance from white (exclusive) for a pixel to count as
        background. Defaults to 40; raise it to accept darker backgrounds.

    Returns
    -------
    int
        Number of background pixels inside the examined region.

    Raises
    ------
    IndexError
        If the image has fewer than three channels or the requested region
        does not fit inside the image.
    """
    # An empty region contains no pixels at all (range() over a non-positive
    # size would not iterate either).
    if x_tile_size <= 0 or y_tile_size <= 0:
        return 0

    pixels = np.asarray(slide_img)
    if (
        pixels.ndim != 3
        or pixels.shape[2] < 3
        or y_tile_size > pixels.shape[0]
        or x_tile_size > pixels.shape[1]
    ):
        raise IndexError(
            "region of {} rows x {} columns with 3 channels does not fit an image of shape {}".format(
                y_tile_size, x_tile_size, pixels.shape
            )
        )

    rows_per_strip = 256  # bounds the size of the temporary integer arrays
    background_pixels = 0

    for top in range(0, y_tile_size, rows_per_strip):
        bottom = min(top + rows_per_strip, y_tile_size)

        # Widen to int64 so the squared, weighted differences cannot overflow.
        strip = pixels[top:bottom, :x_tile_size, :3].astype(np.int64)
        red, green, blue = strip[..., 0], strip[..., 1], strip[..., 2]

        rmean = (red + 255) // 2
        d_red = red - 255
        d_green = green - 255
        d_blue = blue - 255

        # Weighted squared distance between the pixel colour and white.
        dist_sq = (
            (((512 + rmean) * d_red * d_red) >> 8)
            + 4 * d_green * d_green
            + (((767 - rmean) * d_blue * d_blue) >> 8)
        )

        background_pixels += int(np.count_nonzero(np.sqrt(dist_sq) < color_delta))

    return background_pixels

# Helper and entry point for cutting a whole-slide image into overlapping tiles
def _tile_origins(slide_extent, tile_extent):
    """Return the start offsets of the overlapping tiles along one axis."""
    tile_count = int(np.floor((slide_extent - tile_extent - 1) / (tile_extent * 0.9))) + 2
    # Always produce at least one tile, even when the slide is smaller than a tile.
    tile_count = max(tile_count, 1)

    # Every tile but the last advances by 90% of the tile extent (10% overlap).
    origins = [int(index * tile_extent * 0.9) for index in range(tile_count - 1)]
    # The last tile is aligned with the far edge of the slide; never start below 0.
    origins.append(max(slide_extent - tile_extent, 0))
    return origins


def extract_svs_img(slide_filename, x_tile_size=4000, y_tile_size=3000, max_background_fraction=0.7):
    """Cut a slide into overlapping tiles and save them as TIFF files.

    Three folders are created in the current working directory if they do
    not exist yet: ``All_Images``, ``Tissue_Images`` and ``Overlay_Images``.
    Every tile is written to ``All_Images`` as ``<slide>_P<n>.tif``. Tiles
    whose background pixel count (see ``remove_background``) is below
    ``max_background_fraction`` of the tile area are additionally written to
    ``Tissue_Images`` as ``<slide>_T<n>.tif``. ``Overlay_Images`` is only
    created here; this function writes nothing into it.

    Tiles are read at level 0, left to right and top to bottom. Neighbouring
    tiles overlap by 10%, and the last tile of each row/column is aligned
    with the slide edge.

    Parameters
    ----------
    slide_filename : str
        Path of the slide file to open with OpenSlide. Only its base name
        (without extension) is used to name the output files.
    x_tile_size : int, optional
        Tile width in pixels. Defaults to 4000.
    y_tile_size : int, optional
        Tile height in pixels. Defaults to 3000.
    max_background_fraction : float, optional
        A tile is kept as tissue when fewer than this fraction of its pixels
        are background. Defaults to 0.7.

    Returns
    -------
    None
    """
    # Create the output folders; exist_ok keeps the function re-runnable.
    output_root = os.getcwd()
    all_images_file_path = os.path.join(output_root, "All_Images")
    tissue_images_file_path = os.path.join(output_root, "Tissue_Images")
    overlay_images_file_path = os.path.join(output_root, "Overlay_Images")
    for folder in (all_images_file_path, tissue_images_file_path, overlay_images_file_path):
        os.makedirs(folder, exist_ok=True)

    # Strip any directory part so the output lands inside the output folders.
    slide_stem = os.path.splitext(os.path.basename(slide_filename))[0]
    tile_area = x_tile_size * y_tile_size

    p_num, t_num = 1, 1  # running indices used to name the files

    # The context manager closes the slide even if reading or saving fails.
    with openslide.OpenSlide(slide_filename) as slide_file:
        slide_width, slide_height = slide_file.dimensions

        # Upper-left corner coordinates of every tile along each axis.
        x_origins = _tile_origins(slide_width, x_tile_size)
        y_origins = _tile_origins(slide_height, y_tile_size)

        # Left to right and top to bottom
        for start_y in y_origins:
            for start_x in x_origins:

                # Read the tile and keep only the first three channels.
                region = slide_file.read_region((start_x, start_y), 0, (x_tile_size, y_tile_size))
                slide_img = np.array(region)[:, :, :3]

                # Saving as all_images
                all_name = "{stem}_P{num}.tif".format(stem=slide_stem, num=p_num)
                io.imsave(os.path.join(all_images_file_path, all_name), slide_img)

                # Count the near-white pixels and compare with the tile area.
                background_pixels = remove_background(slide_img, x_tile_size, y_tile_size)

                if background_pixels < max_background_fraction * tile_area:
                    # Saving as tissue_images
                    tissue_name = "{stem}_T{num}.tif".format(stem=slide_stem, num=t_num)
                    io.imsave(os.path.join(tissue_images_file_path, tissue_name), slide_img)
                    t_num += 1

                p_num += 1

In [None]:
# Run the tile extraction on the slide to process.
# NOTE(review): `filename` is not defined in any visible cell -- assign the path of the
# slide file to `filename` before running this cell on a fresh kernel.
extract_svs_img(filename)