In [1]:
from glob import glob
import os as os
import matplotlib.pyplot as plt  
import matplotlib as mpl  
import numpy as np  
import openslide  
from openslide import open_slide  
from openslide.deepzoom import DeepZoomGenerator  
import pandas as pd
import scipy as scipy
%matplotlib inline

import skimage.io
import skimage.measure
import skimage.color

import matplotlib.patches as mpatches

import histomicstk as htk

from scipy.ndimage.morphology import binary_fill_holes, binary_closing, binary_dilation  

# Default size (width, height) applied to every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (10, 6)  

In [2]:
# Root folder that holds one sub-directory per slide.
slides_root = "/home/a0121533/fyp2017dsap/slides/"

# os.listdir() and glob() return entries in arbitrary order. Sort them so
# that the positional train/test split performed on slide_files is the same
# on every run and on every machine.
directories = sorted(
    name for name in os.listdir(slides_root)
    if os.path.isdir(os.path.join(slides_root, name))
)
slide_files = []

for d in directories:
    slide_path = slides_root + d + "/"
    list_of_slides = sorted(glob(slide_path + "*.svs"))

    # Directories without any .svs file are skipped.
    if list_of_slides:
        slide_file_name = list_of_slides[0]  # one slide in each directory
        slide_files.append(slide_file_name)

In [3]:
# Sanity check: number of slide sub-directories found. Note this counts
# directories, not the .svs files collected in slide_files.
print(len(directories))

327


In [4]:
# Positional 70/30 split: the leading 70% of slide_files becomes the
# training set and the remainder becomes the test set (no shuffling).
n_slides = len(slide_files)
n_train = int(0.7 * n_slides)
slides_train, slides_test = slide_files[:n_train], slide_files[n_train:]

In [10]:
# Tile edge length passed to DeepZoomGenerator; create_tiles only keeps
# tiles whose size is exactly (tile_size, tile_size).
tile_size = 128
# Deep Zoom level index that create_tiles reads its tiles from.
default_slide_level = 12

In [11]:
def not_mostly_white(tile, threshold_x, white_level=240):
    '''
    Decide whether a tile holds enough non-white pixels to be worth keeping.

    A pixel is treated as white only when each of its first three channels
    is at or above ``white_level``; every other pixel counts as non-white.
    Channels are compared individually: comparing whole tuples would order
    them lexicographically and effectively look at the first channel only.

    Parameters
    ----------
    tile : object with a ``getdata()`` method
        ``getdata()`` must return a sized iterable of per-pixel channel
        sequences, e.g. ``(R, G, B)`` or ``(R, G, B, A)``. A fourth
        channel, if present, is ignored.
    threshold_x : float
        Fraction of non-white pixels that must be exceeded.
    white_level : int, optional
        Per-channel value at or above which a channel is considered white.
        Defaults to 240.

    Returns
    -------
    bool
        True when the fraction of non-white pixels is strictly greater
        than ``threshold_x``. A tile without pixels returns False.
    '''
    pixels = tile.getdata()
    total = len(pixels)
    if total == 0:
        # Nothing to measure; also avoids dividing by zero below.
        return False

    not_white = sum(
        1 for pixel in pixels
        if any(channel < white_level for channel in pixel[:3])
    )
    return (not_white / total) > threshold_x

# Location of the reference image (L1.png) used as the color-normalization target.
refImageFile = ('https://data.kitware.com/api/v1/file/'
                '57718cc28d777f1ecd8a883c/download')  # L1.png

# Read the reference image, then keep only its first three channels.
imReference = skimage.io.imread(refImageFile)
imReference = imReference[:, :, :3]

# Mean and standard deviation of the reference image in LAB space;
# normalize_slide passes both to the Reinhard normalization.
meanRef, stdRef = htk.preprocessing.color_conversion.lab_mean_std(imReference)


def normalize_slide(imInput):
    '''
    Reinhard-normalize an image against the notebook's reference statistics.

    ``imInput`` is handed unchanged to HistomicsTK's ``reinhard`` together
    with the module-level ``meanRef`` and ``stdRef``, and whatever that
    call returns is returned as-is.
    '''
    reinhard = htk.preprocessing.color_normalization.reinhard
    return reinhard(imInput, meanRef, stdRef)

def find_nth(haystack, needle, n):
    '''
    Return the index of the n-th non-overlapping occurrence of ``needle``.

    Occurrences are counted from 1 and searched left to right, each search
    resuming just past the end of the previous match. Returns -1 when there
    are fewer than ``n`` occurrences. Any ``n`` of 1 or less yields the
    index of the first occurrence.
    '''
    step = len(needle)
    position = haystack.find(needle)
    remaining = n - 1
    while remaining > 0 and position != -1:
        position = haystack.find(needle, position + step)
        remaining -= 1
    return position

In [12]:
def create_tiles(slide_files, threshold_x, slide_type):
    '''
    Cut every slide into tiles, keep the non-white ones and save them as PNG.

    Each slide is opened, tiled at ``default_slide_level`` with
    ``tile_size`` x ``tile_size`` tiles (both module-level settings), and
    each full-size tile that passes ``not_mostly_white`` is color-normalized
    and collected. The tile image is also written to disk as
    ``<slide_id>_<index>.png``.

    Parameters
    ----------
    slide_files : list of str
        Paths of the slide files to process.
    threshold_x : float
        Forwarded to ``not_mostly_white`` as the non-white fraction a tile
        must exceed to be kept.
    slide_type : str
        Name of the sub-folder (under the hard-coded tiles directory) that
        receives the PNG files, e.g. ``"train"`` or ``"test"``.

    Returns
    -------
    tiles_matrix : list
        Normalized tiles of all slides, in processing order.
    invalid_slides : list of str
        Paths of the slides whose processing raised ``OSError``.
    patient_id_list : list of str
        First 12 characters of the file name of every slide that was
        processed without error and produced at least one tile.
    '''
    num_slides = len(slide_files)
    print("num_slides: " + str(num_slides))

    write_slides_path = "/home/a0121533/fyp2017dsap/tiles/temp/" + slide_type + "/"
    # A missing output folder would make every tile.save() raise
    # FileNotFoundError, which is an OSError: the handler below would then
    # report perfectly readable slides as invalid. Create it up front.
    os.makedirs(write_slides_path, exist_ok=True)

    invalid_slides = []
    tiles_matrix = []
    patient_id_list = []

    for counter, s in enumerate(slide_files, start=1):
        slide_index = 0
        slide = None

        try:
            print("######### Opening Slide:", str(counter),"/", num_slides,"\n")
            slide = open_slide(s)

            # Derive the ids from the file name only, so that a '.' in a
            # directory name (or a file without extension) cannot corrupt them.
            file_name = os.path.basename(s)
            slide_id = file_name.split('.')[0]
            patient_id = file_name[:12]

            tiles = DeepZoomGenerator(slide, tile_size=tile_size, overlap=0, limit_bounds=False)
            # overlap: adds pixels to each side

            print("Level tiles for default level: " + str(tiles.level_tiles[default_slide_level]))
            # See how many tiles there are for default slide level of magnification.

            tiles_x, tiles_y = tiles.level_tiles[default_slide_level]

            # iterate through slide level rows and columns
            for i in range(tiles_x):
                for j in range(tiles_y):
                    tile = tiles.get_tile(default_slide_level, (i, j))
                    # Skip partial tiles and tiles that are mostly white.
                    if tile.size != (tile_size, tile_size):
                        continue
                    if not not_mostly_white(tile, threshold_x):
                        continue

                    slide_index += 1
                    tiles_matrix.append(normalize_slide(np.asarray(tile)))
                    # NOTE(review): the PNG written here is the original
                    # tile, not the normalized one kept in tiles_matrix --
                    # confirm this is intended.
                    tile_name = write_slides_path + slide_id + "_" + str(slide_index) + ".png"
                    tile.save(tile_name, "PNG")
            print("######### Closing Slide")
        except OSError:
            invalid_slides.append(s)
            continue
        finally:
            # Release the slide handle even when tiling failed part-way.
            if slide is not None:
                slide.close()

        if slide_index > 0:
            patient_id_list.append(patient_id)

    return tiles_matrix, invalid_slides, patient_id_list

In [13]:
# Tile the training slides: keep tiles whose non-white pixel fraction
# exceeds 0.98 and write their PNGs into the "train" sub-folder.
train_tiles, invalid_train, train_patient_id_list = create_tiles(slides_train, 0.98, "train")

num_slides: 228
######### Opening Slide: 1 / 228 

Level tiles for default level: (12, 9)
######### Closing Slide
######### Opening Slide: 2 / 228 

Level tiles for default level: (9, 8)
######### Closing Slide
######### Opening Slide: 3 / 228 

Level tiles for default level: (10, 11)
######### Closing Slide
######### Opening Slide: 4 / 228 

Level tiles for default level: (8, 9)
######### Closing Slide
######### Opening Slide: 5 / 228 

Level tiles for default level: (12, 9)
######### Closing Slide
######### Opening Slide: 6 / 228 

Level tiles for default level: (10, 4)
######### Closing Slide
######### Opening Slide: 7 / 228 

Level tiles for default level: (13, 4)
######### Closing Slide
######### Opening Slide: 8 / 228 

Level tiles for default level: (11, 11)
######### Closing Slide
######### Opening Slide: 9 / 228 

Level tiles for default level: (13, 12)
######### Closing Slide
######### Opening Slide: 10 / 228 

Level tiles for default level: (14, 12)
######### Closing Slide
#

In [None]:
# Tile the test slides with the same 0.98 non-white threshold; PNGs are
# written into the "test" sub-folder.
test_tiles, invalid_test, test_patient_id_list = create_tiles(slides_test, 0.98, "test")

num_slides: 99
######### Opening Slide: 1 / 99 

Level tiles for default level: (10, 12)
######### Closing Slide
######### Opening Slide: 2 / 99 

Level tiles for default level: (10, 4)
######### Closing Slide
######### Opening Slide: 3 / 99 

Level tiles for default level: (16, 5)
######### Closing Slide
######### Opening Slide: 4 / 99 

Level tiles for default level: (7, 12)
######### Closing Slide
######### Opening Slide: 5 / 99 

Level tiles for default level: (6, 9)
######### Closing Slide
######### Opening Slide: 6 / 99 

Level tiles for default level: (10, 4)
######### Closing Slide
######### Opening Slide: 7 / 99 

Level tiles for default level: (16, 16)
######### Closing Slide
######### Opening Slide: 8 / 99 

Level tiles for default level: (11, 10)
######### Closing Slide
######### Opening Slide: 9 / 99 

Level tiles for default level: (10, 10)
######### Closing Slide
######### Opening Slide: 10 / 99 

Level tiles for default level: (10, 7)
######### Closing Slide
######### O