In [1]:
import os
from pathlib import Path
import os.path as osp

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
from skimage.filters import threshold_otsu

BASE_TRUTH_DIR = Path('/home/ubuntu/data/Ground_Truth_Extracted/Mask')

def find_patches_from_slide(slide_path, base_truth_dir=BASE_TRUTH_DIR, filter_non_tissue=True):
    """Return a dataframe with one row per pixel of the slide's 1/256-scale thumbnail.

    The slide is opened with openslide and a thumbnail of
    (width / 256, height / 256) is requested, so each thumbnail pixel stands
    for roughly one 256x256 patch of the full-resolution slide.

    input: slide_path: path to WSI file
    output: samples: dataframe with the following columns:
        is_tissue: True where the greyscale thumbnail pixel is NOT brighter
            than the Otsu threshold (bright pixels are treated as background)
        slide_path: the `slide_path` argument, repeated on every row
        is_tumor: True where the truth-mask thumbnail pixel is > 0; always
            False for slides whose file name does not start with 'Tumor_'
        tile_loc: (row, col) position of the pixel in the thumbnail array

    option: base_truth_dir: directory holding '<slide name>_Mask.tif' truth slides
    option: filter_non_tissue: drop rows where no tissue was detected
    """
    base_truth_dir = Path(base_truth_dir)
    slide_name = osp.basename(slide_path)
    slide_contains_tumor = slide_name.startswith('Tumor_')

    with openslide.open_slide(slide_path) as slide:
        # NOTE(review): the requested size is a pair of floats; left untouched so the
        # resulting grid stays identical to the one the tile files were generated from.
        thumbnail = slide.get_thumbnail((slide.dimensions[0] / 256, slide.dimensions[1] / 256))

    thumbnail_grey = np.array(thumbnail.convert('L'))  # convert to grayscale
    thresh = threshold_otsu(thumbnail_grey)
    is_background = thumbnail_grey > thresh

    # stack() flattens the 2-D mask into a Series indexed by (row, col).
    patches = pd.DataFrame(~is_background).stack().to_frame('is_tissue')
    patches['slide_path'] = slide_path

    if slide_contains_tumor:
        truth_slide_path = base_truth_dir / slide_name.replace('.tif', '_Mask.tif')
        with openslide.open_slide(str(truth_slide_path)) as truth:
            thumbnail_truth = truth.get_thumbnail((truth.dimensions[0] / 256, truth.dimensions[1] / 256))

        truth_grey = np.array(thumbnail_truth.convert('L'))
        patches_y = pd.DataFrame(truth_grey > 0).stack().to_frame('is_tumor')

        # Aligned on the (row, col) index; rows present in only one thumbnail get NaN.
        samples = pd.concat([patches, patches_y], axis=1)
    else:
        samples = patches.assign(is_tumor=False)

    if filter_non_tissue:
        # Explicit copy: adding a column to a boolean-mask slice would otherwise
        # trigger pandas' SettingWithCopyWarning. `.eq(True)` (rather than using the
        # column directly as a mask) keeps working if the concat above left NaNs.
        samples = samples[samples['is_tissue'].eq(True)].copy()

    samples['tile_loc'] = list(samples.index)
    return samples.reset_index(drop=True)

In [3]:
def predict_batch_from_model(patches, model):
    """Predict which pixels are tumor for a batch of patches.

    input: patches: batch of images passed unchanged to `model.predict`
        (presumably batch_size x 256 x 256 x 3 rgb -- confirm against the model)
    input: model: keras model; its `predict` output must be 4-dimensional with
        at least two entries on the last axis
    output: predictions: batch_size x height x width array holding entry 1 of the
        model output's last axis (treated as the per-pixel tumor probability)
    """
    predictions = model.predict(patches)
    # Keep only channel 1 of the last axis; the other channels are discarded.
    return predictions[:, :, :, 1]

In [4]:
from keras.models import load_model
import openslide

# Load the Keras model used for prediction from 'model.h5'
# (a relative path, so it is resolved against the notebook's working directory).
model = load_model('model.h5')

Using TensorFlow backend.


In [5]:
# Directory of input tile PNGs, directory the heatmap tiles are written to,
# and the whole-slide image those tiles belong to.
input_dir = Path('/home/ubuntu/data/viewers/Tumor_009/slide_files/18_original')
output_dir = '/home/ubuntu/data/viewers/Tumor_009/slide_files/18_heatmap'
slide_path = '/home/ubuntu/data/slides/Tumor_009.tif'

# exist_ok=True makes re-running this cell safe without a separate existence check.
os.makedirs(output_dir, exist_ok=True)

In [6]:
# Keep the non-tissue rows as well, so there is one row per thumbnail pixel.
all_samples = find_patches_from_slide(slide_path, filter_non_tissue=False)
print('Total patches in slide: %d' % len(all_samples))
# Preview the first rows, then the tumor / non-tumor split.
all_samples.head(5)
all_samples['is_tumor'].value_counts()

Total patches in slide: 323936


Unnamed: 0,is_tissue,slide_path,is_tumor,tile_loc
0,False,/home/ubuntu/data/slides/Tumor_009.tif,False,"(0, 0)"
1,False,/home/ubuntu/data/slides/Tumor_009.tif,False,"(0, 1)"
2,False,/home/ubuntu/data/slides/Tumor_009.tif,False,"(0, 2)"
3,False,/home/ubuntu/data/slides/Tumor_009.tif,False,"(0, 3)"
4,False,/home/ubuntu/data/slides/Tumor_009.tif,False,"(0, 4)"


False    309550
True      14386
Name: is_tumor, dtype: int64

In [7]:
# Sanity check: input_dir must hold exactly one PNG per row of all_samples.
assert sum(1 for _ in input_dir.glob('*.png')) == len(all_samples), \
    "Number of pngs files does not equal rows in sample df!"

In [8]:
from tqdm import tqdm
import cv2
from matplotlib import cm
import shutil

n_samples = len(all_samples)

# Number of tiles passed to the model per predict() call.
batch_size = 32

# Weight of the heatmap in the blended image; the greyscale tile gets 1 - alpha.
alpha = 0.5

# loop through input tiles and output images
for offset in tqdm(range(0, n_samples, batch_size)):
    batch_samples = all_samples.iloc[offset:offset+batch_size]
    # tile_loc is (row, col); the reversal builds the '<col>_<row>.png' file name.
    png_fnames = [str(input_dir / ('%d_%d.png' % coord[::-1]))
                  for coord in batch_samples.tile_loc]

    if not batch_samples.is_tissue.any():
        # no patch in this batch has tissue: skip the model entirely
        # and just copy over the original images
        for png_fname in png_fnames:
            shutil.copy(png_fname, output_dir)
        continue

    # Read every tile once; the same arrays feed the model and the blend below.
    tiles = [plt.imread(png_fname) for png_fname in png_fnames]

    # plt.imread returns PNG data as floats in [0, 1]; rescale to 0-255
    # (presumably the range the model was trained on -- TODO confirm).
    X = np.array(tiles) * 255.

    # predict pixels probs
    preds = predict_batch_from_model(X, model)

    # save blended imgs
    for png_fname, output_img, pred_i in zip(png_fnames, tiles, preds):
        output_png_fname = osp.join(output_dir, osp.basename(png_fname))

        # Desaturate the tile (RGB -> grey -> 3-channel grey) so that only the
        # heatmap contributes colour to the result.
        grey = cv2.cvtColor(output_img, cv2.COLOR_RGB2GRAY)
        output_img2 = cv2.cvtColor(grey, cv2.COLOR_GRAY2RGB)

        # cm.jet maps values to RGBA; drop the alpha channel.
        overlay = np.float32(cm.jet(pred_i))[:, :, :3]
        blended = cv2.addWeighted(overlay, alpha, output_img2, 1 - alpha, 0)

        plt.imsave(output_png_fname, blended)

100%|██████████| 10123/10123 [1:25:27<00:00,  7.36it/s] 
