In [10]:
!conda install -c cdeepakroy openslide-python 
import csv
import os
import glob
import re
from pandas import DataFrame, Series
from openslide import open_slide
from PIL import Image
import timeit
import time
import math
import numpy as np
from scipy.ndimage.morphology import binary_fill_holes
from skimage.color import rgb2gray
from skimage.feature import canny
from skimage.morphology import binary_closing, binary_dilation, disk

Solving environment: failed

UnsatisfiableError: The following specifications were found to be in conflict:
  - jeepney
  - openslide-python
Use "conda info <package>" to see the dependencies for each package.



OSError: libopenslide.so.0: cannot open shared object file: No such file or directory

In [2]:
train_paths = ["/scratch/ss4yd/chrc_data/train/Celiac/"]


images = {}
images_by_folder = {}
for train_path in train_paths:
    images_by_folder[str(train_path)] = []
    files = glob.glob(os.path.join(train_path, '*.svs'))
    for fl in files:
        flbase = os.path.basename(fl)
        flbase_noext = os.path.splitext(flbase)[0]
        images[flbase_noext]=fl
        images_by_folder[str(train_path)].append(flbase)

In [3]:
path_change_map = {}

for key in list(images_by_folder.keys()):
    temp = key.replace('chrc_data', 'chrc_data_patches')
    path_change_map[key] = temp

In [61]:
def convert_to_3d_array(patch):
    rgb = patch.convert('RGB')
    return np.array(rgb)

def optical_density(tile):
    tile = tile.astype(np.float64)
    od = -np.log((tile+1)/240)
    return od

def keep_tile(tile, tile_size,  tissue_threshold):
    if tile.shape[0:2] == (tile_size, tile_size):
        print("inside if")
        tile_orig = tile
        tile = rgb2gray(tile)
        tile = 1 - tile
        
        tile = canny(tile)
        
        tile = binary_closing(tile, disk(10))
        tile = binary_dilation(tile, disk(10))
        tile = binary_fill_holes(tile)
        percentage1 = tile.mean()
        
        check1 = percentage1 >= tissue_threshold

        # Check 2
        # Convert to optical density values
        tile = optical_density(tile_orig)
        # Threshold at beta
        beta = 0.15
        tile = np.min(tile, axis=2) >= beta
        # Apply morphology for same reasons as above.
        tile = binary_closing(tile, disk(2))
        tile = binary_dilation(tile, disk(2))
        tile = binary_fill_holes(tile)
        percentage2 = tile.mean()
        check2 = percentage2 >= tissue_threshold
        print(percentage1, percentage2)
        return check1 and check2
    else:
        return False

def process_tile(tile, sample_size, grayscale, slide_num):
    
    """
    Process a tile into a group of smaller samples.
    Cut up a tile into smaller blocks of sample_size x sample_size pixels,
    change the shape of each sample from (H, W, channels) to
    (channels, H, W), then flatten each into a vector of length
    channels*H*W.
    Args:
      tile_tuple: A (slide_num, tile) tuple, where slide_num is an
        integer, and tile is a 3D NumPy array of shape
        (tile_size, tile_size, channels).
      sample_size: The new width and height of the square samples to be
        generated.
      grayscale: Whether or not to generate grayscale samples, rather
        than RGB.
    Returns:
      A list of (slide_num, sample) tuples representing cut up tiles,
      where each sample is a 3D NumPy array of shape
      (sample_size_x, sample_size_y, channels).
    """
    if grayscale:
        tile = rgb2gray(tile)[:, :, np.newaxis]  # Grayscale
        # Save disk space and future IO time by converting from [0,1] to [0,255],
        # at the expense of some minor loss of information.
        tile = np.round(tile * 255).astype("uint8")
    x, y, ch = tile.shape
    # 1. Reshape into a 5D array of (num_x, sample_size_x, num_y, sample_size_y, ch), where
    # num_x and num_y are the number of chopped tiles on the x and y axes, respectively.
    # 2. Swap sample_size_x and num_y axes to create
    # (num_x, num_y, sample_size_x, sample_size_y, ch).
    # 3. Combine num_x and num_y into single axis, returning
    # (num_samples, sample_size_x, sample_size_y, ch).
    samples = (tile.reshape((x // sample_size, sample_size, y // sample_size, sample_size, ch))
                   .swapaxes(1, 2)
                   .reshape((-1, sample_size, sample_size, ch)))
    samples = [(slide_num, sample) for sample in list(samples)]
    return samples


def create_patches(slide, patch_size, overlap):
    # steps to advance per axis with overlap
    step_size = patch_size - overlap
    
    # get dimensions of the image
    xlim = slide.level_dimensions[0][0]
    ylim = slide.level_dimensions[0][1]
    print("Dimensions x: " + str(xlim) + " y: " + str(ylim))
    
    # get the number of times to traverse each axis
    x_ind = math.ceil(xlim/(step_size))
    y_ind = math.ceil(ylim/(step_size))
    
    # initialize list to store patches
    patches = []
    
    # pixels left to traverse in the y-axis at the beginning of the traversal
    img_y_left = ylim
    # initialize the starting y corner
    y = 0 - step_size
    for y_ in range(y_ind):
        
        # initialize the starting x corner
        x = 0-step_size
        
        # advance the y axis (note: it starts with 0)
        y = y + step_size
        
        # pixels left to traverse in the x-axis at the beginning of the traversal
        img_x_left = xlim
        
        # update the number of pixels left to traverse
        img_y_left = img_y_left - step_size
        print('y - left: ' + str(img_y_left))
        
        # if more than patch size left, get the (patch_size x patch_size) image
        if (img_y_left > 0 and img_y_left > patch_size):
            for x_ in range(x_ind):
                x = x + (step_size)
                img_x_left = img_x_left - step_size
                print(img_x_left)
                if (img_x_left > 0 and img_x_left > patch_size):
                    patches.append(slide.read_region((x, y), 0, (patch_size, patch_size)))
                elif (img_x_left < patch_size and img_x_left > 0):
                    x = xlim - patch_size
                    patches.append(slide.read_region((x, y), 0, (patch_size, patch_size)))
                    break
        # if less than patch size left, get the rest of the image, regardless of the overlap
        elif (img_y_left > 0 and img_y_left < patch_size):
            y = ylim - patch_size
            for x_ in range(x_ind):
                x = x + (step_size)
                img_x_left = img_x_left - step_size
                print(img_x_left)
                if (img_x_left > 0 and img_x_left > patch_size):
                    patches.append(slide.read_region((x, y), 0, (patch_size, patch_size)))
                elif (img_x_left < patch_size and img_x_left > 0):
                    x = xlim - patch_size
                    patches.append(slide.read_region((x, y), 0, (patch_size, patch_size)))
                    break
            break
    
    return patches

In [62]:
def create_filtered_patches(slide, patch_size, overlap, slide_num):
    patches = create_patches(slide, patch_size, overlap)
    
    # convert to RGB from RGBA
    tiles = [convert_to_3d_array(patch) for patch in patches]
    
    filtered_tiles = [tile for tile in tiles if keep_tile(tile,patch_size, 0.30)]
    
    samples = [process_tile(tile, patch_size, False, slide_num) for tile in filtered_tiles]
    
    return samples
    
    

In [63]:
slide = open_slide(images['C07-37_05'])

In [64]:
samples = create_filtered_patches(slide, patch_size=3000, overlap=1500, slide_num='C07-37_05')

Dimensions x: 5603 y: 6478
y - left: 4978
4103
2603
y - left: 3478
4103
2603
y - left: 1978
4103
2603
inside if


  safe = ((np.issubdtype(dt, int) and dt.itemsize <= int_size) or


0.3462753333333333 0.3103207777777778
inside if
0.04852555555555556 0.040675666666666666
inside if
0.7342851111111112 0.7060967777777778
inside if
0.32853266666666664 0.3102772222222222
inside if
0.495234 0.4643152222222222
inside if
0.566612 0.543941


In [65]:
def save_nonlabelled_sample_2_jpeg(sample, save_dir):
    """
    Save the sample without labels into JPEG
    Args:
      sample_element: a sample tuple without labels, e.g. (slide_num, sample)
      save_dir: the file directory at which to save JPEGs
    """
    slide_num, img_value = sample
    filename = '{slide_num}_{hash}.jpeg'.format(
        slide_num=slide_num, hash=np.random.randint(1e4))
    filepath = os.path.join(save_dir, filename)
    save_jpeg_help(img_value, filepath)


def save_jpeg_help(img_value, filepath):
    """
     Save data into JPEG
     Args:
       img_value: the image value with the size (img_size_x, img_size_y, channels)
       file path: the file path at which to save JPEGs
     """
    dir = os.path.dirname(filepath)
    os.makedirs(dir, exist_ok=True)
    img = Image.fromarray(img_value.astype(np.uint8), 'RGB')
    img.save(filepath)


In [68]:
for sample in samples:
    save_nonlabelled_sample_2_jpeg(sample[0], './')