In [1]:
from glob import glob
import os as os
import matplotlib.pyplot as plt  
import matplotlib as mpl  
import numpy as np  
import openslide  
from openslide import open_slide  
from openslide.deepzoom import DeepZoomGenerator  
import pandas as pd
import scipy as scipy

from scipy.ndimage.morphology import binary_fill_holes, binary_closing, binary_dilation  

# Default size (width, height) applied to every matplotlib figure created in this notebook.
plt.rcParams['figure.figsize'] = (10, 6)  

In [2]:
# Edge length, in pixels, of the square tiles cut from each slide.
tile_size = 128
# Deep Zoom level the tiles are read from (12 by default).
default_slide_level = 12

subject_annotations = pd.read_table('subject_annotations.txt')
sample_annotations = pd.read_table('sample_annotations.txt')

sample_id_list = sample_annotations['SAMPID']

# Map each shortened slide name to the index of its row in the sample
# annotations table. The shortened name is the part of the sample ID that
# precedes its second-to-last "-"; when several sample IDs share a shortened
# name, the last one encountered wins.
sample_id_dict = {}
for row_number in range(len(sample_id_list)):
    full_id = sample_id_list[row_number]
    last_dash = full_id.rfind("-")
    cut_at = full_id.rfind("-", 0, last_dash)
    sample_id_dict[full_id[:cut_at]] = row_number

In [3]:
def not_mostly_white(tile, threshold_x):
    '''
    Returns True if the fraction of tile pixels that are not white is
    strictly greater than threshold_x.

    A pixel is white only when every colour channel is at or above its
    white threshold (240). Channels are compared one by one: comparing the
    whole tuple with "<" is lexicographic, so a pixel such as (250, 100, 100)
    would wrongly be treated as white.

    tile:        object exposing getdata() (e.g. the image returned by
                 DeepZoomGenerator.get_tile), yielding one channel tuple per
                 pixel, or a single int per pixel for one-band images.
                 Channels beyond the first three (e.g. alpha) are ignored.
    threshold_x: fraction in [0, 1], not a percentage.

    An empty tile has no non-white pixels, so False is returned for it.
    '''
    white_thresh = (240, 240, 240)
    pixels = tile.getdata()
    total = len(pixels)
    if total == 0:
        # Avoid dividing by zero on an empty tile.
        return False

    not_white = 0
    for pixel in pixels:
        # One-band images yield plain ints; treat them as a single channel.
        channels = pixel if isinstance(pixel, (tuple, list)) else (pixel,)
        if any(value < limit for value, limit in zip(channels, white_thresh)):
            not_white += 1
    return (not_white / total) > threshold_x

In [4]:
def create_tiles(data_type, slide_type, threshold_x):
    '''
    Slices every .svs slide found under slides/<data_type>/<slide_type>s/
    into PNG tiles at the default slide level and writes them to
    data/<data_type>/<slide_type>s/ as <slide_name>_<n>.png (n starts at 1
    for each slide).

    Slides are read with the openslide library. A tile is saved only when
    it is a full tile_size x tile_size square and not_mostly_white() accepts
    it for the given threshold_x.

    data_type:   sub-directory name, e.g. "train" or "validation"
    slide_type:  tissue name without the trailing "s", e.g. "lung"
    threshold_x: fraction of non-white pixels a tile must exceed to be kept

    Returns the list of slide names that have no entry in sample_id_dict.
    Slides whose SMNTRART value is not greater than 0 (including missing
    values) are skipped without being reported.
    '''
    read_slides_path = "slides/" + data_type + "/" + slide_type + "s/"
    write_slides_path = "data/" + data_type + "/" + slide_type + "s/"

    invalid_slides = []

    slides = glob(read_slides_path + "*.svs")
    print("num_slides: " + str(len(slides)))

    for slide_path in slides:
        # os.path handles both "/" and the platform's own separator.
        slide_name = os.path.splitext(os.path.basename(slide_path))[0]

        # Only a missing annotation entry marks a slide as invalid; errors
        # raised later while tiling are not swallowed.
        if slide_name not in sample_id_dict:
            invalid_slides.append(slide_name)
            continue

        row_index = sample_id_dict[slide_name]
        # Intragenic Rate: the fraction of reads that map within genes (within introns or exons)
        SMNTRART = sample_annotations['SMNTRART'].iloc[row_index]

        # "not (x > 0)" also skips NaN, matching a plain "x > 0" check.
        if not (SMNTRART > float(0)):
            continue

        print("######### Opening Slide:" + slide_name + "\n")
        slide = open_slide(slide_path)
        try:
            # Make sure the output directory exists before the first save.
            os.makedirs(write_slides_path, exist_ok=True)
            tile_prefix = write_slides_path + slide_name

            # overlap: adds pixels to each side
            tiles = DeepZoomGenerator(slide, tile_size=tile_size, overlap=0, limit_bounds=False)

            # Number of tile columns and rows at the default slide level.
            print("Level tiles for default level: " + str(tiles.level_tiles[default_slide_level]))
            tiles_x, tiles_y = tiles.level_tiles[default_slide_level]

            slide_index = 0
            for i in range(tiles_x):
                for j in range(tiles_y):
                    tile = tiles.get_tile(default_slide_level, (i, j))
                    # Edge tiles can be smaller than tile_size; skip those.
                    if tile.size == (tile_size, tile_size) and not_mostly_white(tile, threshold_x):
                        slide_index += 1
                        tile.save(tile_prefix + "_" + str(slide_index) + ".png", "PNG")
            print("######### Closing Slide:" + slide_path + "\n")
        finally:
            # Release the slide handle even if tiling fails part-way.
            slide.close()
    return invalid_slides

In [5]:
# Tile the lung validation slides, keeping tiles whose non-white pixel fraction
# exceeds 0.8, then print the slide names create_tiles reported as invalid.
invalid_lung_validation_slides = create_tiles("validation", "lung", 0.8)
print(invalid_lung_validation_slides)

num_slides: 15
######### Opening Slide:GTEX-13O3O-0726

Level tiles for default level: (22, 12)
######### Closing Slide:slides/validation/lungs/GTEX-13O3O-0726.svs

######### Opening Slide:GTEX-13JUV-0526

Level tiles for default level: (26, 19)
######### Closing Slide:slides/validation/lungs/GTEX-13JUV-0526.svs

######### Opening Slide:GTEX-13O21-3026

Level tiles for default level: (17, 9)
######### Closing Slide:slides/validation/lungs/GTEX-13O21-3026.svs

######### Opening Slide:GTEX-13OVJ-0726

Level tiles for default level: (24, 14)
######### Closing Slide:slides/validation/lungs/GTEX-13OVJ-0726.svs

######### Opening Slide:GTEX-13JVG-1426

Level tiles for default level: (27, 22)
######### Closing Slide:slides/validation/lungs/GTEX-13JVG-1426.svs

######### Opening Slide:GTEX-13FTY-0126

Level tiles for default level: (23, 13)
######### Closing Slide:slides/validation/lungs/GTEX-13FTY-0126.svs

######### Opening Slide:GTEX-13N11-0326

Level tiles for default level: (22, 28)
#####

In [6]:
# Tile the liver validation slides, keeping tiles whose non-white pixel fraction
# exceeds 0.9, then print the slide names create_tiles reported as invalid.
invalid_liver_validation_slides = create_tiles("validation", "liver", 0.9)
print(invalid_liver_validation_slides)

num_slides: 20
######### Opening Slide:GTEX-13VXU-0926

Level tiles for default level: (22, 17)
######### Closing Slide:slides/validation/livers/GTEX-13VXU-0926.svs

######### Opening Slide:GTEX-13SLX-1226

Level tiles for default level: (22, 12)
######### Closing Slide:slides/validation/livers/GTEX-13SLX-1226.svs

######### Opening Slide:GTEX-12ZZZ-1326

Level tiles for default level: (17, 10)
######### Closing Slide:slides/validation/livers/GTEX-12ZZZ-1326.svs

######### Opening Slide:GTEX-144GM-1326

Level tiles for default level: (25, 15)
######### Closing Slide:slides/validation/livers/GTEX-144GM-1326.svs

######### Opening Slide:GTEX-131XH-0626

Level tiles for default level: (30, 18)
######### Closing Slide:slides/validation/livers/GTEX-131XH-0626.svs

######### Opening Slide:GTEX-132NY-0926

Level tiles for default level: (26, 13)
######### Closing Slide:slides/validation/livers/GTEX-132NY-0926.svs

######### Opening Slide:GTEX-139TU-0826

Level tiles for default level: (24, 18

In [7]:
# Tile the liver training slides, keeping tiles whose non-white pixel fraction
# exceeds 0.9, then print the slide names create_tiles reported as invalid.
invalid_liver_train_slides = create_tiles("train", "liver", 0.9)
print(invalid_liver_train_slides)

num_slides: 136
######### Opening Slide:GTEX-13NZB-0626

Level tiles for default level: (26, 16)
######### Closing Slide:slides/train/livers/GTEX-13NZB-0626.svs

######### Opening Slide:GTEX-11NUK-1226

Level tiles for default level: (29, 19)
######### Closing Slide:slides/train/livers/GTEX-11NUK-1226.svs

######### Opening Slide:GTEX-13N2G-0926

Level tiles for default level: (29, 14)
######### Closing Slide:slides/train/livers/GTEX-13N2G-0926.svs

######### Opening Slide:GTEX-11DXY-0526

Level tiles for default level: (18, 22)
######### Closing Slide:slides/train/livers/GTEX-11DXY-0526.svs

######### Opening Slide:GTEX-12WSM-0726

Level tiles for default level: (19, 10)
######### Closing Slide:slides/train/livers/GTEX-12WSM-0726.svs

######### Opening Slide:GTEX-11GSP-0626

Level tiles for default level: (27, 20)
######### Closing Slide:slides/train/livers/GTEX-11GSP-0626.svs

######### Opening Slide:GTEX-13FTZ-0726

Level tiles for default level: (27, 18)
######### Closing Slide:sli

In [8]:
# Tile the lung training slides, keeping tiles whose non-white pixel fraction
# exceeds 0.8, then print the slide names create_tiles reported as invalid.
invalid_lung_train_slides = create_tiles("train", "lung", 0.8)
print(invalid_lung_train_slides)

num_slides: 136
######### Opening Slide:GTEX-131YS-0926

Level tiles for default level: (21, 14)
######### Closing Slide:slides/train/lungs/GTEX-131YS-0926.svs

######### Opening Slide:GTEX-11P7K-0326

Level tiles for default level: (21, 18)
######### Closing Slide:slides/train/lungs/GTEX-11P7K-0326.svs

######### Opening Slide:GTEX-13RTJ-1126

Level tiles for default level: (28, 20)
######### Closing Slide:slides/train/lungs/GTEX-13RTJ-1126.svs

######### Opening Slide:GTEX-1477Z-0626

Level tiles for default level: (23, 17)
######### Closing Slide:slides/train/lungs/GTEX-1477Z-0626.svs

######### Opening Slide:GTEX-145LU-0526

Level tiles for default level: (23, 15)
######### Closing Slide:slides/train/lungs/GTEX-145LU-0526.svs

######### Opening Slide:GTEX-11ZTS-1226

Level tiles for default level: (26, 12)
######### Closing Slide:slides/train/lungs/GTEX-11ZTS-1226.svs

######### Opening Slide:GTEX-13OW5-0726

Level tiles for default level: (23, 13)
######### Closing Slide:slides/tr

######### Closing Slide:slides/train/lungs/GTEX-131XE-0726.svs

######### Opening Slide:GTEX-146FQ-0926

Level tiles for default level: (21, 13)
######### Closing Slide:slides/train/lungs/GTEX-146FQ-0926.svs

######### Opening Slide:GTEX-131XH-0426

Level tiles for default level: (29, 19)
######### Closing Slide:slides/train/lungs/GTEX-131XH-0426.svs

######### Opening Slide:GTEX-12WSI-0826

Level tiles for default level: (21, 18)
######### Closing Slide:slides/train/lungs/GTEX-12WSI-0826.svs

######### Opening Slide:GTEX-131XW-1126

Level tiles for default level: (23, 17)
######### Closing Slide:slides/train/lungs/GTEX-131XW-1126.svs

######### Opening Slide:GTEX-13S86-0626

Level tiles for default level: (21, 13)
######### Closing Slide:slides/train/lungs/GTEX-13S86-0626.svs

######### Opening Slide:GTEX-11NUK-0826

Level tiles for default level: (21, 18)
######### Closing Slide:slides/train/lungs/GTEX-11NUK-0826.svs

######### Opening Slide:GTEX-12ZZY-0926

Level tiles for default l