In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

import openslide
from openslide import deepzoom
import cv2 as cv
from PIL import Image, ImageDraw
from tqdm import tqdm
import time

In [2]:
# Project code; it is interpolated into the slide directory name, the metadata
# CSV file name and the per-slide tile folder names in the cells below.
project = "BRCA"

In [3]:
## ---- Input locations ---------------------------------------------------------
## Root folder holding one sub-folder per slide_id (the trailing "/" is required:
## slide paths are built by plain string concatenation).
## Alternative roots that have been used (swap in as needed):
##   "/../../Volumes/TaiHoang5T2/%s_slides_data/"%project
##   "/../../Volumes/TaiHoang5T4/TCGA_slides/%s_slides_data/"%project
##   "%s_slides/"%project
path_to_slides = "/../../Volumes/TaiHoang5T1/%s_slides_data/" % (project,)

## Folder containing the slide <-> RNA matching table.
path_to_metadata = "../11maching_slide_RNA"

## ---- Tiling / masking parameters ---------------------------------------------
magnification = 20        ## target magnification used to derive the downsampling factor
tile_size = 512           ## tile edge length passed to the Deep Zoom generator
bg_thres = 0.5            ## exclude tiles with more than 50% of background
mask_downsampling = 16    ## downsampling factor of the whole-slide overview image

## ---- Metadata table ----------------------------------------------------------
## One row per slide; the `slide_id` and `slide_name` columns are read downstream.
metadata_file = "/".join((path_to_metadata, "%s_slide_RNA_matched.csv" % (project,)))
metadata = pd.read_csv(metadata_file)

In [7]:
## Overview export: for every slide in the index range, print the tile grid that
## would be obtained at the target magnification and save a downsampled RGB
## overview of the whole slide as slides_all/slide_<i_slide>.pdf.
##
## Reads from earlier cells: metadata, path_to_slides, project, magnification,
## tile_size, mask_downsampling.
## Side effects: creates "slides_all/" and one "tiles/<project>_<idx>_<slide_id>"
## folder per slide (the tile folder is only created here, nothing is written to it).
## Raises ValueError (from list.index) when max/target magnification is not one of
## the power-of-two Deep Zoom factors.
#i_slide = int(sys.argv[1])

## savefig() does not create missing directories, so make sure the target exists.
os.makedirs("slides_all", exist_ok=True)

for i_slide in range(500,800):
    print("i_slide:", i_slide)

    slide_id = metadata.slide_id[i_slide]
    slide_name = metadata.slide_name[i_slide]

    slide_path = path_to_slides + slide_id + "/" + slide_name
    #print("slide_path:", slide_path)

    ## create tile_folder:
    tile_folder = "tiles/%s_"%project + str(i_slide).zfill(5) + "_" + slide_id
    #print("tile_folder:", tile_folder)
    os.makedirs(tile_folder, exist_ok=True)

    start_time = time.time()
    ##======================================================================================================

    ## The context manager closes the slide handle even if this iteration fails,
    ## so file handles do not accumulate over the whole range of slides.
    with openslide.OpenSlide(slide_path) as slide:

        ## magnification max
        if openslide.PROPERTY_NAME_OBJECTIVE_POWER in slide.properties:
            ## The property value is a string; go through float() so that values
            ## such as "20.0" are accepted as well as "20".
            magnification_max = float(slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
        else:
            print('[WARNING] openslide.PROPERTY_NAME_OBJECTIVE_POWER was not found. Assuming it is 40X, i_slide:', i_slide)
            magnification_max = 40

        #print("magnification_max:", magnification_max)

        ## downsample_level: factor between the scan magnification and the target one
        downsampling = int(magnification_max/magnification)
        #print("downsampling:", downsampling)

        slide_gen = deepzoom.DeepZoomGenerator(slide, tile_size=tile_size, overlap=0)
        #print(slide_gen.level_dimensions)

        ## Downsampling factor attached to each Deep Zoom level: the list is reversed so
        ## that the last level gets factor 1 and every earlier level doubles it.
        slide_gen_levels = [2**i for i in range(0, slide_gen.level_count)][::-1]
        #print("slide_gen_levels:", slide_gen_levels)

        ## Raises ValueError if `downsampling` is not one of the factors above.
        slide_gen_selectedlevel_idx = slide_gen_levels.index(downsampling)
        #print("slide_gen_selectedlevel_idx:", slide_gen_selectedlevel_idx)

        slide_gen_selectedlevel_dims = slide_gen.level_dimensions[slide_gen_selectedlevel_idx]
        #print("slide_gen_selectedlevel_dims:", slide_gen_selectedlevel_dims)
        #print("slide_gen.level_tiles:", slide_gen.level_tiles)

        ## Only complete tiles are counted: the integer truncation drops partial
        ## tiles at the right and bottom edges.
        #n_cols,n_rows = slide_gen.level_tiles[slide_gen_selectedlevel_idx]
        n_cols,n_rows = int(slide_gen_selectedlevel_dims[0]/tile_size),int(slide_gen_selectedlevel_dims[1]/tile_size)
        print("n_cols,n_rows:", n_cols,n_rows)

        n_tiles = n_cols*n_rows
        print("n_tiles:", n_tiles)

        ##======================================================================================================
        # ### Mask
        ### Downsampling
        ## NOTE(review): the +0.1 presumably guards against a native level whose
        ## downsample is marginally above mask_downsampling -- confirm.
        best_downsampling_level = slide.get_best_level_for_downsample(mask_downsampling + 0.1)
        #print("best_downsampling_level:", best_downsampling_level)
        #print("slide.level_dimensions[best_downsampling_level]:", slide.level_dimensions[best_downsampling_level])

        # image at the best level (the whole level is read in one call)
        img_best_level = slide.read_region((0, 0), best_downsampling_level,
                                   slide.level_dimensions[best_downsampling_level])
        #print(img_best_level.size)

        ## full-resolution dimensions divided by the mask downsampling factor
        target_size = tuple([int(x//mask_downsampling) for x in slide.dimensions])
        #print("target_size:", target_size)

    ## From here on only in-memory images are used, so the slide can already be closed.

    ## resize the image to the target size
    img = img_best_level.resize(target_size)
    #print("img.size:",img.size)

    ## remove the alpha channel and convert to numpy
    img_RGB = np.array(img.convert("RGB"))
    print("img.shape - RGB:", img_RGB.shape)
    #plt.imshow(img_RGB)

    ##----------------------------------------------------------
    nx,ny = 1,1
    fig, ax = plt.subplots(ny,nx,figsize=(nx*4.6,ny*6))

    #ax.imshow(img_RGB[100:2500,500:2700])
    ax.imshow(img_RGB)
    ax.axis("off")

    #plt.colorbar(im,ax=ax[1],fraction=0.045, pad=0.05,ticks=[-1.0,0,1.0])

    #plt.tight_layout(h_pad=1, w_pad= 0.5)
    plt.tight_layout(h_pad=0, w_pad= 0.0)
    plt.savefig("slides_all/slide_%s.pdf"%(i_slide), format='pdf', dpi=50, bbox_inches='tight')
    ## Close this figure explicitly so figures do not pile up in memory over the loop.
    plt.close(fig)

    print("--- completed i_slide ---:", i_slide)

i_slide: 500
n_cols,n_rows: 106 76
n_tiles: 8056
img.shape - RGB: (4893, 6842, 3)
--- completed i_slide ---: 500
i_slide: 501
n_cols,n_rows: 115 79
n_tiles: 9085
img.shape - RGB: (5083, 7421, 3)
--- completed i_slide ---: 501
i_slide: 502
n_cols,n_rows: 92 46
n_tiles: 4232
img.shape - RGB: (2978, 5950, 3)
--- completed i_slide ---: 502
i_slide: 503
n_cols,n_rows: 74 62
n_tiles: 4588
img.shape - RGB: (4027, 4760, 3)
--- completed i_slide ---: 503
i_slide: 504
n_cols,n_rows: 62 88
n_tiles: 5456
img.shape - RGB: (2832, 2000, 3)
--- completed i_slide ---: 504
i_slide: 505
n_cols,n_rows: 119 88
n_tiles: 10472
img.shape - RGB: (5661, 7616, 3)
--- completed i_slide ---: 505
i_slide: 506
n_cols,n_rows: 140 87
n_tiles: 12180
img.shape - RGB: (2784, 4481, 3)
--- completed i_slide ---: 506
i_slide: 507
n_cols,n_rows: 70 79
n_tiles: 5530
img.shape - RGB: (5056, 4522, 3)
--- completed i_slide ---: 507
i_slide: 508
n_cols,n_rows: 65 53
n_tiles: 3445
img.shape - RGB: (3442, 4200, 3)
--- completed i_s

--- completed i_slide ---: 572
i_slide: 573
n_cols,n_rows: 98 66
n_tiles: 6468
img.shape - RGB: (2128, 3152, 3)
--- completed i_slide ---: 573
i_slide: 574
n_cols,n_rows: 102 70
n_tiles: 7140
img.shape - RGB: (4514, 6545, 3)
--- completed i_slide ---: 574
i_slide: 575
n_cols,n_rows: 129 75
n_tiles: 9675
img.shape - RGB: (4838, 8319, 3)
--- completed i_slide ---: 575
i_slide: 576
n_cols,n_rows: 165 70
n_tiles: 11550
img.shape - RGB: (4524, 10591, 3)
--- completed i_slide ---: 576
i_slide: 577
n_cols,n_rows: 29 88
n_tiles: 2552
img.shape - RGB: (5641, 1904, 3)
--- completed i_slide ---: 577
i_slide: 578
n_cols,n_rows: 90 70
n_tiles: 6300
img.shape - RGB: (4488, 5771, 3)
--- completed i_slide ---: 578
i_slide: 579
n_cols,n_rows: 105 79
n_tiles: 8295
img.shape - RGB: (5107, 6723, 3)
--- completed i_slide ---: 579
i_slide: 580
n_cols,n_rows: 119 81
n_tiles: 9639
img.shape - RGB: (5241, 7660, 3)
--- completed i_slide ---: 580
i_slide: 581
n_cols,n_rows: 119 71
n_tiles: 8449
img.shape - RGB: 

img.shape - RGB: (1913, 2218, 3)
--- completed i_slide ---: 645
i_slide: 646
n_cols,n_rows: 69 83
n_tiles: 5727
img.shape - RGB: (2672, 2224, 3)
--- completed i_slide ---: 646
i_slide: 647
n_cols,n_rows: 106 83
n_tiles: 8798
img.shape - RGB: (5315, 6840, 3)
--- completed i_slide ---: 647
i_slide: 648
n_cols,n_rows: 81 66
n_tiles: 5346
img.shape - RGB: (2121, 2614, 3)
--- completed i_slide ---: 648
i_slide: 649
n_cols,n_rows: 114 84
n_tiles: 9576
img.shape - RGB: (5402, 7301, 3)
--- completed i_slide ---: 649
i_slide: 650
n_cols,n_rows: 64 68
n_tiles: 4352
img.shape - RGB: (4358, 4108, 3)
--- completed i_slide ---: 650
i_slide: 651
n_cols,n_rows: 77 28
n_tiles: 2156
img.shape - RGB: (1812, 4980, 3)
--- completed i_slide ---: 651
i_slide: 652
n_cols,n_rows: 95 77
n_tiles: 7315
img.shape - RGB: (4963, 6128, 3)
--- completed i_slide ---: 652
i_slide: 653
n_cols,n_rows: 129 91
n_tiles: 11739
img.shape - RGB: (5859, 8259, 3)
--- completed i_slide ---: 653
i_slide: 654
n_cols,n_rows: 57 63
n_

n_cols,n_rows: 61 29
n_tiles: 1769
img.shape - RGB: (1896, 3956, 3)
--- completed i_slide ---: 718
i_slide: 719
n_cols,n_rows: 122 89
n_tiles: 10858
img.shape - RGB: (5732, 7854, 3)
--- completed i_slide ---: 719
i_slide: 720
n_cols,n_rows: 108 74
n_tiles: 7992
img.shape - RGB: (4770, 6961, 3)
--- completed i_slide ---: 720
i_slide: 721
n_cols,n_rows: 94 74
n_tiles: 6956
img.shape - RGB: (4799, 6068, 3)
--- completed i_slide ---: 721
i_slide: 722
n_cols,n_rows: 154 79
n_tiles: 12166
img.shape - RGB: (5068, 9875, 3)
--- completed i_slide ---: 722
i_slide: 723
n_cols,n_rows: 73 103
n_tiles: 7519
img.shape - RGB: (3312, 2352, 3)
--- completed i_slide ---: 723
i_slide: 724
n_cols,n_rows: 80 50
n_tiles: 4000
img.shape - RGB: (3233, 5176, 3)
--- completed i_slide ---: 724
i_slide: 725
n_cols,n_rows: 112 82
n_tiles: 9184
img.shape - RGB: (5276, 7220, 3)
--- completed i_slide ---: 725
i_slide: 726
n_cols,n_rows: 112 75
n_tiles: 8400
img.shape - RGB: (2413, 3610, 3)
--- completed i_slide ---: 7

n_cols,n_rows: 90 75
n_tiles: 6750
img.shape - RGB: (4846, 5771, 3)
--- completed i_slide ---: 791
i_slide: 792
n_cols,n_rows: 81 223
n_tiles: 18063
img.shape - RGB: (7152, 2608, 3)
--- completed i_slide ---: 792
i_slide: 793
n_cols,n_rows: 117 75
n_tiles: 8775
img.shape - RGB: (4843, 7541, 3)
--- completed i_slide ---: 793
i_slide: 794
n_cols,n_rows: 117 85
n_tiles: 9945
img.shape - RGB: (5497, 7497, 3)
--- completed i_slide ---: 794
i_slide: 795
n_cols,n_rows: 95 90
n_tiles: 8550
img.shape - RGB: (5823, 6128, 3)
--- completed i_slide ---: 795
i_slide: 796
n_cols,n_rows: 82 95
n_tiles: 7790
img.shape - RGB: (3056, 2640, 3)
--- completed i_slide ---: 796
i_slide: 797
n_cols,n_rows: 72 62
n_tiles: 4464
img.shape - RGB: (3975, 4641, 3)
--- completed i_slide ---: 797
i_slide: 798
n_cols,n_rows: 79 82
n_tiles: 6478
img.shape - RGB: (5259, 5117, 3)
--- completed i_slide ---: 798
i_slide: 799
n_cols,n_rows: 69 65
n_tiles: 4485
img.shape - RGB: (4192, 4462, 3)
--- completed i_slide ---: 799
