In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys

import openslide
from openslide import deepzoom
import cv2 as cv
from PIL import Image, ImageDraw

In [2]:
# Run configuration read by the cells below.
#   project : cohort tag substituted into the input file names
#   ik      : index used to pick one entry of the nested test split; it is also
#             part of the tile-score file name (presumably the outer fold -- confirm)
#   il      : second index in the tile-score file name (presumably the inner fold -- confirm)
project, ik, il = "BRCA", 4, 0

In [3]:
## Slide indices of the test partition for split `ik`.
# allow_pickle=True is required by the stored arrays (presumably object arrays -- confirm).
# NOTE(review): the archive object is kept around under its original name; close it
# once no other cell needs it.
split_file = "../16patients_split/i_slides_nested_%s.npz" % project
i_slides_train_valid_test = np.load(split_file, allow_pickle=True)

# The "i_slides_test" entry holds one list of slide indices per split; keep split `ik`.
i_slides_test = i_slides_train_valid_test["i_slides_test"][ik]

In [4]:
## Load tile information and tile scores.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load files
# produced by this project.
# `tile` entries are read later as (slide index, slide name, slide level, idx_row_col).
tile = np.load("../12tiles_from_slide/tiles_selected/tiles_selected_npy/%s_tiles_selected.npy"%project, allow_pickle=True)

# `tile_scores` entries are read later as (slide index, slide name, score matrix).
tile_scores = np.load("tile_scores_%s_ik%s_il%s.npy"%(project, ik, il), allow_pickle=True)

### Gene names
# NOTE(review): this directory name hard-codes BRCA / ik4 / il0; keep it in sync with
# `project`, `ik` and `il` when those are changed.
# ndmin=2 keeps the column slicing valid even when a file contains a single row.
best_genes = np.loadtxt("220Nov21BRCA_nonrot_ik4_il0/R_sorted.txt", dtype="str", ndmin=2)
best_genes = best_genes[:,0]   # first column: gene names (presumably best first -- confirm)

genes = np.loadtxt("../15select_genes/%s_gene_active90_mediansorted_names.txt"%project, dtype="str", ndmin=2)
genes = genes[:,5]             # sixth column is compared against the names in best_genes

# Only the leading genes are plotted, so only those are looked up in `genes`
# (the full best_genes list is left untouched).
n_top_genes = 5
i_genes = []
for best_gene in best_genes[:n_top_genes]:
    hits = np.flatnonzero(genes == best_gene)
    if hits.size == 0:
        raise ValueError("gene %s from R_sorted.txt was not found in the %s gene list"
                         % (best_gene, project))
    i_genes.append(hits[0])    # first occurrence
i_genes = np.array(i_genes)

In [6]:
### Draw, for each test slide, the slide overview next to one score heatmap per
### selected gene and save the figure as a PDF.

## Settings shared by all slides (loop-invariant, so defined once).
tile_size = 512          # tile edge in pixels at the working magnification (presumably the size used when the tiles were cut -- confirm)
mask_downsampling = 16   # the overview image is the full-resolution slide shrunk by this factor
magnification = 20       # working magnification the tile grid refers to

path2slides = "../%s_slides_data_test"%project

# savefig does not create missing directories.
out_dir = "heatmap_all"
os.makedirs(out_dir, exist_ok=True)

## Slide-index columns of both tables, used to locate a slide in each of them.
tile_slide_ids = np.array([entry[0] for entry in tile])
score_slide_ids = np.array([entry[0] for entry in tile_scores])

for i_slide in i_slides_test[:80]:

    ### Tile_selected information of this slide
    n_slides = len(tile)
    print("n_slides:", n_slides)

    i0 = np.argwhere(tile_slide_ids == i_slide)[0][0]
    print("i0:", i0)

    slide_name = tile[i0][1]
    print("slide_name:", slide_name)

    slide_level = tile[i0][2]
    print("slide_level:", slide_level)

    idx_row_col = tile[i0][3]
    print("idx_row_col.shape:", idx_row_col.shape)

    ##------------------------------------
    ### Tile scores of this slide
    n_slides = len(tile_scores)
    print("n_slides:", n_slides)

    i0 = np.argwhere(score_slide_ids == i_slide)[0][0]
    print("i0:", i0)

    print("i_slide:", tile_scores[i0][0])
    print("slide_name:", tile_scores[i0][1])
    scores = tile_scores[i0][2]
    print("scores.shape", scores.shape)

    # score[i] is painted onto tile i below, so both tables must list the same tiles.
    if len(scores) != len(idx_row_col):
        raise ValueError("slide %s: %d score rows but %d tiles"
                         % (i_slide, len(scores), len(idx_row_col)))

    ### Load image
    ##------------------------------------
    # The slide is only needed until the overview image exists; the with-block
    # closes the file handle even when reading fails.
    with openslide.OpenSlide("%s/%s.svs"%(path2slides, slide_name)) as slide:

        ## magnification max
        if openslide.PROPERTY_NAME_OBJECTIVE_POWER in slide.properties:
            magnification_max = slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]
        else:
            print('[WARNING] openslide.PROPERTY_NAME_OBJECTIVE_POWER was not found. Assuming it is 40X, i_slide:', i_slide)
            magnification_max = 40

        ## downsample_level: native objective power relative to the working magnification.
        # The property is a string; going through float also accepts values like "40.0".
        downsampling = int(int(float(magnification_max))/magnification)
        print("downsampling:", downsampling)

        ##------------------------------------
        # The deep-zoom pyramid is only consulted to report the dimensions of the tile level.
        slide_gen = deepzoom.DeepZoomGenerator(slide, tile_size=tile_size, overlap=0)
        slide_gen_selectedlevel_idx = slide_level
        print("slide_gen_selectedlevel_idx:", slide_gen_selectedlevel_idx)

        slide_gen_selectedlevel_dims = slide_gen.level_dimensions[slide_gen_selectedlevel_idx]
        print("slide_gen_selectedlevel_dims:", slide_gen_selectedlevel_dims)

        ### Downsampling
        # NOTE(review): the +0.1 presumably tolerates level downsamples slightly above 16 -- confirm.
        best_downsampling_level = slide.get_best_level_for_downsample(mask_downsampling + 0.1)
        print("best_downsampling_level:", best_downsampling_level)
        print("slide.level_dimensions[best_downsampling_level]:", slide.level_dimensions[best_downsampling_level])

        # whole image at the best level
        img_best_level = slide.read_region((0, 0), best_downsampling_level,
                                           slide.level_dimensions[best_downsampling_level])
        print(img_best_level.size)

        target_size = tuple([int(x//mask_downsampling) for x in slide.dimensions])
        print("target_size:", target_size)

    ## resize the image to the target size
    img = img_best_level.resize(target_size)
    print("img.size:",img.size)

    # Edge of one tile measured in overview pixels.
    mask_tile_size = int(np.ceil(tile_size*(downsampling/mask_downsampling)))
    print("mask_tile_size:", mask_tile_size)

    ##----------------------------------------------------------
    # img.size is (width, height); the array is (rows, columns).
    heatmap = np.zeros((img.size[1], img.size[0]))
    print("heatmap.shape", heatmap.shape)

    rows = idx_row_col[:,1]
    cols = idx_row_col[:,2]

    # Pixel box of every tile in the overview; it does not depend on the gene.
    tile_boxes = [(int(r*mask_tile_size), int((r+1)*mask_tile_size),
                   int(c*mask_tile_size), int((c+1)*mask_tile_size))
                  for r, c in zip(rows, cols)]

    ##----------------------------------------------------------
    # One panel for the slide itself plus one per selected gene.
    nx,ny = len(i_genes) + 1,1
    fig, ax = plt.subplots(ny,nx,figsize=(nx*8,ny*6))
    ax = np.atleast_1d(ax)   # a single panel comes back as a bare Axes

    ax[0].imshow(img)
    ax[0].set_title(str(slide_name))

    for iax, i_gene in enumerate(i_genes):
        score = scores[:,i_gene]

        # Shift so that the lowest-scoring tile is 0.
        # NOTE(review): that tile then has the same value as the untouched background.
        score = score - score.min()

        # Every tile box is rewritten for each gene, so the array needs no reset in between.
        for (r0, r1, c0, c1), value in zip(tile_boxes, score):
            heatmap[r0:r1, c0:c1] = value

        ax[iax+1].imshow(heatmap)
        ax[iax+1].set_title(str(genes[i_gene]))

    plt.tight_layout(h_pad=1, w_pad= 0.5)
    plt.savefig('%s/heatmap_slide%s.pdf'%(out_dir, i_slide), format='pdf', dpi=50)
    # Close this figure explicitly so figures do not pile up across slides.
    plt.close(fig)

    print("--- completed i_slide ---:", i_slide)

n_slides: 1033
i0: 0
slide_name: BRCA_00000_0001a1fb-f388-41c6-bfe9-ecbb10429e37
slide_level: 16
idx_row_col.shape: (4435, 3)
n_slides: 206
i0: 0
i_slide: 0
slide_name: BRCA_00000_0001a1fb-f388-41c6-bfe9-ecbb10429e37
scores.shape (4435, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 16
slide_gen_selectedlevel_dims: (61437, 36668)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (7679, 4583)
(7679, 4583)
target_size: (7679, 4583)
img.size: (7679, 4583)
mask_tile_size: 64
heatmap.shape (4583, 7679)
--- completed i_slide ---: 0
n_slides: 1033
i0: 2
slide_name: BRCA_00002_01254d79-23ab-4163-8e71-7605041c1b82
slide_level: 17
idx_row_col.shape: (1024, 3)
n_slides: 206
i0: 1
i_slide: 2
slide_name: BRCA_00002_01254d79-23ab-4163-8e71-7605041c1b82
scores.shape (1024, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 17
slide_gen_selectedlevel_dims: (69425, 21348)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (8678, 2668)
(8678, 2668

(4108, 3576)
target_size: (4108, 3576)
img.size: (4108, 3576)
mask_tile_size: 64
heatmap.shape (3576, 4108)
--- completed i_slide ---: 87
n_slides: 1033
i0: 84
slide_name: BRCA_00088_14253bab-4469-4eef-9e1b-51413982db83
slide_level: 16
idx_row_col.shape: (1249, 3)
n_slides: 206
i0: 15
i_slide: 88
slide_name: BRCA_00088_14253bab-4469-4eef-9e1b-51413982db83
scores.shape (1249, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 16
slide_gen_selectedlevel_dims: (34564, 27791)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (4320, 3473)
(4320, 3473)
target_size: (4320, 3473)
img.size: (4320, 3473)
mask_tile_size: 64
heatmap.shape (3473, 4320)
--- completed i_slide ---: 88
n_slides: 1033
i0: 96
slide_name: BRCA_00101_16d2bed8-4fe9-43ba-832a-4a193aa3309d
slide_level: 15
idx_row_col.shape: (286, 3)
n_slides: 206
i0: 16
i_slide: 101
slide_name: BRCA_00101_16d2bed8-4fe9-43ba-832a-4a193aa3309d
scores.shape (286, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 15
s

(6240, 5554)
target_size: (6240, 5554)
img.size: (6240, 5554)
mask_tile_size: 64
heatmap.shape (5554, 6240)
--- completed i_slide ---: 162
n_slides: 1033
i0: 154
slide_name: BRCA_00165_22b53aed-2db1-498c-8a6b-cb2f995e4b62
slide_level: 16
idx_row_col.shape: (3430, 3)
n_slides: 206
i0: 30
i_slide: 165
slide_name: BRCA_00165_22b53aed-2db1-498c-8a6b-cb2f995e4b62
scores.shape (3430, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 16
slide_gen_selectedlevel_dims: (52667, 39428)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (6583, 4928)
(6583, 4928)
target_size: (6583, 4928)
img.size: (6583, 4928)
mask_tile_size: 64
heatmap.shape (4928, 6583)
--- completed i_slide ---: 165
n_slides: 1033
i0: 159
slide_name: BRCA_00170_23808d51-3e71-4f8e-ba52-6d90036408a4
slide_level: 17
idx_row_col.shape: (2751, 3)
n_slides: 206
i0: 31
i_slide: 170
slide_name: BRCA_00170_23808d51-3e71-4f8e-ba52-6d90036408a4
scores.shape (2751, 22884)
downsampling: 2
slide_gen_selectedlevel_id

(4343, 4490)
target_size: (4343, 4490)
img.size: (4343, 4490)
mask_tile_size: 64
heatmap.shape (4490, 4343)
--- completed i_slide ---: 215
n_slides: 1033
i0: 204
slide_name: BRCA_00217_2f9d23e6-9bd4-4a63-8277-3410c04010f8
slide_level: 16
idx_row_col.shape: (3033, 3)
n_slides: 206
i0: 45
i_slide: 217
slide_name: BRCA_00217_2f9d23e6-9bd4-4a63-8277-3410c04010f8
scores.shape (3033, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 16
slide_gen_selectedlevel_dims: (64637, 46881)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (8079, 5860)
(8079, 5860)
target_size: (8079, 5860)
img.size: (8079, 5860)
mask_tile_size: 64
heatmap.shape (5860, 8079)
--- completed i_slide ---: 217
n_slides: 1033
i0: 205
slide_name: BRCA_00218_300b6dbe-bff1-4536-ac1b-edc898dc821e
slide_level: 16
idx_row_col.shape: (1749, 3)
n_slides: 206
i0: 46
i_slide: 218
slide_name: BRCA_00218_300b6dbe-bff1-4536-ac1b-edc898dc821e
scores.shape (1749, 22884)
downsampling: 2
slide_gen_selectedlevel_id

(2614, 2844)
target_size: (2614, 2844)
img.size: (2614, 2844)
mask_tile_size: 32
heatmap.shape (2844, 2614)
--- completed i_slide ---: 269
n_slides: 1033
i0: 261
slide_name: BRCA_00274_406aa150-f7b8-4b36-ba50-dfbd6ddff42d
slide_level: 16
idx_row_col.shape: (2667, 3)
n_slides: 206
i0: 60
i_slide: 274
slide_name: BRCA_00274_406aa150-f7b8-4b36-ba50-dfbd6ddff42d
scores.shape (2667, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 16
slide_gen_selectedlevel_dims: (55216, 46600)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (6901, 5824)
(6901, 5824)
target_size: (6901, 5824)
img.size: (6901, 5824)
mask_tile_size: 64
heatmap.shape (5824, 6901)
--- completed i_slide ---: 274
n_slides: 1033
i0: 282
slide_name: BRCA_00295_48ab91f1-dd5c-4e67-8e52-61334afcb1fc
slide_level: 17
idx_row_col.shape: (3264, 3)
n_slides: 206
i0: 61
i_slide: 295
slide_name: BRCA_00295_48ab91f1-dd5c-4e67-8e52-61334afcb1fc
scores.shape (3264, 22884)
downsampling: 1
slide_gen_selectedlevel_id

(5236, 4991)
target_size: (5236, 4991)
img.size: (5236, 4991)
mask_tile_size: 64
heatmap.shape (4991, 5236)
--- completed i_slide ---: 366
n_slides: 1033
i0: 361
slide_name: BRCA_00374_5c4ef2a4-9bd0-43bb-829c-8ce975603a9d
slide_level: 18
idx_row_col.shape: (1313, 3)
n_slides: 206
i0: 75
i_slide: 374
slide_name: BRCA_00374_5c4ef2a4-9bd0-43bb-829c-8ce975603a9d
scores.shape (1313, 22884)
downsampling: 2
slide_gen_selectedlevel_idx: 18
slide_gen_selectedlevel_dims: (83968, 210048)
best_downsampling_level: 2
slide.level_dimensions[best_downsampling_level]: (10496, 26256)
(10496, 26256)
target_size: (10496, 26256)
img.size: (10496, 26256)
mask_tile_size: 64
heatmap.shape (26256, 10496)
--- completed i_slide ---: 374
n_slides: 1033
i0: 363
slide_name: BRCA_00376_5ca8d2a8-827a-448d-a3a7-f9b965f925a4
slide_level: 16
idx_row_col.shape: (1097, 3)
n_slides: 206
i0: 76
i_slide: 376
slide_name: BRCA_00376_5ca8d2a8-827a-448d-a3a7-f9b965f925a4
scores.shape (1097, 22884)
downsampling: 1
slide_gen_selec