In [3]:
import numpy as np
import matplotlib.pyplot as plt
import openslide
import cv2 as cv
from define_dataset import define_dataset
from color_distance_bins import color_distance_bins
import time
from define_ext_dataset import define_ext_dataset


In [4]:
# Build the external dataset object once; the cells below iterate over its
# `data` attribute and may use the positive/negative counts it reports.
ext_dataset = define_ext_dataset()
ext_ts_set = ext_dataset.data
num_positive_ext, num_negatives_ext = ext_dataset.num_positives, ext_dataset.num_negatives

In [5]:
def calculate_pdl1_score_bins(slide_name, level, ROI_threshold, base_brown, num_bins, use_normalized=False,
                              use_manual_art=False):
    """Sum the per-tile bins of one slide's ROI and express them as percentages.

    The ROI map saved next to the slide is loaded, every tile whose map entry
    equals 1 is read from the slide at level 0, resized to
    ``full_tile_size / 2**level`` pixels per side and handed to
    ``color_distance_bins``. The per-tile bin arrays are summed and then divided
    by the total ROI pixel area (at the resized tile size) and multiplied by 100.

    Parameters
    ----------
    slide_name : str
        Name of the slide directory under ``../WSI/ext_wsi/``; the slide file is
        ``<slide_name>/<slide_name>.tiff``.
    level : int
        Downsampling exponent: each tile side is divided by ``2**level`` before
        it is binned.
    ROI_threshold : float
        Threshold value embedded in the ROI map file name (``roi_<threshold>...``).
    base_brown : sequence of float
        Reference colour forwarded unchanged to ``color_distance_bins``
        (presumably RGB -- confirm against that helper).
    num_bins : int
        Number of bins requested from ``color_distance_bins`` and length of the result.
    use_normalized : bool, optional
        If true, load ``roi_<threshold>_enhanced.npy`` instead of ``roi_<threshold>.npy``.
    use_manual_art : bool, optional
        If true, load ``roi_<threshold>_manual.npy``. Takes precedence over
        ``use_normalized``.

    Returns
    -------
    numpy.ndarray
        Array of length ``num_bins`` holding summed bin values as a percentage of
        the ROI pixel area. If the ROI map has no positive entries the division
        is by zero and the result contains NaN values.
    """
    slide_dir = '../WSI/ext_wsi/'+slide_name+'/'
    wsi_path = slide_dir+slide_name+'.tiff'

    # Exactly one ROI map is used; the manual map wins over the enhanced one.
    roi_prefix = slide_dir+'roi_'+str(ROI_threshold)
    if use_manual_art:
        ROI_map_path = roi_prefix+'_manual.npy'
    elif use_normalized:
        ROI_map_path = roi_prefix+'_enhanced.npy'
    else:
        # loads ROI saved array with indicated threshold applied
        ROI_map_path = roi_prefix+'.npy'

    ROI_map_arr = np.load(ROI_map_path)
    height_in_tiles = ROI_map_arr.shape[0]
    width_in_tiles = ROI_map_arr.shape[1]
    # NOTE(review): tiles are counted with `> 0` here but selected with `== 1`
    # in the loop below; the two agree only for a 0/1 map -- confirm the map is binary.
    num_ROI_tiles = np.sum(ROI_map_arr > 0)

    bins = np.zeros(num_bins)
    # The context manager closes the slide handle even if reading a tile fails,
    # so looping over many slides does not accumulate open files.
    with openslide.OpenSlide(wsi_path) as wsi:
        # Tile side at level 0 is derived from the slide width only.
        full_tile_size = int(wsi.dimensions[0]/width_in_tiles)
        level_tile_size = int(full_tile_size/(2**level))
        for x in range(width_in_tiles):
            # "\r" keeps the progress report on a single console line.
            print(wsi_path,': col ', x, ' of ', width_in_tiles, end="\r")
            for y in range(height_in_tiles):
                if ROI_map_arr[y][x] == 1:
                    tile = wsi.read_region((x*full_tile_size, y*full_tile_size), 0,
                                           (full_tile_size, full_tile_size))
                    # downsample tile
                    scaled_tile = tile.resize((level_tile_size, level_tile_size))
                    bins = np.add(bins, color_distance_bins(scaled_tile, base_brown, num_bins=num_bins))
    print('\n')

    # Normalise the accumulated values by the ROI area measured in resized pixels.
    total_ROI_pixel_area = (level_tile_size*level_tile_size)*num_ROI_tiles
    bins = np.multiply(np.divide(bins, float(total_ROI_pixel_area)), 100)
    return bins

In [4]:
# Run configuration: `level` is the downsampling exponent handed to
# calculate_pdl1_score_bins; thresholds and bin counts are swept as lists.
level = 2
ROI_threshold_arr = [0.95]
base_brown = [117.3, 88.9, 67.3]
use_normalized = True
num_bins_arr = [100]

for ROI_threshold in ROI_threshold_arr:
    for num_bins in num_bins_arr:
        start_time = time.time()

        # One row of bin values per slide in ext_ts_set; every row is filled below.
        wsi_bins_arr = np.empty((len(ext_ts_set), num_bins))
        for i, slide_name in enumerate(ext_ts_set):
            bins = calculate_pdl1_score_bins(
                slide_name, level, ROI_threshold, base_brown, num_bins,
                use_normalized=use_normalized,
            )
            wsi_bins_arr[i, :] = bins

        # Report the wall-clock duration of the sweep in whole seconds.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

        # File name encodes the bin count and the ROI threshold.
        np.save(f'../ML_models/bins/bins_arr_test_3ext{num_bins}r{ROI_threshold}.npy', wsi_bins_arr)

../WSI/ext_wsi/M-834-21 SP142/M-834-21 SP142.tiff : col  325  of  326

../WSI/ext_wsi/M-534-20-1 SP142/M-534-20-1 SP142.tiff : col  311  of  312

../WSI/ext_wsi/M-527-20 MA2-SP142/M-527-20 MA2-SP142.tiff : col  165  of  166

../WSI/ext_wsi/M-527-20 MA1-SP142/M-527-20 MA1-SP142.tiff : col  196  of  197

../WSI/ext_wsi/M-360-20 SP142/M-360-20 SP142.tiff : col  366  of  367

../WSI/ext_wsi/M-313-20 SP142/M-313-20 SP142.tiff : col  208  of  209

../WSI/ext_wsi/M-182-20-A1 SP142/M-182-20-A1 SP142.tiff : col  218  of  219

../WSI/ext_wsi/M-180-20 SP142/M-180-20 SP142.tiff : col  150  of  151

../WSI/ext_wsi/M-67-21-1-SP142 (4)/M-67-21-1-SP142 (4).tiff : col  244  of  245

../WSI/ext_wsi/M-3784-20-A1 SP142/M-3784-20-A1 SP142.tiff : col  335  of  336

time:  4881


In [5]:
# Run configuration: `level` is the downsampling exponent handed to
# calculate_pdl1_score_bins; thresholds and bin counts are swept as lists.
level = 2
ROI_threshold_arr = [0.95]
base_brown = [117.3, 88.9, 67.3]
use_normalized = True
num_bins_arr = [40]

for ROI_threshold in ROI_threshold_arr:
    for num_bins in num_bins_arr:
        start_time = time.time()

        # One row of bin values per slide in ext_ts_set; every row is filled below.
        wsi_bins_arr = np.empty((len(ext_ts_set), num_bins))
        for i, slide_name in enumerate(ext_ts_set):
            bins = calculate_pdl1_score_bins(
                slide_name, level, ROI_threshold, base_brown, num_bins,
                use_normalized=use_normalized,
            )
            wsi_bins_arr[i, :] = bins

        # Report the wall-clock duration of the sweep in whole seconds.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

        # File name encodes the bin count and the ROI threshold.
        np.save(f'../ML_models/bins/bins_arr_test_3ext{num_bins}r{ROI_threshold}.npy', wsi_bins_arr)

../WSI/ext_wsi/M-834-21 SP142/M-834-21 SP142.tiff : col  325  of  326

../WSI/ext_wsi/M-534-20-1 SP142/M-534-20-1 SP142.tiff : col  311  of  312

../WSI/ext_wsi/M-527-20 MA2-SP142/M-527-20 MA2-SP142.tiff : col  165  of  166

../WSI/ext_wsi/M-527-20 MA1-SP142/M-527-20 MA1-SP142.tiff : col  196  of  197  of  197

../WSI/ext_wsi/M-360-20 SP142/M-360-20 SP142.tiff : col  366  of  367

../WSI/ext_wsi/M-313-20 SP142/M-313-20 SP142.tiff : col  208  of  209

../WSI/ext_wsi/M-182-20-A1 SP142/M-182-20-A1 SP142.tiff : col  218  of  219

../WSI/ext_wsi/M-180-20 SP142/M-180-20 SP142.tiff : col  150  of  151

../WSI/ext_wsi/M-67-21-1-SP142 (4)/M-67-21-1-SP142 (4).tiff : col  244  of  245

../WSI/ext_wsi/M-3784-20-A1 SP142/M-3784-20-A1 SP142.tiff : col  335  of  336

time:  4850


In [6]:
# Run configuration: `level` is the downsampling exponent handed to
# calculate_pdl1_score_bins; thresholds and bin counts are swept as lists.
level = 2
ROI_threshold_arr = [0.9]
base_brown = [117.3, 88.9, 67.3]
use_normalized = True
num_bins_arr = [40]

for ROI_threshold in ROI_threshold_arr:
    for num_bins in num_bins_arr:
        start_time = time.time()

        # One row of bin values per slide in ext_ts_set; every row is filled below.
        wsi_bins_arr = np.empty((len(ext_ts_set), num_bins))
        for i, slide_name in enumerate(ext_ts_set):
            bins = calculate_pdl1_score_bins(
                slide_name, level, ROI_threshold, base_brown, num_bins,
                use_normalized=use_normalized,
            )
            wsi_bins_arr[i, :] = bins

        # Report the wall-clock duration of the sweep in whole seconds.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

        # File name encodes the bin count and the ROI threshold.
        np.save(f'../ML_models/bins/bins_arr_test_3ext{num_bins}r{ROI_threshold}.npy', wsi_bins_arr)

../WSI/ext_wsi/M-834-21 SP142/M-834-21 SP142.tiff : col  325  of  326

../WSI/ext_wsi/M-534-20-1 SP142/M-534-20-1 SP142.tiff : col  311  of  312

../WSI/ext_wsi/M-527-20 MA2-SP142/M-527-20 MA2-SP142.tiff : col  165  of  166  of  166

../WSI/ext_wsi/M-527-20 MA1-SP142/M-527-20 MA1-SP142.tiff : col  196  of  197

../WSI/ext_wsi/M-360-20 SP142/M-360-20 SP142.tiff : col  366  of  367

../WSI/ext_wsi/M-313-20 SP142/M-313-20 SP142.tiff : col  208  of  209

../WSI/ext_wsi/M-182-20-A1 SP142/M-182-20-A1 SP142.tiff : col  218  of  219 219

../WSI/ext_wsi/M-180-20 SP142/M-180-20 SP142.tiff : col  150  of  151

../WSI/ext_wsi/M-67-21-1-SP142 (4)/M-67-21-1-SP142 (4).tiff : col  244  of  245

../WSI/ext_wsi/M-3784-20-A1 SP142/M-3784-20-A1 SP142.tiff : col  335  of  336

time:  4436


# ROI with manual artifact elimination

In [10]:
# Run configuration for slides whose ROI map was saved under the `_manual` suffix.
level = 2
ROI_threshold_arr = [0.95]
base_brown = [117.3, 88.9, 67.3]
# NOTE: use_normalized is assigned here but not passed to the call below.
use_normalized = True
num_bins_arr = [100]
use_manual_art = True

# Slides processed with the manual ROI map.
artifact_set = ['M-1163-20 SP142']

for ROI_threshold in ROI_threshold_arr:
    for num_bins in num_bins_arr:
        start_time = time.time()

        # Each slide's bins go to their own file, named after the slide.
        for i, slide_name in enumerate(artifact_set):
            bins = calculate_pdl1_score_bins(
                slide_name, level, ROI_threshold, base_brown, num_bins,
                use_manual_art=use_manual_art,
            )
            np.save(f'../ML_models/bins/manual_ext/{slide_name}_bins_{num_bins}r{ROI_threshold}.npy', bins)

        # Report the wall-clock duration in whole seconds.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

../WSI/ext_wsi/M-1163-20 SP142/M-1163-20 SP142.tiff : col  220  of  221 col  100  of  221 221

time:  97
