In [1]:
import numpy as np
import matplotlib.pyplot as plt
import openslide
import cv2 as cv
from define_dataset import define_dataset
from color_distance import color_distance
import time

In [2]:
# Build the dataset description once and expose its pieces as module-level
# names; the functions and cells below read tr_set, test_set and results_dict
# directly as globals.
dataset = define_dataset()
tr_set, test_set = dataset.tr_set, dataset.test_set
# results_dict is indexed by slide name and compared against 0 / 1 further down
# (presumably 0 = negative, 1 = positive -- confirm in define_dataset).
results_dict = dataset.results_dict
num_positives, num_negatives = dataset.num_positives, dataset.num_negatives

FileNotFoundError: [Errno 2] No such file or directory: '../WSI/thesis_WSI/resultsWSI.ods'

In [3]:
def calculate_pdl1_score(slide_name, level, ROI_threshold, threshold_brown, base_brown, use_normalized=False):
    """Return the ratio of PD-L1 pixels to total pixels inside the slide's ROI.

    The saved ROI map for the slide is loaded, every tile whose ROI entry equals
    1 is read from the whole-slide image at pyramid level ``level``, and the
    values returned by ``color_distance`` for those tiles are summed and divided
    by the total pixel area of the scored tiles.

    Parameters:
        slide_name: slide identifier; both the ``.tiff`` file and the ROI map
            are looked up under ``../WSI/thesis_WSI/<slide_name>/``.
        level: pyramid level passed to ``read_region``.
        ROI_threshold: selects which saved ROI map is loaded
            (``roi_<ROI_threshold>.npy``).
        threshold_brown, base_brown: forwarded unchanged to ``color_distance``
            (presumably a colour-distance cutoff and a reference colour --
            confirm in color_distance.py).
        use_normalized: when true, ``roi_<ROI_threshold>_enhanced.npy`` is
            loaded instead of the plain ROI map.

    Returns:
        The accumulated ``color_distance`` count divided by the ROI pixel area,
        or 0.0 when the ROI contains no scorable pixels.
    """
    slide_dir = '../WSI/thesis_WSI/'+slide_name+'/'
    wsi_path = slide_dir+slide_name+'.tiff'
    if use_normalized:
        ROI_map_path = slide_dir+'roi_'+str(ROI_threshold)+'_enhanced.npy'
    else:
        # loads ROI saved array with indicated threshold applied
        ROI_map_path = slide_dir+'roi_'+str(ROI_threshold)+'.npy'

    ROI_map_arr = np.load(ROI_map_path)

    # One mask drives both the tiles that are read and the denominator, so the
    # ratio is always computed over exactly the tiles that were scored.
    roi_mask = ROI_map_arr == 1
    width_in_tiles = ROI_map_arr.shape[1]
    height_in_tiles = ROI_map_arr.shape[0]
    num_ROI_tiles = int(np.count_nonzero(roi_mask))

    num_pdl1_pixel = 0
    # Context manager guarantees the slide handle is released, even on error
    # or when the cell is interrupted.
    with openslide.OpenSlide(wsi_path) as wsi:
        full_tile_size = int(wsi.dimensions[0]/width_in_tiles)
        # NOTE(review): assumes each pyramid level halves the resolution;
        # wsi.level_downsamples[level] would be the file's actual factor.
        level_tile_size = int(full_tile_size/(2**level))

        for x in range(width_in_tiles):
            print(wsi_path,': col ', x, ' of ', width_in_tiles, end="\r")
            for y in np.flatnonzero(roi_mask[:, x]):
                # read_region takes the location in level-0 coordinates and the
                # size in pixels of the requested level.
                tile = wsi.read_region((x*full_tile_size, int(y)*full_tile_size), level,
                                       (level_tile_size, level_tile_size))
                num_pdl1_pixel += color_distance(tile, base_brown, threshold_brown)
    print('\n')

    total_ROI_pixel_area = (level_tile_size*level_tile_size)*num_ROI_tiles
    if total_ROI_pixel_area == 0:
        # Empty ROI (or zero-sized tiles): nothing was scored, avoid 0/0.
        return 0.0
    pdl1_ratio = num_pdl1_pixel/total_ROI_pixel_area
    return pdl1_ratio

In [26]:
def find_classification_threshold(perc_arr, slide_set):
    """Pick the score cutoff that misclassifies the fewest slides.

    Every score in ``perc_arr`` (rounded to 5 decimals) is tried as a cutoff.
    A slide whose score is below the cutoff is predicted 0, otherwise 1, and
    the prediction is compared with the module-level ``results_dict`` entry
    for that slide. When several cutoffs tie, the last one tried wins.

    Parameters:
        perc_arr: scores, index-aligned with ``slide_set``.
        slide_set: slide names used as keys into ``results_dict``.

    Returns:
        ``(threshold, errors)`` for the best cutoff; ``(0, len(slide_set))``
        if ``perc_arr`` is empty.
    """
    best_threshold = 0
    fewest_errors = len(slide_set)

    for raw_score in perc_arr:
        candidate = round(raw_score, 5)

        # Count slides whose predicted label disagrees with the ground truth.
        misclassified = sum(
            1
            for idx, name in enumerate(slide_set)
            if (0 if perc_arr[idx] < candidate else 1) != results_dict[name]
        )

        # "<=" on purpose: on ties the later candidate replaces the earlier one.
        if misclassified <= fewest_errors:
            best_threshold, fewest_errors = candidate, misclassified

    return (best_threshold, fewest_errors)

   

In [27]:
def save_results(classification_threshold, errors, start_time, end_time, level, ROI_threshold, threshold_brown, use_normalized, is_test_set=False):
    """Write the settings, timing, error count and per-slide scores of one run to a text file.

    The report goes to ``r<ROI_threshold>b<threshold_brown>.txt`` inside one of
    four directories chosen by ``is_test_set`` and ``use_normalized``. The
    directory is created if it does not exist yet, so a finished run is not
    lost to a missing folder.

    NOTE: the per-slide scores are read from the module-level ``perc_arr`` and
    the slide names from the module-level ``test_set`` / ``tr_set``; the caller
    must have populated ``perc_arr`` for the matching set before calling.

    Parameters:
        classification_threshold: score cutoff; written as a percentage.
        errors: number of misclassified slides.
        start_time, end_time: timestamps; their difference is written as ``time``.
        level, ROI_threshold, threshold_brown: run settings echoed to the file.
        use_normalized: selects the normalized / non-normalized output directory.
        is_test_set: selects the test-set directories and ``test_set`` names
            instead of the grid-search directories and ``tr_set``.
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    if is_test_set:
        out_dir = './base_model_test_res/' + ('normalized' if use_normalized else 'not_normalized')
        slide_set = test_set
    else:
        out_dir = './base_model_res/' + ('grid_search_normalized' if use_normalized else 'grid_search')
        slide_set = tr_set
    out_path = out_dir + '/r' + str(ROI_threshold) + 'b' + str(threshold_brown) + '.txt'

    # Build the whole report first so that a failure here (e.g. perc_arr shorter
    # than the slide set) cannot leave a half-written file behind.
    lines = [
        'level: ' + str(level),
        'ROI_threshold: ' + str(ROI_threshold),
        'threshold_brown: ' + str(threshold_brown),
        'time: ' + str(end_time - start_time),
        'classification_threshold: ' + str(classification_threshold*100) + '%',
        'errors: ' + str(errors),
    ]
    for i, slide_name in enumerate(slide_set):
        lines.append(slide_name + ': ' + ("%.3f%%" % (perc_arr[i]*100)))

    os.makedirs(out_dir, exist_ok=True)
    # "with" closes the file even if the write raises.
    with open(out_path, 'w') as f:
        f.write('\n'.join(lines) + '\n')

Grid search on the training set

In [22]:
# Grid-search settings. base_brown is handed to color_distance unchanged
# (presumably the reference colour of the brown stain -- confirm there).
level = 2
base_brown = [117.3, 88.9, 67.3]
use_normalized = False
ROI_threshold_arr = [0.85, 0.9, 0.95]
threshold_brown_arr = [9]

# For every (ROI_threshold, threshold_brown) pair: score all training slides,
# pick the cutoff with the fewest errors, and write the run to disk.
for ROI_threshold in ROI_threshold_arr:
    for threshold_brown in threshold_brown_arr:
        start_time = time.time()

        # perc_arr must remain a module-level name: save_results reads it as a global.
        perc_arr = [
            calculate_pdl1_score(slide_name, level, ROI_threshold, threshold_brown,
                                 base_brown, use_normalized=use_normalized)
            for slide_name in tr_set
        ]

        classification_threshold, errors = find_classification_threshold(perc_arr, tr_set)

        # Timing covers both scoring and the threshold search.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

        save_results(classification_threshold, errors, start_time, end_time, level,
                     ROI_threshold, threshold_brown, use_normalized, is_test_set=False)
        

../WSI/thesis_WSI/19-COMP-004/19-COMP-004.tiff : col  16  of  160

KeyboardInterrupt: 

Test set

In [28]:
def set_classification_threshold(classification_threshold, slide_set, perc_arr):
    """Count the slides misclassified by a fixed score cutoff.

    A slide whose score is below ``classification_threshold`` is predicted 0,
    otherwise 1; the prediction is compared with the module-level
    ``results_dict`` entry for that slide.

    Parameters:
        classification_threshold: score cutoff to apply.
        slide_set: slide names used as keys into ``results_dict``.
        perc_arr: scores, index-aligned with ``slide_set``.

    Returns:
        The number of slides whose predicted label differs from ``results_dict``.
    """
    def predicted_label(idx):
        # Scores exactly at the cutoff count as positive.
        return 0 if perc_arr[idx] < classification_threshold else 1

    return sum(
        1
        for idx, name in enumerate(slide_set)
        if predicted_label(idx) != results_dict[name]
    )

In [29]:
# Test-set settings. classification_threshold_arr_85 holds one fixed cutoff per
# (ROI_threshold, threshold_brown) pair, indexed [i][j] in the loops below
# (presumably the cutoffs found by the training grid search -- confirm).
level = 2
base_brown = [117.3, 88.9, 67.3]
use_normalized = True
ROI_threshold_arr = [0.85]
threshold_brown_arr = [9, 13]
classification_threshold_arr_85 = [[0.00045, 0.00075]]

# Score every test slide with each configuration, count the errors made by the
# fixed cutoff, and write the run to disk.
for i, ROI_threshold in enumerate(ROI_threshold_arr):
    for j, threshold_brown in enumerate(threshold_brown_arr):
        start_time = time.time()

        classification_threshold = classification_threshold_arr_85[i][j]

        # perc_arr must remain a module-level name: save_results reads it as a global.
        perc_arr = [
            calculate_pdl1_score(slide_name, level, ROI_threshold, threshold_brown,
                                 base_brown, use_normalized=use_normalized)
            for slide_name in test_set
        ]

        # Timing covers scoring only; the error count is computed afterwards.
        end_time = time.time()
        print('time: ', "{:.0f}".format(end_time - start_time))

        errors = set_classification_threshold(classification_threshold, test_set, perc_arr)
        save_results(classification_threshold, errors, start_time, end_time, level,
                     ROI_threshold, threshold_brown, use_normalized, is_test_set=True)


../WSI/thesis_WSI/20-COMP-077/20-COMP-077.tiff : col  295  of  296

../WSI/thesis_WSI/20-COMP-065/20-COMP-065.tiff : col  327  of  328328

../WSI/thesis_WSI/19-COMP-039/19-COMP-039.tiff : col  335  of  336 336

../WSI/thesis_WSI/20-COMP-102/20-COMP-102.tiff : col  247  of  248

../WSI/thesis_WSI/19-COMP-037/19-COMP-037.tiff : col  303  of  304 of  304: col  32  of  304304  of  304

../WSI/thesis_WSI/19-COMP-026/19-COMP-026.tiff : col  295  of  296

time:  1088
../WSI/thesis_WSI/20-COMP-077/20-COMP-077.tiff : col  295  of  296

../WSI/thesis_WSI/20-COMP-065/20-COMP-065.tiff : col  327  of  328

../WSI/thesis_WSI/19-COMP-039/19-COMP-039.tiff : col  335  of  336

../WSI/thesis_WSI/20-COMP-102/20-COMP-102.tiff : col  247  of  24848

../WSI/thesis_WSI/19-COMP-037/19-COMP-037.tiff : col  303  of  304

../WSI/thesis_WSI/19-COMP-026/19-COMP-026.tiff : col  295  of  296

time:  918
