In [2]:
import sys
sys.path.append("../")

In [3]:
import numpy
import pandas
import re
import os
from density_calculations import calc_density
from tqdm import tqdm
import matplotlib.pyplot as pyplot
import matplotlib.patches as patches
from random import choice, sample

In [4]:
def generate_bins(N, grid_size=15, rng=None):
    """Draw a random, sorted set of ``N`` bin edges on the interval [0, 1].

    The first and last edges are always 0 and 1. The remaining ``N - 2``
    interior edges are drawn without replacement from the interior points of
    the evenly spaced grid ``numpy.linspace(0, 1, grid_size)``.

    Parameters
    ----------
    N : int
        Total number of edges to return, including the fixed 0 and 1.
        Must satisfy ``2 <= N <= grid_size``.
    grid_size : int, optional
        Number of evenly spaced candidate points on [0, 1] (default 15).
    rng : random.Random, optional
        Object providing a ``sample(population, k)`` method. When omitted,
        the module-level ``random.sample`` (global random state) is used.
        Pass a seeded ``random.Random`` to get reproducible bins.

    Returns
    -------
    numpy.ndarray
        1-D float array of ``N`` strictly increasing edges.

    Raises
    ------
    ValueError
        If ``N`` is outside ``[2, grid_size]``.
    """
    if not 2 <= N <= grid_size:
        raise ValueError(
            f"N must be between 2 and grid_size={grid_size}, got {N}"
        )

    grid = numpy.linspace(0, 1, grid_size)

    # Only interior grid points are candidates; 0 and 1 are added explicitly.
    interior_indices = range(1, grid_size - 1)

    draw = sample if rng is None else rng.sample
    picked = sorted(draw(interior_indices, N - 2))

    return numpy.concatenate([numpy.array([0]), grid[picked], numpy.array([1])])

In [5]:
def calc_density_with_bins(heatmap_benign,
                            bbox,
                            bins_nothing,
                            bins_benign
                        ):
    """Measure how concentrated the heatmap values inside a box are, per bin set.

    The heatmap region selected by ``bbox`` is histogrammed with each set of
    bin edges (via ``numpy.digitize``). For each set, the returned "density"
    is the population of the fullest occupied bin divided by the number of
    pixels in the region.

    Parameters
    ----------
    heatmap_benign : numpy.ndarray
        2-D array of shape ``(height, width)``.
    bbox : array-like of 4 numbers
        ``(x_min, y_min, x_max, y_max)``. The values are multiplied by
        ``(width, height, width, height)`` and truncated to int, so they are
        presumably fractions of the image size -- confirm against the code
        that writes the bbox files.
    bins_nothing, bins_benign : array-like
        Monotonic bin edges passed to ``numpy.digitize``.

    Returns
    -------
    tuple
        ``(density_not, density_ben)``: the peak-bin fraction obtained with
        ``bins_nothing`` and with ``bins_benign`` respectively.

    Raises
    ------
    ValueError
        If the scaled box selects no pixels.
    """
    h, w = heatmap_benign.shape

    # Rescale the box coordinates to pixel units.
    scale_factor = numpy.array([w, h, w, h], dtype=float)
    x_min, y_min, x_max, y_max = (bbox * scale_factor).astype(int)

    # Slice once; both bin sets are evaluated on the same region.
    crop = heatmap_benign[y_min:y_max, x_min:x_max]
    if crop.size == 0:
        raise ValueError(
            "bounding box selects an empty heatmap region: "
            f"x=[{x_min}, {x_max}), y=[{y_min}, {y_max})"
        )

    def _peak_fraction(bins):
        # Count pixels per occupied bin, then take the fullest bin's share.
        _, counts = numpy.unique(numpy.digitize(crop, bins=bins), return_counts=True)
        return counts.max() / counts.sum()

    density_not = _peak_fraction(bins_nothing)
    density_ben = _peak_fraction(bins_benign)

    return density_not, density_ben

In [6]:
def load_and_process_with_bins_multiple_only_benign_gen_bins(
        path="/home/server/other_projects/breast_cancer/DATA_PATH/temp_files",
        bins_count=30,
        len_choices=(5, 6, 7, 8),
        seed=None):
    """Sweep random bin-edge pairs and score the density rule on every sample.

    ``bins_count`` random "nothing" bin sets and ``bins_count`` random
    "benign" bin sets are generated with ``generate_bins``. For every sample
    on disk and every (nothing, benign) pair, the prediction is
    ``int(dens_ben > dens_not)`` where the densities come from
    ``calc_density_with_bins``.

    Files in ``path`` must be named ``image<number>...npy``. After sorting by
    that number they are consumed in consecutive groups of three; within a
    group the file whose name contains ``"bbox"`` is the box, the file whose
    name contains ``"ben"`` is the heatmap, and the third file is not read.
    Trailing files that do not fill a group of three are ignored. The true
    label is 1 when the text after the last underscore of the heatmap file
    name is ``"2"`` or ``"3"``, otherwise 0.

    Parameters
    ----------
    path : str, optional
        Directory holding the ``.npy`` files.
    bins_count : int, optional
        Number of random bin sets generated for each of the two roles.
    len_choices : sequence of int, optional
        Candidate edge counts; one is picked at random per bin set.
    seed : int, optional
        When given, seeds the global ``random`` module state before any bins
        are drawn so the sweep is reproducible. ``None`` leaves it untouched.

    Returns
    -------
    main_matrix : numpy.ndarray, shape (bins_count, bins_count, 2, 2)
        Counts indexed as ``[not_bin, ben_bin, true_label, predicted]``.
    specificity : numpy.ndarray, shape (bins_count, bins_count, 2)
        ``[..., k]`` treats label ``k`` as positive: the fraction of samples
        of the *other* label that were predicted as that other label.
    sensitivity : numpy.ndarray, shape (bins_count, bins_count, 2)
        ``[..., k]`` is the fraction of label-``k`` samples predicted ``k``.
    mean_accuracy : numpy.ndarray, shape (bins_count, bins_count)
        Correct predictions divided by all predictions.
    not_bins, ben_bins : list of numpy.ndarray
        The generated bin-edge sets, in the order used for the first two
        axes of the arrays above.
    """
    if seed is not None:
        # Local import: the notebook only imports ``choice``/``sample`` by name.
        import random
        random.seed(seed)

    files = os.listdir(path)
    files.sort(key=lambda x: int(re.match(r"image(\d+).+\.npy", x)[1]))

    # Generating random bins (draw order: nothing-set first, then benign-set).
    not_bins = []
    ben_bins = []
    for _ in range(bins_count):
        not_bins.append(generate_bins(choice(len_choices)))
        ben_bins.append(generate_bins(choice(len_choices)))

    n_samples = len(files) // 3
    main_matrix = numpy.zeros((len(not_bins), len(ben_bins), 2, 2))

    for idx in tqdm(range(n_samples)):

        group = files[3 * idx:3 * idx + 3]
        group.sort(key=lambda x: 0 if "bbox" in x else 1 if "ben" in x else 2)

        with open(os.path.join(path, group[0]), 'rb') as f:
            bbox = numpy.load(f)

        with open(os.path.join(path, group[1]), 'rb') as f:
            heatmap_benign = numpy.load(f)

        # The annotation is encoded in the file name, after the last underscore.
        annot = re.match(r".+_(.+)\.npy", group[1])[1]
        true_label = 1 if annot in ("2", "3") else 0

        # Each density depends on one bin set only, so evaluate every bin set
        # once instead of once per (not_bin, ben_bin) pair.
        dens_not = numpy.array(
            [calc_density_with_bins(heatmap_benign, bbox, b, b)[0] for b in not_bins]
        )
        dens_ben = numpy.array(
            [calc_density_with_bins(heatmap_benign, bbox, b, b)[1] for b in ben_bins]
        )

        # is_benign[i, j] compares ben_bins[j] against not_bins[i].
        is_benign = dens_ben[numpy.newaxis, :] > dens_not[:, numpy.newaxis]
        main_matrix[:, :, true_label, 1] += is_benign
        main_matrix[:, :, true_label, 0] += ~is_benign

    # Per-label hit rate: correct predictions over all samples of that label.
    # A label with no samples yields NaN (division by zero).
    label_totals = main_matrix.sum(axis=-1)
    hit_rate_0 = main_matrix[:, :, 0, 0] / label_totals[:, :, 0]
    hit_rate_1 = main_matrix[:, :, 1, 1] / label_totals[:, :, 1]

    # With two labels, specificity for label k equals the hit rate of the other label.
    specificity = numpy.stack([hit_rate_1, hit_rate_0], axis=-1)
    sensitivity = numpy.stack([hit_rate_0, hit_rate_1], axis=-1)

    # mean accuracy
    corrects = main_matrix[:, :, 0, 0] + main_matrix[:, :, 1, 1]
    mean_accuracy = corrects / main_matrix.sum(axis=(-1, -2))

    return main_matrix, specificity, sensitivity, mean_accuracy, not_bins, ben_bins

In [7]:
# Run the random-bin sweep over every sample on disk and keep all result arrays plus the generated bin sets.
main_matrix, specificity, sensitivity, mean_accuracy, not_bins, ben_bins = load_and_process_with_bins_multiple_only_benign_gen_bins()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 316/316 [00:22<00:00, 14.11it/s]


In [20]:
# Flat index (into the flattened mean_accuracy array) of the highest mean accuracy.
best_index = numpy.argmax(mean_accuracy)

In [21]:
# (not_bins index, ben_bins index) of the most accurate bin pair.
# The shape comes from the array itself, so this stays valid if the number of
# bin sets changes, and the argmax is recomputed so the cell can be re-run.
best_index = numpy.unravel_index(numpy.argmax(mean_accuracy), mean_accuracy.shape)

In [22]:
# Count matrix for the most accurate bin pair: rows are the true label, columns the predicted label.
main_matrix[best_index[0], best_index[1]]

array([[213.,   0.],
       [ 96.,   7.]])

In [23]:
# Bin pairs where more than 67% of label-1 samples and more than 51% of label-0 samples are predicted correctly.
indices = numpy.where((sensitivity[..., 1] > 0.67) & (sensitivity[..., 0] > 0.51))

In [24]:
# First array: positions in not_bins; second array: matching positions in ben_bins.
indices

(array([ 4,  5,  5,  5, 13, 13, 13, 22, 22, 22, 22, 26]),
 array([ 3,  3, 16, 25,  0,  1,  5,  0,  1,  5, 21,  3]))

In [25]:
# Count matrix (rows: true label, columns: predicted label) for the last bin pair that passed both thresholds.
main_matrix[indices[0][-1], indices[1][-1]]

array([[126.,  87.],
       [ 30.,  73.]])