# Development of the IoU (intersection over union) functionality

In [None]:
def iou_courtney(gt, pred):
    """Return the intersection over union (IoU) of two binary masks.

    Both inputs are converted to boolean arrays, so every non-zero element
    counts as part of the mask.

    Args:
        gt: Array-like ground-truth mask.
        pred: Array-like predicted mask with the same shape as ``gt`` (or a
            shape NumPy can broadcast against it).

    Returns:
        float: ``|gt AND pred| / |gt OR pred|``, a value in ``[0, 1]``.
        ``nan`` when both masks are empty, because the ratio is then 0/0
        and the score is undefined.
    """
    gt_bool = np.asarray(gt, dtype=bool)
    pred_bool = np.asarray(pred, dtype=bool)

    union_count = np.logical_or(gt_bool, pred_bool).sum()
    if union_count == 0:
        # Neither mask marks any pixel: report "undefined" instead of
        # raising ZeroDivisionError.
        return float('nan')

    overlap_count = np.logical_and(gt_bool, pred_bool).sum()
    return float(overlap_count) / float(union_count)

In [None]:
import sys
import os
# The notebook cannot import from the project's source package unless the
# parent of the current working directory is on the module search path.
project_dir = os.path.split(os.getcwd())[0]
sys.path.append(project_dir)
print(project_dir)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from scipy.misc import imresize
import skimage

In [None]:
from src.d00_utils.db_utils import dbReadWriteViews

In [None]:
# Instantiate the helper imported from src.d00_utils.db_utils (presumably a
# database read/write wrapper — confirm there); the cells below call its
# get_table() method to load tables.
io_views = dbReadWriteViews()

In [None]:
# Load the 'frames_by_volume_mask' table. It is later passed to pd.merge, so
# get_table() presumably returns a pandas DataFrame — confirm in db_utils.
table = io_views.get_table('frames_by_volume_mask')

In [None]:
# Load the 'instances_unique_master_list' table; it is joined onto `table` on
# instanceidk/studyidk afterwards (presumably to supply file names — confirm).
file_lookup = io_views.get_table('instances_unique_master_list')
file_lookup.head()  # preview the first rows

In [None]:
# Left join: keep every row of `table` and attach the `file_lookup` columns
# that share the same (instanceidk, studyidk) pair.
table_filenames = table.merge(
    file_lookup,
    how='left',
    on=['instanceidk', 'studyidk'],
)
table_filenames.head()

In [None]:
# 'instances_with_labels_scrappy100' is the list of 100 study ids from Dave;
# only its instance id column is needed from here on.
instance_id_list = io_views.get_table('instances_with_labels_scrappy100')['instanceidk']
print(instance_id_list.shape)

# Baseline for comparison: distinct instances before any filtering is applied.
print('check orginal number of instances in table with filenames')
print(table_filenames['instanceidk'].nunique())

In [None]:
# Restrict table_filenames to the rows whose instance id is in the 100-study list.
table_filenames_filtered_100 = table_filenames.loc[
    table_filenames['instanceidk'].isin(instance_id_list)
]

In [None]:
# Sanity check on the filtered table: distinct instance count, shape, preview.
print(table_filenames_filtered_100.instanceidk.nunique())
print(table_filenames_filtered_100.shape)
table_filenames_filtered_100.head()

In [None]:
# Reshape wide -> long: each of the six per-view flag columns becomes its own
# row, with the column name stored in `view_name` and its value in
# `view_exists`. All id columns are repeated on every resulting row.
table_filenames_filtered_100_melt = table_filenames_filtered_100.melt(
    id_vars=[
        'instanceidk', 'indexinmglist', 'chordsequence', 'chordtype',
        'x1coordinate', 'y1coordinate', 'chordlength', 'x2coordinate',
        'y2coordinate', 'interchorddistance', 'studyidk', 'frame',
        'sopinstanceuid', 'instancefilename',
    ],
    value_vars=[
        'a4c_ven_ed', 'a4c_ven_es', 'a2c_ven_ed', 'a2c_ven_es',
        'a4c_atr_es', 'a2c_atr_es',
    ],
    var_name='view_name',
    value_name='view_exists',
)

In [None]:
# Keep only the (row, view) combinations whose flag is set.
table_filenames_filtered_100_melt = table_filenames_filtered_100_melt.loc[
    # Element-wise comparison on the column, not an identity test.
    table_filenames_filtered_100_melt['view_exists'] == True
]
print(table_filenames_filtered_100_melt.shape)
print(table_filenames_filtered_100_melt.columns)

In [None]:
# Drop the chord-related columns so that rows differing only in those columns
# become exact duplicates (duplicates are removed in the following cell).
table_filenames_filtered_100_melt = table_filenames_filtered_100_melt.drop(
    labels=[
        'chordsequence', 'chordtype',
        'x1coordinate', 'y1coordinate',
        'chordlength',
        'x2coordinate', 'y2coordinate',
        'interchorddistance',
    ],
    axis='columns',
)

In [None]:
# Remove rows that are exact duplicates across all columns.
table_filenames_filtered_100_melt = table_filenames_filtered_100_melt.drop_duplicates()

In [None]:
# Table shape, followed by the number of rows per view_name value.
print(table_filenames_filtered_100_melt.shape)
table_filenames_filtered_100_melt.view_name.value_counts()

In [None]:
# Preview the first rows of the long-format table.
table_filenames_filtered_100_melt.head()

In [None]:
# Number of non-null instanceidk entries (Series.count() skips missing values).
table_filenames_filtered_100_melt['instanceidk'].count()

In [None]:
# Score every row of the long-format table against its predicted segmentation:
#   1. build the ground-truth mask path from the file name, frame and chamber,
#   2. build the prediction path from the study, file name, view and chamber,
#   3. compute the IoU between the ground truth and the matching prediction
#      frame, store it in `iou_score`, and plot both masks side by side.
# Rows whose prediction file is missing, or that cannot be scored, keep NaN.

# `import skimage` on its own does not guarantee that the `transform`
# submodule is loaded, so import it explicitly before using it.
import skimage.transform

GT_MASK_PATH = '/home/yoni_nachmany/dicom_image_classification/notebooks/masks/{}'
PRED_SEGMENT_PATH = '/home/courtney_irwin/dicom_image_classification/notebooks/segments/{}/{}'
# Chamber abbreviation used in `view_name` -> abbreviation used in file names.
CHAMBER_FILE_CODES = {'ven': 'lv', 'atr': 'la'}

# NaN (rather than '') marks "no score", which keeps the column numeric.
table_filenames_filtered_100_melt['iou_score'] = np.nan

for i in table_filenames_filtered_100_melt.index:
    fileno = str(table_filenames_filtered_100_melt.at[i, 'instancefilename'])
    frame = table_filenames_filtered_100_melt.at[i, 'frame']
    study = table_filenames_filtered_100_melt.at[i, 'studyidk']

    # view_name values look like 'a4c_ven_ed': the view comes first, then
    # the chamber; any remaining parts are not used here.
    view, chamber = table_filenames_filtered_100_melt.at[i, 'view_name'].split("_")[:2]
    chamber = CHAMBER_FILE_CODES.get(chamber, chamber)

    # Ground truth
    gt_filename = 'a_{}.dcm_{}_{}_gt.npy'.format(fileno, frame, chamber)
    gt = np.load(GT_MASK_PATH.format(gt_filename))
    # NOTE(review): resize() may interpolate, and iou_courtney counts every
    # non-zero pixel as mask, so the resized ground truth can come out
    # slightly larger than the original. Consider nearest-neighbour
    # resizing (order=0) — confirm against the mask dtype first.
    gt = skimage.transform.resize(gt, (384, 384))

    # Corresponding prediction
    pred_filename = 'a_{}_{}.dcm_{}.npy'.format(study, fileno, chamber)
    pred_path = PRED_SEGMENT_PATH.format(view, pred_filename)

    try:
        pred = np.load(pred_path)
    except FileNotFoundError as err:
        print("Oops!", type(err), "occured.  File {} doesn't exist".format(pred_path))
        print()
        continue

    try:
        # The prediction is indexed as (frame, row, col); keep only the frame
        # that has a ground-truth mask.
        pred_frame = pred[frame, :, :]
        score = iou_courtney(gt, pred_frame)
    except (IndexError, ValueError, ZeroDivisionError) as err:
        # Stay best-effort, but report the real cause instead of claiming
        # that the file is missing.
        print("Oops!", type(err), "occured.  Could not score {}: {}".format(pred_path, err))
        print()
        continue

    table_filenames_filtered_100_melt.at[i, 'iou_score'] = score
    print('IOU score added: {}'.format(score))

    plt.figure()
    plt.subplot(1, 2, 1)
    plt.imshow(gt)
    plt.title('GT {} {}'.format(view, chamber))
    plt.subplot(1, 2, 2)
    plt.imshow(pred_frame)
    plt.title('Pred {} {} IOU Score {}'.format(view, chamber, round(score, 2)))

In [None]:
# Coerce the scores to numbers: blank placeholders ('' on rows without a
# prediction) and any other non-numeric value become NaN, and the column gets
# a float dtype so count()/mean() below skip the missing rows.
table_filenames_filtered_100_melt['iou_score'] = pd.to_numeric(
    table_filenames_filtered_100_melt['iou_score'], errors='coerce'
)

# "tested" counts every row with an instance id; "found" counts only the rows
# that received a score.
print("Number of files tested: {}".format(table_filenames_filtered_100_melt['instanceidk'].count()))
print("Number of files found: {}".format(table_filenames_filtered_100_melt['iou_score'].count()))
print("Average IOU: {}".format(round(table_filenames_filtered_100_melt['iou_score'].mean(), 2)))

In [None]:
# Display the full table, including the iou_score column.
table_filenames_filtered_100_melt