In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import json 
import matplotlib
import matplotlib.pyplot as plt


from sklearn.metrics import jaccard_score
from scipy.spatial.distance import directed_hausdorff

import cv2
import tifffile
import seaborn as sns
from itertools import chain
from PIL import Image
from PIL import ImageDraw

import skimage
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Resolve the data root (current working directory, with a trailing slash so
# later string concatenations work) and load the chosen team's metrics table.
BASE_PATH = f'{os.getcwd()}/'
INPUT_PATH = BASE_PATH
print(INPUT_PATH)

team = "team_3"
df_metrics = pd.read_csv(f'{INPUT_PATH}winning-submissions/{team}/metrics.csv')
df_metrics.head()

In [None]:
# Mask table for the selected team (read from masks.csv in the team folder)
df_masks = pd.read_csv(f'{INPUT_PATH}winning-submissions/{team}/masks.csv')
df_masks.head()

In [None]:
# Metadata table for the dataset split (all_metadata_for_publication.csv)
df_metadata = pd.read_csv(f'{INPUT_PATH}dataset_split_metadata/all_metadata_for_publication.csv')
df_metadata.head()

In [None]:
# Utility Methods

# Method to convert RLE to binary image mask
def enc2mask(encs, shape):
    """Decode run-length encodings into a 2-D uint8 label mask.

    Parameters
    ----------
    encs : iterable
        Each entry is a whitespace-separated string of ``start length``
        pairs, where ``start`` is a 1-based offset into the flattened mask.
        Entries that are a float NaN are skipped.
    shape : sequence of two ints
        Shape used to reshape the flat buffer before it is transposed.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(shape[1], shape[0])``; pixels covered by the k-th
        entry of ``encs`` (0-based) hold the value ``k + 1``, all others 0.
    """
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for label, rle in enumerate(encs, start=1):
        if isinstance(rle, float) and np.isnan(rle):
            continue
        tokens = rle.split()
        # Only complete (start, length) pairs are used; a dangling token is ignored.
        n_pairs = len(tokens) // 2
        starts = tokens[0:2 * n_pairs:2]
        lengths = tokens[1:2 * n_pairs:2]
        for first, run in zip(starts, lengths):
            begin = int(first) - 1  # RLE starts are 1-based
            flat[begin:begin + int(run)] = label

    return flat.reshape(shape).T

# Method to visualize images and masks
def show_image(filename, img_id, rle_truth, rle_pred, img_shape, organ, case, index):
    """Plot an image beside its ground-truth mask, predicted mask and their difference.

    Draws a 1x5 figure: the source image, the ground-truth mask, the
    predicted mask, a signed difference map (false negatives red, false
    positives blue, agreement black) and the prediction overlaid on the image.

    Parameters
    ----------
    filename : str
        Stem of the ``.tif`` file read from
        ``published_data/data/data/images/{organ}/`` under ``INPUT_PATH``.
    img_id
        Identifier shown in the title of the first panel.
    rle_truth, rle_pred
        Iterables of RLE strings, decoded with ``enc2mask``.
    img_shape : sequence
        Its first two entries are forwarded to ``enc2mask`` as ``shape``.
    organ : str
        Image sub-directory name.
    case, index
        Not used for drawing; only referenced by the disabled ``savefig`` call.
    """
    fig, ax = plt.subplots(1, 5, figsize=(20, 40))

    image = plt.imread(INPUT_PATH+f'published_data/data/data/images/{organ}/{filename}.tif')
    dims = (img_shape[0], img_shape[1])
    mask_pred = enc2mask(rle_pred, shape=dims)
    mask_truth = enc2mask(rle_truth, shape=dims)

    # Signed foreground difference: +1 = false negative (truth only),
    # -1 = false positive (prediction only), 0 = agreement.
    # The masks are uint8, so subtracting them directly wraps -1 around to 255;
    # casting to a signed type first keeps the sign explicit.
    truth_fg = (mask_truth > 0).astype(np.int8)
    pred_fg = (mask_pred > 0).astype(np.int8)
    mask_diff = truth_fg - pred_fg

    # One colour per value of mask_diff: -1 -> blue, 0 -> black, +1 -> red.
    diff_cmap = matplotlib.colors.ListedColormap(['blue', 'black', 'red'])

    ax[0].imshow(image)
    ax[0].set_title(f'file id: {img_id}')
    ax[0].axis("off")

    ax[1].imshow(mask_truth, alpha=0.7, cmap='gray')
    ax[1].set_title("Ground Truth")
    ax[1].axis("off")

    ax[2].imshow(mask_pred, alpha=0.7, cmap='gray')
    ax[2].set_title("Prediction")
    ax[2].axis("off")

    # A single layer is drawn here. Stacking separate FN and FP images lets the
    # upper, nearly opaque layer cover the lower one across the whole panel.
    ax[3].imshow(mask_diff, cmap=diff_cmap, vmin=-1, vmax=1)
    ax[3].set_title("Mask Difference (FN: Red, FP: Blue)")
    ax[3].axis("off")

    ax[4].imshow(image)
    ax[4].imshow(mask_pred, alpha=0.5, cmap='gray')
    ax[4].set_title("Prediction Overlay")
    ax[4].axis("off")

    # Optional export, left disabled:
    # plt.savefig(INPUT_PATH+f'winning-submissions/{team}/viz/viz_{organ}_{case}_{index}.png', bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)
    print('-'*50)

# Method to get n best or worst cases for an organ
def get_n_cases(df, organ, metric, ascending, n=5):
    """Return the ``n`` best or worst rows of ``df`` for one organ and save them as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``tissue_name`` column and the score column selected
        by ``metric`` (``dice_score`` or ``iou_score``).
    organ : str
        Value of ``tissue_name`` to keep.
    metric : {'dice', 'iou'}
        Which score to rank by.
    ascending : bool
        True ranks lowest scores first (the "worst" cases); False ranks
        highest first (the "best" cases).
    n : int, default 5
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The first ``n`` rows of the organ subset after sorting.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.

    Notes
    -----
    As a side effect the result is written to
    ``winning-submissions/{team}/{organ}_{metric}_{case}_{n}.csv`` under
    ``INPUT_PATH``, where ``case`` is ``worst`` or ``best``.
    """
    metric_columns = {'dice': 'dice_score', 'iou': 'iou_score'}
    # Validate before doing any work so a typo fails with a clear message
    # instead of an unbound-variable error further down.
    if metric not in metric_columns:
        raise ValueError(
            f"Unsupported metric {metric!r}; expected one of {sorted(metric_columns)}"
        )
    score_column = metric_columns[metric]

    # Keep only the requested organ and rank it by the chosen score.
    organ_rows = df[df['tissue_name'] == organ]
    n_cases = organ_rows.sort_values(by=score_column, ascending=ascending).head(n)

    # Ascending order puts the lowest scores first, i.e. the worst cases.
    case = 'worst' if ascending else 'best'
    n_cases.to_csv(INPUT_PATH+f'winning-submissions/{team}/{organ}_{metric}_{case}_{n}.csv', index=False)

    return n_cases

# Method to visualize cases from get_n_cases(). 
def visualize_cases(df, case):
    """Call ``show_image`` once for every row of ``df``.

    For each row the RLE columns are looked up in ``df_masks`` by ``id`` and
    the image dimensions are looked up in ``df_metadata`` by ``filename``.
    ``case`` and the row's index label are passed through to ``show_image``.
    """
    for index, row in df.iterrows():
        organ = row['tissue_name']
        filename = row['filename']
        img_id = row['id']

        # RLE columns of the mask rows that belong to this image id.
        masks_for_image = df_masks.loc[df_masks['id'] == img_id]
        rle_pred = masks_for_image['rle_pred']
        rle_truth = masks_for_image['rle_truth']

        # 'image_dims' is stored as text; drop the surrounding parentheses and
        # split on commas to recover the first two integers.
        dims_text = df_metadata.loc[df_metadata['filename'] == filename, 'image_dims'].iloc[0]
        dims = dims_text.strip().strip('(').strip(')').split(',')
        img_shape = [int(dims[0]), int(dims[1])]

        show_image(filename, img_id, rle_truth, rle_pred, img_shape, organ, case, index)
    

In [None]:
# Collect the Dice scores of each tissue type with one vectorised selection per
# tissue instead of walking the frame row by row.
dice_by_tissue = {
    tissue: df_metrics.loc[df_metrics['tissue_name'] == tissue, 'dice_score'].tolist()
    for tissue in ('kidney', 'largeintestine', 'spleen', 'lung', 'prostate')
}
dice_kidney = dice_by_tissue['kidney']
dice_largeintestine = dice_by_tissue['largeintestine']
dice_spleen = dice_by_tissue['spleen']
dice_lung = dice_by_tissue['lung']
dice_prostate = dice_by_tissue['prostate']

# Number of images per tissue type
print(len(dice_kidney))
print(len(dice_largeintestine))
print(len(dice_spleen))
print(len(dice_lung))
print(len(dice_prostate))

# Scores of all five tissue types combined
all_dice = dice_kidney + dice_largeintestine + dice_lung + dice_spleen + dice_prostate
print(len(all_dice))

In [None]:
# Collect the IoU scores of each tissue type with one vectorised selection per
# tissue instead of walking the frame row by row.
iou_by_tissue = {
    tissue: df_metrics.loc[df_metrics['tissue_name'] == tissue, 'iou_score'].tolist()
    for tissue in ('kidney', 'largeintestine', 'spleen', 'lung', 'prostate')
}
iou_kidney = iou_by_tissue['kidney']
iou_largeintestine = iou_by_tissue['largeintestine']
iou_spleen = iou_by_tissue['spleen']
iou_lung = iou_by_tissue['lung']
iou_prostate = iou_by_tissue['prostate']

# Number of images per tissue type
print(len(iou_kidney))
print(len(iou_largeintestine))
print(len(iou_spleen))
print(len(iou_lung))
print(len(iou_prostate))

# Scores of all five tissue types combined
all_iou = iou_kidney + iou_largeintestine + iou_lung + iou_spleen + iou_prostate
print(len(all_iou))

In [None]:
# Mean Dice score per tissue type, followed by the mean over all images
dice_groups = [
    ("Kidney", dice_kidney),
    ("Large Intestine", dice_largeintestine),
    ("Spleen", dice_spleen),
    ("Prostate", dice_prostate),
    ("Lung", dice_lung),
    ("Overall", all_dice),
]
for label, scores in dice_groups:
    print(f"{label}: Average Dice Score = ", round(sum(scores)/len(scores), 5))

In [None]:
# Mean IoU score per tissue type, followed by the mean over all images
iou_groups = [
    ("Kidney", iou_kidney),
    ("Large Intestine", iou_largeintestine),
    ("Spleen", iou_spleen),
    ("Prostate", iou_prostate),
    ("Lung", iou_lung),
    ("Overall", all_iou),
]
for label, scores in iou_groups:
    print(f"{label}: Average IOU Score = ", round(sum(scores)/len(scores), 5))

In [None]:
# Get filename from id
# id = 3667
# filename = df_metrics[df_metrics['id'] == id].filename.iloc[0]
# filename

# Get best 5 and worst 5 cases per organ for Dice.

### Best 5

In [None]:
# Kidney: five highest Dice scores
case = 'dice_best'
top_5 = get_n_cases(df_metrics, organ='kidney', metric='dice', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Large intestine: five highest Dice scores
case = 'dice_best'
top_5 = get_n_cases(df_metrics, organ='largeintestine', metric='dice', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Spleen: five highest Dice scores
case = 'dice_best'
top_5 = get_n_cases(df_metrics, organ='spleen', metric='dice', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Lung: five highest Dice scores
case = 'dice_best'
top_5 = get_n_cases(df_metrics, organ='lung', metric='dice', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Prostate: five highest Dice scores
case = 'dice_best'
top_5 = get_n_cases(df_metrics, organ='prostate', metric='dice', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

### Worst 5

In [None]:
# Kidney: five lowest Dice scores
case = 'dice_worst'
worst_5 = get_n_cases(df_metrics, organ='kidney', metric='dice', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Large intestine: five lowest Dice scores
case = 'dice_worst'
worst_5 = get_n_cases(df_metrics, organ='largeintestine', metric='dice', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Spleen: five lowest Dice scores
case = 'dice_worst'
worst_5 = get_n_cases(df_metrics, organ='spleen', metric='dice', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Lung: five lowest Dice scores
case = 'dice_worst'
worst_5 = get_n_cases(df_metrics, organ='lung', metric='dice', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Prostate: five lowest Dice scores
case = 'dice_worst'
worst_5 = get_n_cases(df_metrics, organ='prostate', metric='dice', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

# Get best 5 and worst 5 cases per organ for IOU.

### Best 5

In [None]:
# Kidney: five highest IoU scores
case = 'iou_best'
top_5 = get_n_cases(df_metrics, organ='kidney', metric='iou', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Large intestine: five highest IoU scores
case = 'iou_best'
top_5 = get_n_cases(df_metrics, organ='largeintestine', metric='iou', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Spleen: five highest IoU scores
case = 'iou_best'
top_5 = get_n_cases(df_metrics, organ='spleen', metric='iou', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Lung: five highest IoU scores
case = 'iou_best'
top_5 = get_n_cases(df_metrics, organ='lung', metric='iou', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

In [None]:
# Prostate: five highest IoU scores
case = 'iou_best'
top_5 = get_n_cases(df_metrics, organ='prostate', metric='iou', ascending=False, n=5).reset_index(drop=True)
display(top_5)

visualize_cases(top_5, case)

### Worst 5

In [None]:
# Kidney: five lowest IoU scores
case = 'iou_worst'
worst_5 = get_n_cases(df_metrics, organ='kidney', metric='iou', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Large intestine: five lowest IoU scores
case = 'iou_worst'
worst_5 = get_n_cases(df_metrics, organ='largeintestine', metric='iou', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Spleen: five lowest IoU scores
case = 'iou_worst'
worst_5 = get_n_cases(df_metrics, organ='spleen', metric='iou', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Lung: five lowest IoU scores
case = 'iou_worst'
worst_5 = get_n_cases(df_metrics, organ='lung', metric='iou', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# Prostate: five lowest IoU scores
case = 'iou_worst'
worst_5 = get_n_cases(df_metrics, organ='prostate', metric='iou', ascending=True, n=5).reset_index(drop=True)
display(worst_5)

visualize_cases(worst_5, case)

In [None]:
# def enc2mask(encs, shape):
#     img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
#     for m, enc in enumerate(encs):
#         if isinstance(enc, float) and np.isnan(enc):
#             continue
#         enc_split = enc.split()
#         for i in range(len(enc_split) // 2):
#             start = int(enc_split[2 * i]) - 1
#             length = int(enc_split[2 * i + 1])
#             img[start: start + length] = 1 + m
#     return img.reshape(shape).T

In [None]:
# def show_image(filename, rle_truth, rle_pred, img_shape, organ, case, index):
#     fig, ax = plt.subplots(1,4,figsize=(20,40))

#     image = plt.imread(INPUT_PATH+f'published_data/data/data/images/{organ}/{filename}.tif')
#     mask_pred = enc2mask(rle_pred,shape=(img_shape[0],img_shape[1]))
#     mask_truth = enc2mask(rle_truth,shape=(img_shape[0],img_shape[1]))
#     mask_diff = mask_truth - mask_pred
#     #display(pd.DataFrame(df_organ.loc[idx,['id','organ','age','sex']]).T)
#     ax[0].imshow(image)
#     ax[0].set_title("Image")
#     ax[0].axis("off")
#     ax[1].imshow(mask_pred,alpha=0.7,cmap='gray')
#     #ax[1].imshow(mask_pred,alpha=0.3,cmap='Reds')
#     ax[1].set_title("Predicted Mask")
#     ax[1].axis("off")
#     ax[2].imshow(mask_diff,alpha=0.7,cmap='plasma')
#     #ax[1].imshow(mask_pred,alpha=0.3,cmap='Reds')
#     ax[2].set_title("Mask Difference")
#     ax[2].axis("off")
#     ax[3].imshow(image)
#     ax[3].imshow(mask_pred,alpha=0.5,cmap='gray')
#     ax[3].set_title("Prediction Overlay")
#     ax[3].axis("off")
#     plt.savefig(INPUT_PATH+f'winning-submissions/{team}/viz/viz_{case}_{index}.png', bbox_inches='tight')
#     plt.show()
#     print('-'*50)

In [None]:
# def visualize_cases(df, case):
#     for index, row in df.iterrows():
#         organ = row['tissue_name']
#         filename = row['filename']
#         img_id = row['id']

#         mask_row = df_masks[df_masks['id'] == img_id]
#         rle_pred = mask_row['rle_pred']
#         rle_truth = mask_row['rle_truth']
#         shape = df_metadata[df_metadata['filename'] == filename]['image_dims'].iloc[0]
#         shape = shape.strip().strip('(').strip(')').split(',')
#         img_shape = [int(shape[0]), int(shape[1])]
        
#         show_image(filename, rle_truth, rle_pred, img_shape, organ, case, index)
#         #break
    

In [None]:
# # for best 5 kidney dice
# case = 'dice_best'
# top_5 = get_n_cases(df_metrics, 'kidney', 'dice', False, 5)
# top_5 = top_5.reset_index(drop=True)
# display(top_5)

# visualize_cases(top_5, case)