In [None]:
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
from scipy.spatial.distance import directed_hausdorff
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import jaccard_score

In [None]:

# Step 2 - Write utility functions
def enc2mask(encs, shape):
    """Decode a sequence of run-length encodings into one labelled mask.

    Args:
        encs: Iterable of encodings. Each entry is either a whitespace-separated
            string of ``start length`` pairs (``start`` is 1-based into the
            flattened image) or a float NaN, which is skipped.
        shape: Two-element sequence; the flat buffer has ``shape[0] * shape[1]``
            pixels and is reshaped to ``shape`` before being transposed.

    Returns:
        ``np.uint8`` array of shape ``(shape[1], shape[0])``. Pixels covered by
        the k-th entry of ``encs`` (counting from 1, NaN entries included in the
        count) hold the value k; uncovered pixels are 0.
    """
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for label, rle in enumerate(encs, start=1):
        # A missing encoding shows up as float NaN and paints nothing.
        if isinstance(rle, float) and np.isnan(rle):
            continue
        tokens = rle.split()
        # Tokens alternate start, length; a dangling final token is ignored.
        for first, run in zip(tokens[0::2], tokens[1::2]):
            begin = int(first) - 1
            flat[begin: begin + int(run)] = label
    return flat.reshape(shape).T


In [None]:
def dice_scores_img(pred, truth, eps=1e-8):
    """Dice coefficient between the foregrounds of two masks.

    Any value greater than zero counts as foreground. ``eps`` is added to both
    numerator and denominator, so two empty masks score exactly 1.0.

    Args:
        pred: Predicted mask (NumPy array).
        truth: Ground-truth mask (NumPy array) with the same number of elements.
        eps: Smoothing term that guards against division by zero.

    Returns:
        ``(2 * |pred AND truth| + eps) / (|pred| + |truth| + eps)``.
    """
    pred_fg = pred.reshape(-1) > 0
    truth_fg = truth.reshape(-1) > 0
    overlap = np.logical_and(pred_fg, truth_fg).sum(-1)
    total = pred_fg.sum(-1) + truth_fg.sum(-1)
    return (2.0 * overlap + eps) / (total + eps)

In [None]:

def perf_metrics(gt, pred):
    """Pixel accuracy and Jaccard index (IoU) between two masks.

    The accuracy is computed with one vectorised comparison instead of a
    per-pixel Python loop, which is prohibitively slow on multi-megapixel masks.

    Args:
        gt: Ground-truth mask (NumPy array).
        pred: Predicted mask (NumPy array) of the same shape as ``gt``.

    Returns:
        Tuple ``(accuracy, iou)`` where ``accuracy`` is the fraction of pixels
        on which the masks agree and ``iou`` is ``jaccard_score`` evaluated on
        the C-order flattened masks.

    Raises:
        ValueError: If the two masks do not have the same shape.
        ZeroDivisionError: If the masks contain no pixels.
    """
    if gt.shape != pred.shape:
        raise ValueError(
            f"Mask shapes differ: gt {gt.shape} vs pred {pred.shape}"
        )
    # count_nonzero returns a plain int, so the ratio stays a Python float.
    accuracy = np.count_nonzero(gt == pred) / gt.size
    return accuracy, jaccard_score(gt.flatten(order='C'), pred.flatten(order='C'))

In [None]:
def get_confusion_matrix(mask_truth, mask_pred):
    """Pixel-wise confusion counts between a ground-truth and a predicted mask.

    Both masks are flattened and compared with ``confusion_matrix`` restricted
    to the labels 0 and 1, so the result is always a 2x2 table.

    Returns:
        Tuple ``(tn, fp, fn, tp)``.
    """
    truth_flat = mask_truth.flatten()
    pred_flat = mask_pred.flatten()

    # Rows are the true label, columns the predicted label: [[tn, fp], [fn, tp]].
    counts = confusion_matrix(truth_flat, pred_flat, labels=[0, 1])
    (tn, fp), (fn, tp) = counts
    return tn, fp, fn, tp

def _directed_nearest_distances(source_coords, target_coords):
    """For every source point, the Euclidean distance to its nearest target point."""
    nearest, _ = cKDTree(target_coords).query(source_coords, k=1)
    return nearest


def get_hausdorff_distance(mask_truth, mask_pred):
    """Directed Hausdorff distances and HD95 between two masks.

    The point sets are the coordinates of all non-zero pixels of each mask.
    For each direction the distance from every point to the nearest point of
    the other set is computed; the directed Hausdorff distance is the maximum
    of those values and the directed 95th-percentile distance is their 95th
    percentile. HD95 is the larger of the two directed percentiles.

    Taking ``np.percentile`` over the two directed Hausdorff scalars (as this
    function previously did) only interpolates between them and is not a
    percentile of the distance distribution, so it offers no robustness to
    outlier pixels.

    Empty masks are handled explicitly: if both masks are empty all three
    values are 0.0 (the masks are identical); if exactly one is empty all
    three are ``inf`` so a missing prediction can never look like a perfect one.

    Args:
        mask_truth: Ground-truth mask (array-like; non-zero means foreground).
        mask_pred: Predicted mask (array-like; non-zero means foreground).

    Returns:
        Tuple ``(hausdorff_gt_to_pred, hausdorff_pred_to_gt,
        hausdorff_distance_95)`` of Python floats, in pixel units.
    """
    # find coordinates of non-zero elements in the masks
    gt_coords = np.argwhere(mask_truth)
    pred_coords = np.argwhere(mask_pred)

    if len(gt_coords) == 0 or len(pred_coords) == 0:
        degenerate = 0.0 if len(gt_coords) == len(pred_coords) else float("inf")
        return degenerate, degenerate, degenerate

    gt_to_pred = _directed_nearest_distances(gt_coords, pred_coords)
    pred_to_gt = _directed_nearest_distances(pred_coords, gt_coords)

    hausdorff_gt_to_pred = float(gt_to_pred.max())
    hausdorff_pred_to_gt = float(pred_to_gt.max())

    # 95th percentile of the per-point distances, symmetrised with max().
    hausdorff_distance_95 = float(
        max(np.percentile(gt_to_pred, 95), np.percentile(pred_to_gt, 95))
    )

    return hausdorff_gt_to_pred, hausdorff_pred_to_gt, hausdorff_distance_95

In [None]:
# Step 3 - Calculate metric values for the test images
# All inputs and outputs are resolved against the directory the notebook runs in.
# The path is kept as a string ending in '/' because other cells build file
# paths from it by plain concatenation.
BASE_PATH = f"{os.getcwd()}/"
INPUT_PATH = BASE_PATH
print(INPUT_PATH)

In [None]:

# Load the dataset metadata CSV from the input folder.
metadata_csv = INPUT_PATH + 'dataset_split_metadata/all_metadata_for_publication.csv'
df_metadata = pd.read_csv(metadata_csv)


In [None]:
# Keep only the rows whose `Usage` column equals "private_test".
is_private_test = df_metadata["Usage"].eq("private_test")
df_metadata = df_metadata.loc[is_private_test]


In [None]:
# Narrow the metadata down to the four columns used by the evaluation.
info_columns = ["filename", "tissue_name", "image_dims", "rle"]
df_info = df_metadata.loc[:, info_columns]


In [None]:
# Turn the file name into the (provisional) `id` column and keep a copy of the
# original value in a new trailing `filename` column.
df_info = (
    df_info
    .rename(columns={'filename': 'id'})
    .assign(filename=lambda frame: frame['id'])
)


In [None]:
# Load the old-id -> new-id lookup table. The file is resolved against
# INPUT_PATH, like every other file this notebook reads, instead of silently
# depending on the process working directory; the encoding is pinned so the
# result does not depend on the platform default.
json_metadata_dir = "dataset_split_metadata/"
with open(INPUT_PATH + json_metadata_dir + "old_to_new_id_map.json", encoding="utf-8") as f:
    mapping_json = json.load(f)
# Reverse lookup: new id (as int) -> original key.
mapping_json_inv = {int(v): k for k, v in mapping_json.items()}

In [None]:
# Translate every file name into its new id. The lookup reads the untouched
# `filename` column (identical to the pre-mapping `id`) so that re-running this
# cell gives the same result instead of mapping already-mapped ids to NaN.
df_info['id'] = df_info['filename'].map(mapping_json)

# An unmapped file would surface later as an opaque NaN-to-int conversion error;
# fail here with the offending names instead.
unmapped = df_info.loc[df_info['id'].isna(), 'filename']
assert unmapped.empty, f"No id mapping found for: {unmapped.tolist()[:5]}"


In [None]:
# Ground-truth table: id, encoded mask and the descriptive columns.
truth_columns = ["id", "rle", "filename", "tissue_name"]
df_truth = df_info.loc[:, truth_columns]


In [None]:
# Give both tables a fresh 0..n-1 index and make the ground-truth ids integers
# so they can be compared with integer ids elsewhere.
df_info = df_info.reset_index(drop=True)
df_truth = df_truth.reset_index(drop=True).astype({"id": int})

In [None]:
# Score every team's submission (team_6 .. team_50) against the ground truth.
# For each team this writes two files into the team's folder:
#   masks.csv   - ground-truth rows left-joined with the team's predictions
#   metrics.csv - one row per image with dice / IoU / Hausdorff values
TEAMS_DIR = INPUT_PATH + 'winning-submissions/other_top50_teams/'

for i in range(6, 51):
    team = f"team_{i}"
    team_dir = TEAMS_DIR + f'{team}/'
    df_pred = pd.read_csv(team_dir + 'submission.csv').astype({"id": int})

    # Per-image results, keyed by image id. The four dicts are filled in
    # lockstep, so they always share the same keys in the same order.
    scores = {}
    iou_list = {}
    hd_pred_to_gt_list = {}
    hd_95_list = {}
    for j in df_info.index:
        image_id = df_info.at[j, "id"]
        id_int = int(image_id)

        # `image_dims` is text of the form "(a, b)"; strip the parentheses and
        # read the two integers.
        dims = df_info.at[j, "image_dims"].strip().strip('(').strip(')').split(',')
        shape = [int(dims[0]), int(dims[1])]

        mask_truth = enc2mask(df_truth[df_truth['id'] == id_int]['rle'], shape)
        mask_pred = enc2mask(df_pred[df_pred['id'] == id_int]['rle'], shape)

        score = dice_scores_img(mask_pred, mask_truth)
        _, iou_score = perf_metrics(mask_truth, mask_pred)
        # NOTE(review): only the pred->gt directed distance is exported as
        # `hd_score`; the gt->pred value is discarded - confirm this one-sided
        # metric is intended.
        _, hd_pred_to_gt, hd_95 = get_hausdorff_distance(mask_truth, mask_pred)

        scores[image_id] = score
        iou_list[image_id] = iou_score
        hd_pred_to_gt_list[image_id] = hd_pred_to_gt
        hd_95_list[image_id] = hd_95

    # 1. Export pred and gt mask with id
    df_masks = df_truth.merge(df_pred, on='id', how='left', suffixes=('_truth', '_pred'))
    df_masks.to_csv(team_dir + 'masks.csv', index=None)

    # 2. Export dice, IoU and Hausdorff values with id, filename, organ.
    df_metrics = (
        pd.DataFrame({
            'id': list(scores.keys()),
            'dice_score': list(scores.values()),
            'iou_score': list(iou_list.values()),
            'hd_score': list(hd_pred_to_gt_list.values()),
            'hd95_score': list(hd_95_list.values()),
        })
        .astype({"id": int})
        .merge(df_truth, on='id', how='left')
        # the encoded mask is not needed next to the scores
        .drop('rle', axis=1)
    )
    df_metrics.to_csv(team_dir + 'metrics.csv', index=None)

    print(f"Team completed: {team}")

print("Metric computation complete")