# HuBMAP - Hacking the Kidney
#### Goal - Mapping the human body at functional tissue unit level - detect glomeruli FTUs in kidney

#### Calculating the performance metrics for Whats Goin On team's model

##### Step 1 - Import useful libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import directed_hausdorff

In [2]:
# Folder holding the ground-truth table and the dataset information sheet.
# It must end with '/' because file names are appended by plain string concatenation below.
# NOTE(review): absolute, machine-specific paths -- adjust them before running elsewhere.
# Alternative (kidney) dataset folder, kept for switching between runs:
# DATA_PATH = r'C:\Users\yiju\Desktop\Copy\Data\hubmap-kidney-segmentation/'
DATA_PATH = r'C:\Users\yiju\Desktop\Copy\Data\Colon_data_reprocessed/'
# Read the RLE prediction table (submission CSV) as df.
# The commented line is the kidney submission; the active line is the colon submission.
# df = pd.read_csv(r'C:\Users\yiju\Desktop\Copy\Scripts\2. Gleb\final_result\1\submission_kidney.csv')
df = pd.read_csv(r'C:\Users\yiju\Desktop\Copy\Scripts\2. Gleb\final_result\5\submission_colon.csv')
# Read the ground-truth RLEs as rles (its RLE strings are read from the 'predicted' column later on)
rles = pd.read_csv(DATA_PATH + 'test.csv')
# Read the dataset information table; it is used later to look up each image's width and height
df_info = pd.read_csv(DATA_PATH + 'HuBMAP-20-dataset_information.csv')

In [3]:
df

Unnamed: 0,id,predicted
0,CL_HandE_1234_B004_bottomleft,507737 8 512270 13 516804 17 521337 21 525871 ...
1,HandE_B005_CL_b_RGB_bottomleft,14211004 4 14215529 22 14220059 32 14224591 39...


In [4]:
len(rles)

2

In [5]:
rles

Unnamed: 0,id,predicted
0,CL_HandE_1234_B004_bottomleft,398887 19 403421 23 407956 25 412488 30 417022...
1,HandE_B005_CL_b_RGB_bottomleft,13716716 23 13721248 29 13725782 32 13730316 3...


In [6]:
df_info

Unnamed: 0,image_file,width_pixels,height_pixels,anatomical_structures_segmention_file,glomerulus_segmentation_file,patient_number,race,ethnicity,sex,age,weight_kilograms,height_centimeters,bmi_kg/m^2,laterality,percent_cortex,percent_medulla
0,CL_HandE_1234_B004_bottomleft,4704,4536,aa05346ff-anatomical-structure.json,aa05346ff.json,67347,White,Not Hispanic or Latino,Female,58,59.0,160.0,23.0,Right,80,20
1,CL_HandE_1234_B004_bottomright,4704,4536,afa5e8098-anatomical-structure.json,afa5e8098.json,67377,White,Not Hispanic or Latino,Female,58,59.0,160.0,23.0,Right,55,45
2,CL_HandE_1234_B004_topleft,4704,4536,54f2eec69-anatomical-structure.json,54f2eec69.json,67548,Black or African American,Not Hispanic or Latino,Male,58,79.9,190.5,22.0,Right,75,25
3,CL_HandE_1234_B004_topright,4704,4536,d488c759a-anatomical-structure.json,d488c759a.json,68138,White,Not Hispanic or Latino,Female,66,81.5,158.8,32.2,Left,100,0
4,HandE_B005_CL_b_RGB_bottomleft,4704,4536,1e2425f28-anatomical-structure.json,1e2425f28.json,63921,White,Not Hispanic or Latino,Male,48,131.5,193.0,35.3,Right,65,35
5,HandE_B005_CL_b_RGB_bottomright,4704,4536,e79de561c-anatomical-structure.json,e79de561c.json,67026,Black or African American,Not Hispanic or Latino,Male,53,73.0,166.0,26.5,Left,55,45
6,HandE_B005_CL_b_RGB_topleft,4704,4536,c68fe75ea-anatomical-structure.json,c68fe75ea.json,67112,White,Not Hispanic or Latino,Male,56,91.2,167.6,32.5,Left,80,20
7,HandE_B005_CL_b_RGB_topright,4704,4536,095bf7a1f-anatomical-structure.json,095bf7a1f.json,68250,White,Not Hispanic or Latino,Female,44,71.7,160.0,28.0,Right,65,35


##### Step 2 - Write utility functions

In [7]:
def dice_scores_img(pred, truth, eps=1e-8):
    """Compute Dice, recall and precision between two binary masks.

    Both inputs are flattened and binarised with ``> 0``, so every non-zero
    label counts as foreground.

    Args:
        pred: Predicted mask (array-like, any shape).
        truth: Ground-truth mask holding the same number of elements as ``pred``.
        eps: Smoothing term of the Dice ratio; it makes two empty masks score 1.

    Returns:
        Tuple ``(dice, recall, precision)`` of floats. When the ground truth
        has no foreground, recall is 1.0 (nothing could be missed); when the
        prediction has no foreground, precision is 1.0 (no false positives).
        This mirrors the convention the ``eps``-smoothed Dice already follows
        and avoids the NaN / RuntimeWarning of a 0/0 division.

    Raises:
        ValueError: If the two masks do not hold the same number of elements.
    """
    pred = np.asarray(pred).reshape(-1) > 0
    truth = np.asarray(truth).reshape(-1) > 0
    if pred.shape != truth.shape:
        # Without this check a size-1 mask would silently broadcast.
        raise ValueError(
            f"pred and truth must have the same number of elements, "
            f"got {pred.size} and {truth.size}"
        )

    intersect = np.count_nonzero(pred & truth)
    pred_total = np.count_nonzero(pred)
    truth_total = np.count_nonzero(truth)

    dice = (2.0 * intersect + eps) / (pred_total + truth_total + eps)
    recall = intersect / truth_total if truth_total else 1.0
    precision = intersect / pred_total if pred_total else 1.0
    return dice, recall, precision

In [8]:
def perf_metrics(pred, gt):
    """Compute pixel accuracy, Jaccard index and directed Hausdorff distance.

    Args:
        pred: Predicted 2-D mask.
        gt: Ground-truth 2-D mask with the same shape as ``pred``.

    Returns:
        Tuple ``(pixel_acc, jaccard, hausdorff)`` where

        * ``pixel_acc`` is the fraction of pixels whose values are equal,
        * ``jaccard`` is the binary Jaccard index of the foreground (``> 0``),
        * ``hausdorff`` is the tuple returned by
          ``scipy.spatial.distance.directed_hausdorff`` (distance first), measured
          from the ground-truth foreground pixels to the predicted foreground
          pixels. If either mask has no foreground the distance is undefined
          and ``(nan, -1, -1)`` is returned.

    Raises:
        ValueError: If the two masks have different shapes.
    """
    pred = np.asarray(pred)
    gt = np.asarray(gt)
    if pred.shape != gt.shape:
        raise ValueError(f"shape mismatch: pred {pred.shape} vs gt {gt.shape}")

    # Compare with != rather than subtracting: numpy refuses `-` on boolean
    # masks and unsigned integer masks wrap around on subtraction.
    pixel_acc = 1 - np.count_nonzero(gt != pred) / gt.size

    # Binarise so that instance-labelled masks (values > 1) remain a valid
    # binary problem for jaccard_score.
    gt_fg = gt > 0
    pred_fg = pred > 0
    jaccard = jaccard_score(gt_fg.ravel(), pred_fg.ravel())

    # directed_hausdorff expects point sets (one row per point). Passing the
    # raw masks would treat every image row as a single high-dimensional
    # point, so convert the foreground pixels to (row, col) coordinates first.
    gt_points = np.argwhere(gt_fg)
    pred_points = np.argwhere(pred_fg)
    if len(gt_points) == 0 or len(pred_points) == 0:
        hausdorff = (float('nan'), -1, -1)
    else:
        hausdorff = directed_hausdorff(gt_points, pred_points)

    return pixel_acc, jaccard, hausdorff

In [9]:
def rle_encode_less_memory(img):
    """Run-length encode a 2-D mask into a 'start length start length ...' string.

    The mask is read in transposed (column-by-column) order and starts are
    1-based. The first and last pixel of the flattened copy are cleared before
    the runs are searched, so foreground on those two pixels is not encoded.
    The input array itself is left untouched.
    """
    flat = img.T.flatten()          # flatten() copies, so the edits below stay local
    flat[0] = flat[-1] = 0
    # Positions (1-based) where the value changes: alternately run starts and run ends.
    boundaries = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    # Turn every run end into a run length by subtracting its start.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def enc2mask(encs, shape):
    """Decode run-length encodings into a single labelled 2-D mask.

    Args:
        encs: Iterable of RLE strings ('start length start length ...', with
            1-based starts). NaN entries (missing encodings) are skipped.
        shape: Two-element sequence; the flat buffer is reshaped to ``shape``
            and then transposed, so the result has shape ``(shape[1], shape[0])``.

    Returns:
        ``np.uint8`` array in which the pixels of the m-th encoding hold the
        value ``m + 1`` and background is 0.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # `np.float` was only an alias of the builtin float and was removed in
        # NumPy 1.24; the builtin also matches np.float64 values such as NaN.
        if isinstance(enc, float) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Tokens alternate start, length; zip drops a dangling unpaired token.
        for start_tok, length_tok in zip(tokens[0::2], tokens[1::2]):
            start = int(start_tok) - 1  # RLE starts are 1-based
            # NOTE(review): labels above 255 do not fit the uint8 buffer.
            img[start: start + int(length_tok)] = 1 + m

    return img.reshape(shape).T

In [10]:
def read_mask(mask_file, mask_shape):
    """Rasterise the polygons stored in a JSON annotation file into a boolean mask.

    The ``json`` and ``cv2`` modules must already be imported when this
    function is called.

    Args:
        mask_file: Path of a JSON file containing a sequence of entries, each
            with its polygon under ``['geometry']['coordinates']``.
        mask_shape: Shape of the mask to create.

    Returns:
        Boolean array of shape ``mask_shape`` that is True inside the polygons.
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(mask_file, "r", encoding='utf-8') as read_file:
        mask_data = json.load(read_file)

    polys = [
        np.array(entry['geometry']['coordinates'], dtype=np.int32)
        for entry in mask_data
    ]

    # An 8-bit canvas is enough for a 0/1 fill and far smaller than float64.
    mask = np.zeros(mask_shape, dtype=np.uint8)
    cv2.fillPoly(mask, polys, 1)
    return mask.astype(bool)

##### Step 3 - Calculate mean metrics values for test images 

In [11]:
import json
import cv2
import matplotlib.pyplot as plt
from PIL import Image
# Running totals over the test images. They are reset here so the cell can be
# re-run safely. Only the Dice total is accumulated below; the other three
# belong to the optional perf_metrics step, which is left disabled.
sum_score = 0
sum_pa = 0
sum_ji = 0
sum_haus = 0

for img in rles['id'].unique():
    # df_info stores (width, height); enc2mask reshapes to that and transposes.
    shape = df_info.loc[df_info.image_file == img, ['width_pixels', 'height_pixels']].values.astype(int)[0]

    # Ground truth and prediction both keep their RLE strings in the 'predicted' column.
    mask_truth = enc2mask(rles.loc[rles['id'] == img, 'predicted'], shape)
    mask_pred = enc2mask(df.loc[df['id'] == img, 'predicted'], shape)

    dice, recall, precision = dice_scores_img(mask_pred, mask_truth)
    print (f"{dice}\t{recall}\t{precision}\t{img}")

    # Accumulate the Dice score so a mean over the test images can be computed afterwards.
    sum_score += dice

    # Optional extra metrics (disabled); enable together with their totals:
#     pa, ji, haus = perf_metrics(mask_pred, mask_truth)
#     print (dice,pa,ji,haus)
#     sum_pa += pa
#     sum_ji += ji
#     sum_haus += haus[0]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if sys.path[0] == '':


0.9016943737449112	0.9074757813927984	0.8959861649172294	CL_HandE_1234_B004_bottomleft
0.7481099424510551	0.7181195863518749	0.7807144047948542	HandE_B005_CL_b_RGB_bottomleft


In [12]:
# To find mean, divide by number of test images
# NOTE(review): the totals below are only meaningful if the evaluation loop accumulates them,
# and len(rles) equals the number of test images only when rles has one row per image id
# (the loop iterates over rles['id'].unique()).
# print (sum_score/len(rles), "\t", sum_pa/len(rles), "\t", sum_ji/len(rles), "\t", sum_haus/len(rles))