# HubMap- Hacking the Kidney
#### Goal - Mapping the human body at functional tissue unit level - detect glomeruli FTUs in kidney

#### Calculating the performance metrics for Whats Goin On team's model

##### Step 1 - Import useful libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import directed_hausdorff

In [15]:
DATA_PATH = r'C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap_colon_data'
df = pd.read_csv('submission_colon_generalized.csv')
rles = pd.read_csv(DATA_PATH + '/colon.csv')
df_info = pd.read_csv(DATA_PATH + '/Colon_data_reprocessed/Colon_data_reprocessed/HuBMAP-20-dataset_information.csv')

In [16]:
path_test = r'C:/Users/soodn/Downloads/Naveksha/Kaggle HuBMAP/Data/hubmap-kidney-segmentation-data/test/'

##### Step 2 - Write utility functions

In [17]:
def dice_scores_img(pred, truth, eps=1e-8):
    pred = pred.reshape(-1) > 0
    truth = truth.reshape(-1) > 0
    intersect = (pred & truth).sum(-1)
    union = pred.sum(-1) + truth.sum(-1)

    dice = (2.0 * intersect + eps) / (union + eps)
    return dice

In [18]:
def perf_metrics(gt, pred):
    n = 0
    d = 0
    for i in range(gt.shape[0]):
        for j in range (gt.shape[1]):
            if (gt[i][j]==pred[i][j]):
                n = n+1
            d = d+1
    
    return n/d, jaccard_score(gt.flatten(order='C'), pred.flatten(order='C')), directed_hausdorff(gt, pred)

In [19]:
def read_mask(mask_file, mask_shape):
    read_file = open(mask_file, "r", encoding='utf-8') 
    mask_data = json.load(read_file)
    polys = []
    for index in range(mask_data.__len__()):
        geom = np.array(mask_data[index]['geometry']['coordinates'], dtype=np.int32)
        polys.append(geom)

    mask = np.zeros(mask_shape)
    # print (type(polys), type(polys[0]), type(polys[0][0][0][0]))
    cv2.fillPoly(mask, polys, 1)
    mask = mask.astype(bool)
    return mask

In [20]:
def rle_encode_less_memory(img):
    pixels = img.T.flatten()
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def rle_decode(mask_rle, shape):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    mask_rle = str(mask_rle)
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

def enc2mask(encs, shape):
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        if isinstance(enc, np.float) and np.isnan(enc):
            continue
        enc_split = enc.split()
        for i in range(len(enc_split) // 2):
            start = int(enc_split[2 * i]) - 1
            length = int(enc_split[2 * i + 1])
            img[start: start + length] = 1 + m

    return img.reshape(shape).T

In [21]:
def rleToMask(rleString,height,width):
    rows,cols = height,width
    # print (type(rleString))
    rleNumbers = [int(numstring) for numstring in rleString.split(' ')]
    rle_pairs = np.array(rleNumbers).reshape(-1,2)
    img = np.zeros(rows*cols,dtype=np.uint8)
    for index,length in rle_pairs:
        index -= 1
        img[index:index+length] = 255
    img = img.reshape(cols,rows)
    img = img.T
    return img

In [45]:
df

Unnamed: 0,id,predicted
0,CL_HandE_1234_B004.jpg_bottom_left,
1,CL_HandE_1234_B004.jpg_bottom_right,
2,CL_HandE_1234_B004.jpg_top_left,
3,CL_HandE_1234_B004.jpg_top_right,
4,HandE_B005_CL_b_RGB.jpg_bottom_left,
5,HandE_B005_CL_b_RGB.jpg_bottom_right,
6,HandE_B005_CL_b_RGB.jpg_top_left,
7,HandE_B005_CL_b_RGB.jpg_top_right,


##### Step 3 - Calculate mean metrics values for test images 

In [46]:
import json
import cv2
import matplotlib.pyplot as plt
from PIL import Image
sum_score = 0
sum_pa = 0
sum_ji = 0
sum_haus = 0

for img_ in rles['id'].unique():
    img_list = img_.split('.')
    jpg = img_list[1]
    img_new = jpg.split('_')
    img = img_list[0]+'_'+img_new[1]+img_new[2] 
    shape = df_info[df_info.image_file == img][['width_pixels', 'height_pixels']].values.astype(int)[0]
    truth = rles[rles['id'] == img_]['encodings']
    mask_truth = enc2mask(truth, shape)
    pred = df[df['id'] == img_]['predicted']
    mask_pred = enc2mask(pred, shape)  
    score = dice_scores_img(mask_pred, mask_truth)
    print (score)
    pa, ji, haus = perf_metrics(mask_pred, mask_truth)
    print (pa)
    sum_score += score
    sum_pa += pa
    sum_ji += ji
    sum_haus += haus[0]

5.5555802470232896e-15
0.9156412344479238
1.0225356634875928e-14
0.9541666947863802
3.977522226394186e-15
0.8821724015885014
6.480419412744351e-15
0.927680314850808
1.2078228268851554e-14
0.9611977948145749
1.0


  _warn_prf(average, modifier, msg_start, len(result))


1.0
1.8440998026812873e-14
0.9745858716061381
1.589249048039795e-14
0.9705104815294725


In [47]:
l = len(df)
print (sum_score/l, sum_pa/l, sum_ji/l, sum_haus/l)

0.1250000000000091 0.9482443492029747 0.0 0.0


In [48]:
sum_haus

0.0

In [49]:
l

8