# HubMap- Hacking the Kidney
#### Goal - Mapping the human body at functional tissue unit level - detect glomeruli FTUs in kidney

#### Calculating the performance metrics for Whats Goin On team's model

##### Step 1 - Import useful libraries

In [25]:
import numpy as np
import pandas as pd
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import directed_hausdorff

In [26]:
DATA_PATH = r'C:/Users/soodn/Downloads/Naveksha/Kaggle HuBMAP/'
df = pd.read_csv('submission-wgo.csv')
df = pd.read_csv('submission-wgo-kaggle.csv')
rles = pd.read_csv(DATA_PATH + 'Data/hubmap-kidney-segmentation-data/test.csv')
df_info = pd.read_csv(DATA_PATH + 'Data/hubmap-kidney-segmentation-data/HuBMAP-20-dataset_information.csv')

In [36]:
path_test = r'C:/Users/soodn/Downloads/Naveksha/Kaggle HuBMAP/Data/hubmap-kidney-segmentation-data/test/'

##### Step 2 - Write utility functions

In [28]:
def rle_encode_less_memory(img):
    pixels = img.T.flatten()
    pixels[0] = 0
    pixels[-1] = 0
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def rle_decode(mask_rle, shape):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    mask_rle = str(mask_rle)
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

def enc2mask(encs, shape):
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        if isinstance(enc, np.float) and np.isnan(enc):
            continue
        enc_split = enc.split()
        for i in range(len(enc_split) // 2):
            start = int(enc_split[2 * i]) - 1
            length = int(enc_split[2 * i + 1])
            img[start: start + length] = 1 + m

    return img.reshape(shape).T

In [29]:
def dice_scores_img(pred, truth, eps=1e-8):
    pred = pred.reshape(-1) > 0
    truth = truth.reshape(-1) > 0
    intersect = (pred & truth).sum(-1)
    union = pred.sum(-1) + truth.sum(-1)

    dice = (2.0 * intersect + eps) / (union + eps)
    return dice

In [30]:
def perf_metrics(gt, pred):
    n = 0
    d = 0
    for i in range(gt.shape[0]):
        for j in range (gt.shape[1]):
            if (gt[i][j]==pred[i][j]):
                n = n+1
            d = d+1
    
    return n/d, jaccard_score(gt.flatten(order='C'), pred.flatten(order='C')), directed_hausdorff(gt, pred)

In [31]:
def read_mask(mask_file, mask_shape):
    read_file = open(mask_file, "r", encoding='utf-8') 
    mask_data = json.load(read_file)
    polys = []
    for index in range(mask_data.__len__()):
        geom = np.array(mask_data[index]['geometry']['coordinates'], dtype=np.int32)
        polys.append(geom)

    mask = np.zeros(mask_shape)
    print (type(polys), type(polys[0]), type(polys[0][0][0][0]))
    cv2.fillPoly(mask, polys, 1)
    mask = mask.astype(bool)
    return mask

##### Step 3 - Calculate mean metrics values for test images 

In [54]:
import json
import cv2
sum_score = 0
sum_pa = 0
sum_ji = 0
sum_haus = 0

for img in rles['id'].unique():
    shape = df_info[df_info.image_file == img + ".tiff"][['width_pixels', 'height_pixels']].values.astype(int)[0]
    mt = read_mask(path_test+img+'-anatomical-structure.json', shape)
    mask_truth = 1*mt
    rle_truth = rles[rles['id'] == img]["encoding"]
    pred = df[df['id'] == img]['predicted']
    mask_truth_rle = enc2mask(rle_truth, shape)
    mask_pred = enc2mask(pred, shape)
    print (mask_truth.shape, mask_truth_rle.shape)
    print (np.sum(mask_truth != mask_truth_rle))
    print (mask_truth, mask_truth_rle)
    score = dice_scores_img(mask_pred, mask_truth_rle)
#      pa, ji, haus = perf_metrics(mask_pred, mask_truth)
    print (score)
#     print (score,pa,ji,haus)
    
    sum_score += score
#     sum_pa += pa
#     sum_ji += ji
#     sum_haus += haus[0]

<class 'list'> <class 'numpy.ndarray'> <class 'numpy.int32'>
(47723, 23990) (23990, 47723)
1
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


  print (np.sum(mask_truth != mask_truth_rle))


0.05635099458042811
<class 'list'> <class 'numpy.ndarray'> <class 'numpy.int32'>
(22165, 29433) (29433, 22165)
1
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
0.09090414025476684
<class 'list'> <class 'numpy.ndarray'> <class 'numpy.int32'>
(43160, 33240) (33240, 43160)
1
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
0.052382748056409426
<class 'list'> <class 'numpy.ndarray'> <class 'numpy.int32'>
(47340, 30720) (30720, 47340)
1
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]] [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ..