# HuBMAP - Hacking the Kidney
#### Goal - Mapping the human body at the functional tissue unit (FTU) level - detect glomeruli FTUs in the kidney

#### Converting the masks from JSON format to Run-Length Encoding (RLE) format

###### Step 1 - Import the required libraries

In [1]:
import json
import numpy as np
import cv2
import tifffile
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import pandas as pd 
import glob
import gc
import csv

###### Step 2 - Utility functions to read a mask from a JSON file, rasterize it into an image, and encode that image as a run-length encoding (RLE).

In [2]:
def verify_read(file_list):
    """Load every TIFF named in *file_list* and print its path and shape.

    Args:
        file_list: iterable of paths accepted by ``tifffile.imread``.

    Returns:
        None. One line per file is printed to stdout.
    """
    report = 'img id = {}, shape = {}'
    for tiff_path in file_list:
        pixels = tifffile.imread(tiff_path)
        print(report.format(tiff_path, pixels.shape))
        # Run a collection pass after each file before loading the next one.
        gc.collect()

def read_mask_kidney(mask_file, mask_shape):
    """Rasterize the polygons of a kidney annotation file into a boolean mask.

    Args:
        mask_file: path to a UTF-8 JSON file holding a list of features; each
            feature's ``['geometry']['coordinates']`` is converted to an
            int32 array and drawn as a filled polygon.
        mask_shape: shape of the mask to create. When it carries a channel
            axis, the fill colour ``1`` only marks the first channel.

    Returns:
        Boolean ``numpy.ndarray`` of shape ``mask_shape`` that is True inside
        the annotated polygons.
    """
    # Close the file as soon as it is parsed instead of leaking the handle.
    with open(mask_file, "r", encoding='utf-8') as read_file:
        mask_data = json.load(read_file)

    polys = [
        np.array(feature['geometry']['coordinates'], dtype=np.int32)
        for feature in mask_data
    ]

    # The canvas only ever holds 0/1, so uint8 is enough; the default float64
    # would need eight times the memory for the same boolean result.
    mask = np.zeros(mask_shape, dtype=np.uint8)
    cv2.fillPoly(mask, polys, 1)
    return mask.astype(bool)

def read_mask_colon(mask_file, mask_shape, split_x=4536, split_y=4704):
    """Rasterize colon annotations into four quadrant masks.

    Every vertex of each polygon's first ring is assigned to a quadrant by
    comparing it with the split point, then shifted so that the quadrant's
    own top-left corner becomes the origin. The vertices that land in one
    quadrant are filled as one polygon in that quadrant's mask. A polygon
    that straddles a split line is therefore drawn as separate pieces, each
    built only from the vertices inside that quadrant.

    Args:
        mask_file: path to a UTF-8 JSON file holding a list of features with
            ``['geometry']['coordinates']`` polygons.
        mask_shape: shape of the full image; each quadrant mask has shape
            ``(mask_shape[0] // 2, mask_shape[1] // 2, 3)``.
        split_x: x coordinate separating left from right quadrants.
        split_y: y coordinate separating top from bottom quadrants.
            NOTE(review): the defaults are presumably half the width/height
            of the colon images this was written for - confirm before using
            other image sizes.

    Returns:
        Dict mapping ``'.jpg_top_left'``, ``'.jpg_top_right'``,
        ``'.jpg_bottom_left'`` and ``'.jpg_bottom_right'`` to boolean masks.
        The fill colour ``1`` only marks the first of the three channels.
    """
    # Close the file as soon as it is parsed instead of leaking the handle.
    with open(mask_file, "r", encoding='utf-8') as read_file:
        mask_data = json.load(read_file)

    suffixes = {
        'tl': '.jpg_top_left',
        'tr': '.jpg_top_right',
        'bl': '.jpg_bottom_left',
        'br': '.jpg_bottom_right',
    }
    polys = {quadrant: [] for quadrant in suffixes}

    for feature in mask_data:
        geom = np.array(feature['geometry']['coordinates'], dtype=np.int32)
        points = {quadrant: [] for quadrant in suffixes}
        # Only the first ring of each polygon is used.
        for coords in geom[0]:
            x, y = int(coords[0]), int(coords[1])
            right = x >= split_x
            bottom = y >= split_y
            quadrant = ('b' if bottom else 't') + ('r' if right else 'l')
            points[quadrant].append([
                x - split_x if right else x,
                y - split_y if bottom else y,
            ])
        for quadrant, vertices in points.items():
            # Test the plain list for emptiness: comparing an ndarray with []
            # is an element-wise operation and raises on current NumPy.
            if vertices:
                polys[quadrant].append(
                    np.array(vertices, ndmin=3, dtype='int32'))

    new_shape = (mask_shape[0]//2, mask_shape[1]//2, 3)
    masks = {}
    for quadrant, suffix in suffixes.items():
        # uint8 is enough for a 0/1 canvas and far smaller than float64.
        quadrant_mask = np.zeros(new_shape, dtype=np.uint8)
        if polys[quadrant]:
            cv2.fillPoly(quadrant_mask, polys[quadrant], 1)
        masks[suffix] = quadrant_mask.astype(bool)
    return masks


def mask2rle(img):
    """Run-length encode a mask as a ``"start length start length ..."`` string.

    The mask is transposed and flattened, so a 2D mask is numbered down each
    column first (top to bottom, then left to right). Start positions are
    1-indexed. For an array with a trailing channel axis, the channels are
    laid out one after another, so only the first channel's positions match
    the numbering of the equivalent 2D mask.

    Args:
        img: array-like mask; 0/False is background, anything else foreground.

    Returns:
        Space-separated ``start length`` pairs, or ``''`` when the mask is
        empty or contains no foreground.
    """
    pixels = np.asarray(img).T.flatten()
    if pixels.size == 0:
        return ''

    # 1-indexed positions whose value differs from the preceding pixel.
    edges = np.where(pixels[1:] != pixels[:-1])[0] + 2
    # Treat the outside of the mask as background so that runs touching the
    # first or last pixel still get a start and an end. Only the short edge
    # list is extended, which avoids copying the whole pixel array again.
    head = [1] if pixels[0] else []
    tail = [pixels.size + 1] if pixels[-1] else []
    runs = np.concatenate((
        np.array(head, dtype=edges.dtype),
        edges,
        np.array(tail, dtype=edges.dtype),
    ))
    # Entries alternate start, end; turn every end into a run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

###### Step 3 - Save the RLE into CSV format

In [3]:
# Selects which branch of the `if dataset == ...` conversion script runs:
# 'kidney' or 'colon'.
dataset = "kidney"

In [5]:
def _as_channels_last(image):
    """Return *image* with a leading axis of length 3 moved to position 2."""
    if image.shape[0] == 3:
        return image.swapaxes(0, 1).swapaxes(1, 2)
    return image


# Convert the JSON annotations of the selected dataset to RLE strings and
# write them to a CSV file in the current working directory.
if dataset == 'kidney':
    basepath = r'C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap-kidney-segmentation-data'
    df_test = pd.read_csv(basepath+'/test_empty.csv')
    filepath = basepath + '/test/*.tiff'
    file_list = glob.glob(filepath)
    rle = []
    # Image ids come from the CSV in row order; as many of them are processed
    # as there are .tiff files in the test folder.
    for image_name in df_test['id'].head(len(file_list)):
        # The image is only read to learn the shape the mask must have.
        working_image = _as_channels_last(
            tifffile.imread(basepath+'/test/'+image_name+'.tiff'))
        working_image_json_mask = basepath+'/test/'+image_name+'.json'
        mask_shape = working_image.shape
        # read_mask_kidney opens and parses the JSON file itself, so it is
        # not loaded a second time here.
        mask = read_mask_kidney(working_image_json_mask, mask_shape)
        mask_int = mask*1
        encoding = mask2rle(mask_int)
        rle.append((image_name, encoding))

    with open('test_empty.csv', 'w', newline='') as out_file:
        write = csv.writer(out_file)
        write.writerow(['id', 'encoding'])
        write.writerows(rle)

elif dataset == 'colon':
    basepath = r'C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap_colon_data'
    image_ids = ['CL_HandE_1234_B004', 'HandE_B005_CL_b_RGB']
    rle = []
    for image_id in image_ids:
        # The image is only read to learn the shape the masks are derived from.
        working_image = _as_channels_last(
            tifffile.imread(basepath+'/tif images/'+image_id+'.tif'))
        working_image_json_mask = basepath+'/Annotations/json/'+image_id+'_annotations.json'
        mask_shape = working_image.shape
        # One mask per image quadrant, keyed by the quadrant's file suffix.
        masks = read_mask_colon(working_image_json_mask, mask_shape)
        for key, mask in masks.items():
            mask_int = mask*1
            rle.append((image_id, key, mask2rle(mask_int)))

    # Row id = image id + quadrant suffix.
    encoding = {img + slide: enc for img, slide, enc in rle}
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open('colon_rle.csv', 'w', newline='') as f:
        w = csv.writer(f)
        w.writerow(['id', 'encoding'])
        w.writerows(encoding.items())