# HuBMAP - Hacking the Kidney
#### Goal - Map the human body at the functional tissue unit (FTU) level - detect glomeruli FTUs in the kidney

#### Splitting each colon image into its four constituent slides

###### Step 1 - Import the required libraries

In [2]:
import cv2
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import shutil
import tensorflow as tf
import glob
import tifffile
import gc
import imageio
from PIL import Image

##### Step 2 - Get a look and feel of the data (set the base path to the data folder)

In [3]:
# Root folder of the colon dataset. The location can be overridden with the
# HUBMAP_COLON_DATA environment variable so the notebook runs on other machines;
# when the variable is unset, the original local path is used.
basepath = os.environ.get(
    'HUBMAP_COLON_DATA',
    r'C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap_colon_data',
)

##### Step 3 - Read each .tiff file and see its shape

In [4]:
def verify_read(file_list):
    """Load every TIFF in ``file_list`` and print its path and array shape.

    Args:
        file_list: iterable of TIFF file paths readable by ``tifffile.imread``.

    Returns:
        None. One line is printed per file.
    """
    for tiff_path in file_list:
        shape = tifffile.imread(tiff_path).shape
        print(f'img id = {tiff_path}, shape = {shape}')
        # Run a collection pass after each file before loading the next one.
        gc.collect()
        
# glob.glob returns matches in arbitrary (filesystem-dependent) order. Sort them
# so the listing is reproducible and any positional pairing of file_list with a
# hand-written, alphabetically ordered list of image ids stays aligned.
file_list = sorted(glob.glob(basepath+'/tif images/*.tif'))
print (file_list)
verify_read(file_list)

['C:\\Users\\soodn\\Downloads\\Naveksha\\Kaggle HuBMAP\\Data\\hubmap_colon_data/tif images\\CL_HandE_1234_B004.tif', 'C:\\Users\\soodn\\Downloads\\Naveksha\\Kaggle HuBMAP\\Data\\hubmap_colon_data/tif images\\HandE_B005_CL_b_RGB.tif']


TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'


img id = C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap_colon_data/tif images\CL_HandE_1234_B004.tif, shape = (3, 9072, 9408)
img id = C:\Users\soodn\Downloads\Naveksha\Kaggle HuBMAP\Data\hubmap_colon_data/tif images\HandE_B005_CL_b_RGB.tif, shape = (9072, 9408, 3)


##### Step 4 - Read each image and its corresponding mask, swap the axes if required, and split both into four sub-slides.

In [5]:
def read_mask(mask_file, mask_shape):
    """Rasterize the polygon annotations in a JSON file into a boolean mask.

    Args:
        mask_file: path to a UTF-8 JSON file. The code iterates over the
            top-level value and reads ``entry['geometry']['coordinates']``
            from each entry, so a list of annotation objects is expected.
        mask_shape: shape of the mask to create, e.g. ``(height, width)``.

    Returns:
        A boolean ``numpy`` array of shape ``mask_shape`` that is ``True``
        inside the filled polygons and ``False`` elsewhere.
    """
    # The context manager closes the file handle even if parsing fails.
    with open(mask_file, "r", encoding='utf-8') as read_file:
        mask_data = json.load(read_file)

    # Coordinates are cast to int32 because cv2.fillPoly takes integer points.
    polys = [
        np.array(entry['geometry']['coordinates'], dtype=np.int32)
        for entry in mask_data
    ]

    # A uint8 canvas is enough for a 0/1 fill and uses one byte per pixel,
    # instead of the eight bytes per pixel of the default float64 array.
    mask = np.zeros(mask_shape, dtype=np.uint8)
    if polys:  # nothing to draw when the file has no annotations
        cv2.fillPoly(mask, polys, 1)
    return mask.astype(bool)

In [15]:
# Stems of the colon images to split. The TIFF and annotation paths are both
# derived from the stem, so image and mask can never be paired with the wrong file.
image_ids = ['CL_HandE_1234_B004', 'HandE_B005_CL_b_RGB']

# Make sure the output folder exists before anything is written to it.
output_dir = 'OutputColon'
os.makedirs(output_dir, exist_ok=True)

for image in image_ids:
    working_image_id = os.path.join(basepath, 'tif images', image + '.tif')
    baseimage = tifffile.imread(working_image_id)
    print ('Original image shape', baseimage.shape)
    baseimage = np.squeeze(baseimage)
    print (baseimage.shape)
    if baseimage.shape[0] == 3:
        # Channel-first (3, H, W) -> channel-last (H, W, 3).
        baseimage = np.moveaxis(baseimage, 0, -1)
        print ('Swaped shape',baseimage.shape)

    # Divide the colon image into its 4 constituent slides.
    # Axis 0 is rows (top/bottom) and axis 1 is columns (left/right); the split
    # points are the midpoints of the actual image rather than fixed numbers.
    mid_row = baseimage.shape[0] // 2
    mid_col = baseimage.shape[1] // 2
    quadrants = {
        'topleft': (slice(0, mid_row), slice(0, mid_col)),
        'topright': (slice(0, mid_row), slice(mid_col, None)),
        'bottomleft': (slice(mid_row, None), slice(0, mid_col)),
        'bottomright': (slice(mid_row, None), slice(mid_col, None)),
    }

    for quadrant, (rows, cols) in quadrants.items():
        tile = Image.fromarray(baseimage[rows, cols], 'RGB')
        tile.save(output_dir + '/' + image + '_' + quadrant + '.tif')

    working_image_json_mask = basepath+'/Annotations/json/'+image+'_annotations.json'
    mask_shape = (baseimage.shape[0], baseimage.shape[1])
    mask_bool = read_mask(working_image_json_mask, mask_shape)
    # Store the mask as single-channel uint8 with values 0/1. The values are
    # not scaled, so the files look almost black in a generic image viewer.
    mask = mask_bool.astype(np.uint8)

    # Every mask quadrant is written the same way as a 2-D array. Forcing mode
    # 'RGB' on a 2-D integer mask makes Pillow read the raw buffer as RGB
    # pixels instead of one value per pixel, which corrupts the saved mask.
    for quadrant, (rows, cols) in quadrants.items():
        imageio.imwrite(output_dir + '/' + image + '_mask_' + quadrant + '.tif', mask[rows, cols])
    

Original image shape (3, 9072, 9408)
(3, 9072, 9408)
Swaped shape (9072, 9408, 3)


TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'


Original image shape (9072, 9408, 3)
(9072, 9408, 3)


# Stop here!! 
### With this method, one of the colon images was not divided properly, the masks did not show up, and the sub-slides captured some additional artifacts. Hence, we instead obtain the sub-slides directly from QuPath and work with those.