In [29]:
#rotate and crop each tissue section in DL mask
#save rotational angle
#save bounding box coordinate and crop

In [38]:
#read json for centroid of each cell
#adjust centroid coordinates from WSI-space to crop/rotated-space
#assign a cell type to each nucleus object based on the pixel value of the cropped/rotated DL mask

In [39]:
import json
import pandas as pd
import pandas as pd
from PIL import Image
Image.MAX_IMAGE_PIXELS=None
import numpy as np
from skimage.measure import label
from skimage.morphology import closing, square, remove_small_objects, remove_small_holes
from skimage.transform import rotate
from math import atan2, degrees
import glob,os
from natsort import natsorted
from copy import deepcopy
import cv2
from time import time
from skimage.measure import regionprops
from matplotlib import pyplot as plt
import time

from openslide import OpenSlide

In [40]:
def rotate_image_cv2(mat, angle):
    """
    Rotate ``mat`` by ``angle`` degrees about its centre on an enlarged canvas.

    The output canvas is sized to the bounding box of the rotated image so the
    rotation itself does not crop anything. Resampling uses nearest-neighbour
    interpolation (``cv2.INTER_NEAREST``).

    mat:   array whose first two dimensions are (rows, cols)
    angle: rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``
    returns: the rotated array of size (bound_h, bound_w)
    """
    n_rows, n_cols = mat.shape[:2]

    # OpenCV wants the centre as (x, y), i.e. (cols, rows) -- the reverse of .shape
    centre_xy = (n_cols/2, n_rows/2)
    affine = cv2.getRotationMatrix2D(centre_xy, angle, 1.)

    # |cos| and |sin| of the rotation are read straight from the affine matrix
    cos_mag, sin_mag = abs(affine[0,0]), abs(affine[0,1])

    # size of the axis-aligned box that encloses the rotated image
    out_w = int(n_rows * sin_mag + n_cols * cos_mag)
    out_h = int(n_rows * cos_mag + n_cols * sin_mag)

    # shift the transform so the old centre lands on the centre of the new canvas
    affine[0, 2] += out_w/2 - centre_xy[0]
    affine[1, 2] += out_h/2 - centre_xy[1]

    return cv2.warpAffine(mat, affine, (out_w, out_h), flags=cv2.INTER_NEAREST)

In [41]:
#inputs:
#outputs: corrected mask, image, nuclei, and nuclei centroid csv

In [42]:

# Input / output locations (UNC paths on the file server)
realjsonsrc = r'\\fatherserverdw\kyuex\clue images\hovernet_out\json'  # per-slide nuclei json files
roisrc = r'\\fatherserverdw\kyuex\clue images\annotations\roi\labeledmask_20rsf'  # labeled ROI masks (png)
ndpisrc = r'\\fatherserverdw\kyuex\clue images'  # whole-slide images
dlsrc = r'\\fatherserverdw\kyuex\clue images\1um\classification_v9_combined'  # DL masks
src = r'\\fatherserverdw\kyuex\datadst'  # output folder

# Rotation/crop look-up table. Read it once: the workbook sits on a network
# share and the previous second read returned the same data.
ClUedegrot_df = pd.read_excel(r'\\fatherserverdw\kyuex\datadst\WSI_rotation_crop_LUT.xlsx')

filename = ClUedegrot_df["filename"]

In [43]:
#mask: the DL mask
#im: corresponding image (not a parameter of the function below; its image steps are commented out)
#ndpi: corresponding whole-slide image
#json: json file with nuclei coordinates
#src: destination to save output
#im_name: the name to save the images

#note: all outputs are in the dimension of the nuclei coordinates in wsi image
#outputs: corrected mask, image, nuclei, and nuclei centroid csv, verification image

def rotate_hovernet_dl_im(mask,ndpi, json,src,im_name,im_id,roi, row=None):
    """
    Split a DL tissue mask into tissue sections, rotate and crop every section,
    and assign each nucleus the DL-mask class found under its centroid.

    For every section found by watershed on the 10x-downsized mask:
      1. the rotation angle ``d0`` is taken from an SVD of the section's
         epidermis pixels (mask values 1 and 2);
      2. the section is cropped, rotated by ``d0``, turned by a further 180
         degrees when the epidermis lies below the tissue centre, and cropped again;
      3. the nuclei label image gets the same crop / rotate / crop;
      4. each nucleus is assigned the value of the rotated mask at its centroid.

    mask:    DL mask as a PIL image (``.width``, ``.height``, ``.resize`` are used).
             Value ``12`` is treated as background, ``1``/``2`` as epidermis,
             ``3..11`` as dermis.  # NOTE(review): class meanings come from the
             original comments -- confirm against the classifier's label map.
    ndpi:    whole-slide image object; only ``level_dimensions[0]`` (w, h) is read.
    json:    DataFrame whose ``json[0].loc['nuc']`` holds per-nucleus records with
             at least ``contour`` and ``type_prob``; contours are in level-0
             WSI coordinates.
    src:     folder that receives ``<im_name>_nuc_df.csv``.
    im_name: prefix for the output file name.
    im_id:   currently unused.
    roi:     labeled ROI mask; pixels equal to the section number are taken to
             belong to that section.  # NOTE(review): assumes ROI labels and
             watershed section numbers agree -- verify.
    row:     optional mapping/Series that receives ``bbox<N>rot``, ``bbox<N>A``
             and ``bbox<N>B`` for every section N. When omitted, a notebook
             global named ``row`` is used if it exists (legacy behaviour).

    returns: dict with the same ``bbox<N>rot`` / ``bbox<N>A`` / ``bbox<N>B``
             entries that are written into ``row``.

    Side effects: rewrites the CSV after every section, prints timings and
    draws the section's nuclei mask with ``plt.imshow``.
    """

    start_time = time.time()

    # The original code wrote into a global ``row`` (the driver loop variable).
    # Keep that working when no row is passed, but do not fail with a NameError
    # when the function is called outside that loop.
    if row is None:
        row = globals().get('row')
    section_records = {}

    json = pd.DataFrame(json[0].loc['nuc']).T.drop(columns=['type_prob'])
    # drop nuclei whose contour has 5 points or fewer
    json = json[json['contour'].map(len) > 5].reset_index(drop=True)

    nuc_df = pd.DataFrame(columns=['id','tissue_id','centroid','contour','section_id'])


    #need the original dimension of the wsi image, because the json file coordinates are based on it
    (w,h) = ndpi.level_dimensions[0]

    #create mask for all nuclei; pixel value = row index in `json` + 1 (0 stays background)
    nuc_image = np.zeros((h,w), dtype=np.int32) #need to flip h and w
    for idx,ct in enumerate(json['contour']):
        cv2.fillPoly(nuc_image, pts=[np.array(ct).astype(np.int32)],  color=idx+1)

    # Define minimum sizes of tissue objects at 1um resolution
    minTA = 20000
    minTAhole = 100
    minDermhole = 5000
    minepisize=1000
    whitespace=12 #define background pixel value

    print("--- %s seconds initializing ---" % (time.time() - start_time))
    #---------calculate rotation angle d0-------
    TAbig = np.array(mask)
    #label mask
    (width, height) = (mask.width // 10, mask.height // 10) #downsize by 10
    mask_resized = mask.resize((width, height), resample=0) #nearest interpolation to rescale
    TA = np.array(mask_resized) # convert DL mask to numpy array
    # separate touching tissue sections using WATERSHED segmentation
    # conservative foreground: only mask values 3..(whitespace-2), so that it does not touch an adjacent tissue section
    sure_foreground = (2 < TA) & (TA < whitespace - 1)

    sure_fg = closing(sure_foreground, square(3))  # 13sec
    # remove small objects not connected to the tissue section
    sure_fg = remove_small_objects(sure_fg, min_size=minTA, connectivity=2)  # 6sec

    sure_fg = remove_small_holes(sure_fg, area_threshold=minTAhole).astype(np.uint8)

    bw = closing(TA < whitespace, square(3))  # 12 is background
    bw = remove_small_objects(bw, min_size=minTA, connectivity=2)
    bw = remove_small_holes(bw, area_threshold=minTAhole)
    # morphological opening : erosion followed by an dilation to remove small objects
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(bw.astype(np.uint8), cv2.MORPH_OPEN, kernel, iterations=2)  # 2sec
    # dilate to be more lenient in defining tissue section area
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # builtin bool: the `np.bool` alias was removed in NumPy 1.24
    unknown = cv2.subtract(sure_bg, sure_fg).astype(bool)

    sure_fg_label = label(sure_fg).astype(np.int32)
    sure_fg_label = sure_fg_label + 1 #background is 1, objects are 2~N
    sure_fg_label[unknown] = 0 #middle ground is 0

    #get labeled image
    TAbgr = cv2.cvtColor(TA, cv2.COLOR_GRAY2BGR) #convert downsized DL mask to BGR color channel
    label_image = cv2.watershed(TAbgr, sure_fg_label) #apply watershed using fg,bg,mg, defined mask

    # epidermis / dermis masks used to orient each section
    epi = (TA == 1) | (TA == 2)
    derm = (2 < TA) & (TA < whitespace)
    derm = remove_small_holes(derm, area_threshold=minDermhole)

    epi2 = epi & ~derm
    epi2 = remove_small_objects(epi2, min_size=minepisize, connectivity=2)

    print("--- %s seconds sectioning ---" % (time.time() - start_time))

    #tmpClU_df = pd.DataFrame(columns=['imID','imname','secN','bbox1','degrot','bbox2'])

    roi_resized = cv2.resize(np.array(roi).astype(np.uint8), dsize=(w,h), interpolation=cv2.INTER_NEAREST)
    #--------iterate through sections------
    # section `numsec` carries watershed label numsec+1 (label 1 is background)
    numsecmax = np.max(label_image)
    for numsec in range(1,numsecmax):
        print('section N: ', numsec, '/', numsecmax-1)

        msktmp = label_image == numsec+1
        mskepi = msktmp & epi2
        # align horizontal: angle from the SVD of the (subsampled) epidermis pixel coordinates
        [xt0, yt0] = np.where(mskepi)
        vertices = np.array([xt0[::10], yt0[::10]]).T
        vc = vertices - vertices.mean(axis=0)

        U, S, Vt = np.linalg.svd(vc)
        k = Vt.T
        d0 = degrees(atan2(k[1, 1], k[1, 0]))
        if np.linalg.det(k)<0: d0=-d0

        #clear variables, we just need d0
        del vertices, vc, U, S, Vt,k,mskepi

        #resize TAtmp to the size of the nuclei image
        TAtmp = cv2.resize(TAbig.astype(np.uint8), dsize=(w,h), interpolation=cv2.INTER_NEAREST)
        mskbig = cv2.resize(msktmp.astype(np.uint8), TAtmp.shape[::-1], interpolation=cv2.INTER_NEAREST)

        #im_resized =cv2.resize(np.array(im), dsize=(w,h), interpolation=cv2.INTER_NEAREST)

        # delete non-ROI from scaled up ROI
        kernel = np.ones((20, 20), np.uint8)
        mskbig = cv2.dilate(mskbig, kernel, iterations=3)
        TAtmp[mskbig == 0] = 0  #1sec

        #clear variables
        del kernel

        #crop, rotate, crop
        [xt, yt] = np.where(TAtmp[:,:])
        # first bounding box [row_min, row_max, col_min, col_max]; computed once
        # because xt/yt hold one entry per tissue pixel at full resolution
        bboxA = [np.min(xt),np.max(xt), np.min(yt),np.max(yt)]
        mskrot =TAtmp[bboxA[0]:bboxA[1], bboxA[2]:bboxA[3]]
        mskrot2 = rotate_image_cv2(mskrot, d0)
        #imrot = im_resized[np.min(xt):np.max(xt), np.min(yt):np.max(yt)]
        #imrot2 = rotate_image_cv2(imrot, d0)

        #clear variables
        del mskrot


        # if dermis is above epidermis, flip it
        [xt3, yt3] = np.where(mskrot2)
        [xt4, yt4] = np.where((mskrot2 == 1) | (mskrot2 == 2))
        if np.mean(xt3) - np.mean(xt4) < 0:
            mskrot2 = np.rot90(np.rot90(mskrot2))
            #imrot2 = np.rot90(np.rot90(imrot2))
            d0 += 180



        [xt2, yt2] = np.where(mskrot2)
        # second bounding box, in the rotated frame
        bboxB = [np.min(xt2),np.max(xt2), np.min(yt2),np.max(yt2)]
        mskrot3 = mskrot2[bboxB[0]:bboxB[1], bboxB[2]:bboxB[3]]
        #imrot3 = imrot2[np.min(xt2):np.max(xt2), np.min(yt2):np.max(yt2)]

        del mskrot2



        mskrot3[mskrot3 == 0] = whitespace
        #imrot3[imrot3 == 0] = 235


        #TODO: can we assume that all parts of the nuclei are in the tissue?
        nuc_sec_mask = deepcopy(nuc_image)

        #section out nuclei in roi ?? (test)
        nuc_sec_mask[roi_resized != numsec] = 0
        # NOTE(review): debugging display of a full-resolution array on every
        # section -- consider removing once the ROI sectioning is verified.
        plt.imshow(nuc_sec_mask)

        #same first bounding box as the mask
        nuc_sec_rot =nuc_sec_mask[bboxA[0]:bboxA[1], bboxA[2]:bboxA[3]]
        #rot nuclei (d0 already includes the 180 degree flip, if any)
        nuc_sec_rot = rotate_image_cv2(nuc_sec_rot, d0)

        #crop again
        nuc_sec_rot2 =nuc_sec_rot[bboxB[0]:bboxB[1], bboxB[2]:bboxB[3]]

        #save images
        #Image.fromarray(nuc_sec_rot2.astype('uint32')).save(
            #os.path.join(src, '{}nuc_sec{:02d}.png'.format(im_name, numsec)))

        #Image.fromarray(mskrot3.astype('int8')).save(
            #os.path.join(src, '{}msk_sec{:02d}.png'.format(im_name, numsec)))

        #Image.fromarray(imrot3).save(
            #os.path.join(src, '{}im_sec{:02d}.png'.format(im_name, numsec)))

        #--------------------create csv for each tissue section-----------------------------
        nuc2_df = pd.DataFrame(columns=['id','tissue_id','centroid','contour','section_id'])
        stats = regionprops(nuc_sec_rot2.astype(np.uint32))
        ids = []
        cell_types = []
        contours = []
        centroids = []
        section_ids = np.full((len(stats),1),numsec)

        for stat in stats:
            ids.append((stat.label - 1)) #back to the row index in `json`
            cent = np.round(stat.centroid).astype("uint32")
            centroids.append(cent)
            contours.append(stat.coords)
            tissue_id = mskrot3[cent[0],cent[1]] #row,col
            cell_types.append(tissue_id)

        nuc2_df['id']= ids
        nuc2_df['tissue_id']= cell_types
        nuc2_df['centroid']= centroids
        nuc2_df['contour'] = contours
        nuc2_df['section_id']=section_ids

        nuc_df = pd.concat([nuc_df, nuc2_df])
        # rewritten after every section, so a partial result survives a later failure
        nuc_df.to_csv(os.path.join(src, '{}_nuc_df.csv'.format(im_name)))

        #clu = {'imID':im_id,'imname':im_name,'secN':numsec,'bbox1':[np.min(xt),np.max(xt), np.min(yt),np.max(yt)],'degrot':d0,
              # 'bbox2':[np.min(xt2),np.max(xt2), np.min(yt2),np.max(yt2)]}

        # rotation angle and both bounding boxes of this section
        section_record = {
            'bbox{}rot'.format(numsec): d0,
            'bbox{}A'.format(numsec): bboxA,
            'bbox{}B'.format(numsec): bboxB,
        }
        section_records.update(section_record)
        if row is not None:
            for key, value in section_record.items():
                row[key] = value

        #tmpClU_df =  tmpClU_df.append(clu, ignore_index=True)

        print("--- %s seconds creatng csv---" % (time.time() - start_time))

        #---------create an image to visualize the nuclei centroids on the RGB tissue image
        #tmp_imrot3 = deepcopy(imrot3)
        #for cnt in nuc2_df['centroid']:
            #tmp_imrot3[cnt[0],cnt[1]] = [0,255,0]

        #Image.fromarray(tmp_imrot3).save(os.path.join(src,'{}_verify_nuclei_centroid_sec{:02d}.png'.format(im_name, numsec)))


    #return tmpClU_df
    print("--- %s seconds ---" % (time.time() - start_time))
    return section_records

In [50]:
# Driver: visit every slide listed in the rotation/crop LUT and open its ROI mask.
# The mask / ndpi / json loading and the processing call are currently disabled.
for index, row in ClUedegrot_df.iterrows():
    try:
        fn = os.path.splitext(row['filename'])[0]
        #mask = Image.open(os.path.join(dlsrc,'{}.{}'.format(fn,'tif')))
        # (extension given without a leading dot; '.ndpi' here would produce 'name..ndpi')
        #ndpi = OpenSlide(os.path.join(ndpisrc,'{}.{}'.format(fn,'ndpi')))

        #jsdst = os.path.join(realjsonsrc,'{}.{}'.format(fn,'json'))
        #json = pd.read_json(jsdst, orient='index')
        roi= Image.open(os.path.join(roisrc,'{}.{}'.format(fn,'png')))

        #rotate_hovernet_dl_im(mask,ndpi,json,src,fn,0,roi)
    except Exception as err:
        # Best effort: skip slides whose files are missing or unreadable, but say
        # which one and why. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit stop the loop.
        print("skipping {}: {!r}".format(row.get('filename'), err))
        #i will fill the values with zero later
        continue
        #i will fill the values with zero later

In [9]:
#now it takes around 159 seconds per image