### Code to generate inference data tiles to test the PyTorch DeepLab model. Most functions are from xml2trainingdata.ipynb

In [5]:
# Folder holding the OpenSlide Windows binaries (DLLs); machine-specific path.
OPENSLIDE_PATH = r'C:\Users\Kevin\Downloads\openslide-win64-20230414\bin'

import os
# os.add_dll_directory only exists on some interpreters; when it is available,
# register the OpenSlide binary folder for the duration of the import so the
# openslide package can locate its DLLs. Otherwise fall back to a plain import.
if hasattr(os, 'add_dll_directory'):
    # Python >= 3.8 on Windows
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    import openslide

import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import openslide
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
from matplotlib import pyplot as plt
import skimage.measure
import matplotlib.pyplot as plt
import scipy.stats as stats
import torchvision.transforms as transforms
from glob import glob
from time import time
from skimage.measure import label

In [None]:
# First, input xml_filepath and output a dataframe of X,Y coordinates in general. (can be used for ROI as well)
def xml_to_df(xml_path):
    """Read an annotation XML file into a DataFrame with one row per region.

    Every ``Region`` element found under every ``Annotation`` element is
    turned into one row. The class of a region is the 1-based position of its
    enclosing ``Annotation`` element in document order.

    Parameters
    ----------
    xml_path : str or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ClassNames`` (int), ``X`` and ``Y`` (NumPy arrays holding
        the raw ``X`` / ``Y`` attribute strings of the region's ``Vertex``
        elements) and ``ID`` (int, taken from the region's ``Id`` attribute).
        The index is 0..n-1. If the file contains no regions, an empty frame
        with the same four columns is returned.
    """
    root = ET.parse(xml_path).getroot()

    class_numbers, xs, ys, region_ids = [], [], [], []
    for index, annotation in enumerate(root.iter("Annotation")):
        for region in annotation.iter("Region"):
            # Walk the vertices once and reuse them for both coordinates.
            vertices = list(region.iter("Vertex"))
            class_numbers.append(index + 1)
            xs.append(np.array([vertex.get("X") for vertex in vertices]))
            ys.append(np.array([vertex.get("Y") for vertex in vertices]))
            region_ids.append(int(region.get("Id")))

    # Building the frame in one go (instead of concatenating one frame per
    # region) also works when there are no regions at all.
    coord_df = pd.DataFrame(
        {"ClassNames": class_numbers, "X": xs, "Y": ys, "ID": region_ids}
    )
    coord_df["ID"] = coord_df["ID"].astype(int)
    return coord_df

In [12]:
# Then, input xml_path to use xml_to_df function to output X,Y coordinates for each annotation per class:
def coord_to_multiclass_df(xml_path):
    """Return the per-region coordinate table of an annotation XML, keyed by class.

    The table produced by ``xml_to_df`` is stripped of its ``ID`` column, and
    any ``ClassNames`` entry equal to one of the tissue names below is
    replaced by that tissue's numeric class id.

    NOTE(review): ``xml_to_df`` in this file already emits integer
    ``ClassNames``, in which case the name-to-id mapping finds nothing to
    replace -- confirm whether the mapping is still needed.
    """
    tissue_class_ids = {
        "corneum": 1,
        "spinosum": 2,
        "hairshaft": 3,
        "hairfollicle": 4,
        "smoothmuscle": 5,
        "oil": 6,
        "sweat": 7,
        "nerve": 8,
        "bloodvessel": 9,
        "ecm": 10,
        "fat": 11,
        "white": 12,
    }
    regions = xml_to_df(xml_path).drop(columns=["ID"])
    return regions.replace({"ClassNames": tissue_class_ids})

In [13]:
# Example inputs: the annotation XML and the whole-slide image it refers to
# (network-share paths, specific to the author's environment).
xml_src = r"\\shelter\Kyu\skin_aging\clue_cohort\annotations\12class\2022-06-14 15.39.21.xml"
img_src = r"\\shelter\Kyu\skin_aging\clue_cohort\wsi\2022-06-14 15.39.21.ndpi"
# One row per annotated region: class number plus X / Y vertex coordinates.
multi_coord_df = coord_to_multiclass_df(xml_src)
# Bare expression so the notebook renders the table as the cell output.
multi_coord_df

Unnamed: 0,ClassNames,X,Y
0,1,"[37653, 37655, 37657, 37659, 37664, 37668, 376...","[26262, 26264, 26264, 26264, 26264, 26266, 262..."
1,1,"[43239, 43239, 43238, 43237, 43237, 43237, 432...","[29633, 29634, 29634, 29636, 29637, 29638, 296..."
2,1,"[46442, 46442, 46442, 46444, 46446, 46446, 464...","[31652, 31654, 31656, 31659, 31661, 31663, 316..."
3,1,"[40550, 40548, 40548, 40547, 40547, 40545, 405...","[27686, 27686, 27684, 27684, 27682, 27682, 276..."
4,2,"[37619, 37618, 37618, 37617, 37616, 37614, 376...","[26556, 26556, 26555, 26555, 26555, 26555, 265..."
...,...,...,...
71,12,"[43404, 43405, 43405, 43405, 43406, 43407, 434...","[29588, 29584, 29583, 29582, 29581, 29579, 295..."
72,12,"[39544, 39542, 39541, 39540, 39540, 39540, 395...","[36137, 36137, 36137, 36137, 36138, 36139, 361..."
73,12,"[36964, 36964, 36965, 36966, 36968, 36970, 369...","[30517, 30516, 30513, 30511, 30508, 30506, 305..."
74,12,"[46791, 46794, 46801, 46804, 46808, 46820, 468...","[31307, 31310, 31314, 31318, 31321, 31330, 313..."


### Make 1024 x 1024 tiles covering xmin:xmax and ymin:ymax to get the parts of the WSI that have annotations, and save the corresponding 1024 x 1024 masks of the tiles. Then run inference on each tile and use cv2.findContours to filter the results so that only the annotated part of the image is evaluated.

In [14]:
# Then input the XML path and the original image path to output the mask with unique class labels (1..N, N = 12 in this case):
def create_mask_multi_annot(xml_path, image_path, downsample_factor = 1): #choose downsample factor
    """Rasterise the XML annotations of a slide into a class-label mask.

    Parameters
    ----------
    xml_path : str
        Annotation XML, read through ``coord_to_multiclass_df``.
    image_path : str
        Whole-slide image; only its level geometry is read here.
    downsample_factor : float, optional
        Requested downsample; the slide level chosen by
        ``get_best_level_for_downsample`` sets the mask resolution.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape (height, width) of the chosen level. Pixels
        inside an annotated polygon hold that polygon's class number, all
        other pixels are 0.
    """
    # Only the geometry is needed, so the slide handle is released right away.
    with openslide.open_slide(image_path) as slide:
        target_level = slide.get_best_level_for_downsample(downsample_factor)
        target_dim = slide.level_dimensions[target_level]
        rsf = [x/y for x,y in zip(slide.dimensions,target_dim)] #resize factor
    print("rsf is {}".format(rsf))

    # OpenSlide reports (width, height) while NumPy/OpenCV images are indexed
    # (row, col) = (height, width), hence the reversal.
    mask = np.zeros(target_dim[::-1], dtype = np.uint8)

    # Classes are painted in this order; a class painted later overwrites
    # earlier ones wherever their polygons overlap.
    iter_order = [2,10,5,4,6,11,7,9,8,12,3,1]
    coord_df = coord_to_multiclass_df(xml_path) #use function above
    scale = np.asarray(rsf, dtype=float)  # (x scale, y scale)

    for i in iter_order:
        coord_df_tmp = coord_df[coord_df.ClassNames == i]
        for _, row in coord_df_tmp.iterrows():
            xx = row.X.astype(float).astype('int32')
            yy = row.Y.astype(float).astype('int32')
            if xx.size == 0:
                # A region without vertices has nothing to draw.
                continue
            # Scale x and y each by their own factor to reach level coordinates.
            contours = np.column_stack((xx, yy)) / scale
            # cv2.fillPoly only accepts 32-bit integer points.
            mask = cv2.fillPoly(mask, pts=[contours.astype(np.int32)], color=int(row.ClassNames))
    return mask

In [None]:
# Build the class-label mask for the example slide; the image path is a
# required argument of create_mask_multi_annot.
create_mask_multi_annot(xml_src, img_src)

In [None]:
# cv2 method:
# Input the XML and image paths, build the binary (annotated vs. not annotated) mask, and output the label image of its connected regions:
def create_binary_mask_label(xml_path, image_path):
    """Label the connected annotated regions of a slide.

    The class mask from ``create_mask_multi_annot`` (downsample factor 1) is
    reduced to foreground / background (any class number > 0 is foreground)
    and passed to ``cv2.connectedComponents``.

    Returns the label image produced by ``cv2.connectedComponents``, i.e. the
    second element of its result.
    """
    class_mask = create_mask_multi_annot(xml_path, image_path, downsample_factor = 1)
    foreground = (class_mask > 0).astype(np.uint8)
    component_labels = cv2.connectedComponents(foreground)[1]
    return component_labels #returns label of connected regions

In [None]:
# Input the XML and image paths, find the connected annotated regions, and save the cropped image and mask of each region:
def create_final_mask_image(xml_path, image_path, dstpath_mask, dstpath_image, downsample_factor = 1):
    """Save one image crop and one mask crop per connected annotated region.

    Parameters
    ----------
    xml_path : str
        Annotation XML of the slide.
    image_path : str
        Whole-slide image to crop from.
    dstpath_mask, dstpath_image : str
        Filename prefixes. The crops of region ``k`` (0-based) are written to
        ``dstpath_mask + str(k) + '.png'`` and
        ``dstpath_image + str(k) + '.png'``.
    downsample_factor : float, optional
        Requested downsample. It is applied to both the image and the mask so
        that the two stay aligned.

    Returns
    -------
    None
        The results are the PNG files written to disk.
    """
    # NOTE: the whole level is loaded into memory, as in the original workflow.
    with openslide.open_slide(image_path) as slide:
        target_level = slide.get_best_level_for_downsample(downsample_factor)
        target_dim = slide.level_dimensions[target_level]
        image = slide.read_region(location=(0,0),level=target_level,size=target_dim)
    imagearr = np.array(image)[:,:,:3]  # keep the first three channels only

    # Build the class mask once, at the same resolution as the image, and
    # derive the connected annotated regions from it.
    mask = create_mask_multi_annot(xml_path, image_path, downsample_factor = downsample_factor)
    foreground = (mask > 0).astype(np.uint8)
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(foreground)
    # Label 0 is the background, so it is not counted as an object.
    print("For image with xml path {}, total of {} connected objects".format(xml_path, num_labels - 1))

    for idx, component in enumerate(range(1, num_labels)):
        # The stats row gives the bounding box directly, so no per-component
        # scan of the full label image is needed.
        left = int(stats[component, cv2.CC_STAT_LEFT])
        top = int(stats[component, cv2.CC_STAT_TOP])
        width = int(stats[component, cv2.CC_STAT_WIDTH])
        height = int(stats[component, cv2.CC_STAT_HEIGHT])
        # End-exclusive slices built from width/height cover the whole box,
        # including its last row and column.
        rows = slice(top, top + height)
        cols = slice(left, left + width)

        targetmask = mask[rows, cols]
        Image.fromarray(targetmask).save(dstpath_mask + str(idx)+'.png')
        targetim = imagearr[rows, cols, :]
        Image.fromarray(targetim).save(dstpath_image + str(idx)+'.png')