In [36]:
import xml.etree.ElementTree as ET
from time import time
import numpy as np
import cv2
import os
import pandas as pd
from openslide import OpenSlide
from PIL import Image
Image.MAX_IMAGE_PIXELS=None

In [37]:
# xml_path = absolute filepath of the annotation xml
# imsrc = folder that holds the .ndpi slide with the same base name as the xml

def xml2mask(xml_path, imsrc,
             tif_dir=r"\\fatherserverdw\kyuex\clue images\1um",
             target_mpp=1):
    """Rasterize the regions of an annotation XML into a labeled PNG mask.

    The mask is written to ``<xml folder>/labeledmask/<xml base name>.png``.
    Background pixels are 0 and the k-th region found in the XML (1-based,
    in document order) is filled with the value k.

    Parameters
    ----------
    xml_path : str
        Path of the annotation XML file.
    imsrc : str
        Folder containing the ``.ndpi`` slide that shares the XML base name.
    tif_dir : str, optional
        Folder that must contain ``<base name>.tif``; the XML is skipped when
        that file is not listed there.
    target_mpp : float, optional
        Microns per pixel of the output mask
        (8um = 1.25x, 4um = 2.5x, 2um = 5x, 1um = 10x, 0.5um = 20x, 0.25um = 40x).

    Returns
    -------
    None
        The function only has side effects. It returns early, writing
        nothing, when the mask already exists, when the ``.tif`` companion is
        missing, or when the XML holds no regions.

    Notes
    -----
    The mask is uint8, so region labels above 255 cannot be represented.
    NOTE(review): confirm the annotation files never hold that many regions.
    """
    fol, fn = os.path.split(xml_path)
    # Swap only the extension; str.replace would also rewrite 'xml' / 'ndpi'
    # occurring inside the base name.
    stem = os.path.splitext(fn)[0]
    imfn = stem + '.ndpi'
    mskdst = os.path.join(fol, 'labeledmask')
    dstfn = os.path.join(mskdst, '{}.png'.format(stem))
    if os.path.exists(dstfn):
        return

    if stem + '.tif' not in os.listdir(tif_dir):
        return

    print(fn)
    root = ET.parse(xml_path).getroot()

    # One (n_vertices, 2) integer array per region, in document order.
    polygons = []
    for Annotation in root.iter('Annotation'):  # iterate each class
        for Region in Annotation.iter('Region'):  # iterate each outline
            vertices = [(float(Vertex.get('X')), float(Vertex.get('Y')))
                        for Vertex in Region.iter('Vertex')]
            polygons.append(np.array(vertices).astype('int').reshape(-1, 2))

    if not polygons:
        print('{}: no regions found, skipped'.format(fn))
        return

    os.makedirs(mskdst, exist_ok=True)

    # Only the metadata is needed; close the slide handle as soon as it is read.
    with OpenSlide(os.path.join(imsrc, imfn)) as slide:
        # Downscale factor from slide pixels to mask pixels.
        rsf = target_mpp / float(slide.properties['openslide.mpp-x'])
        imdim = [int(np.ceil(side / rsf)) for side in slide.dimensions]

    # slide.dimensions is (width, height); numpy wants (rows, cols).
    mask = np.zeros(imdim[::-1], dtype=np.uint8)  # background label 0
    for idx, polygon in enumerate(polygons):
        if len(polygon) == 0:
            # Nothing to draw, but the region still consumes its label number.
            continue
        # cv2.fillPoly requires int32 points regardless of the platform's
        # default integer width.
        scaled = (polygon / rsf).astype(np.int32)
        cv2.fillPoly(mask, pts=[scaled], color=idx + 1)

    # NOTE(review): the int8 cast is kept so the file format written to disk
    # stays the same as before; labels above 127 become negative in this
    # signed view - confirm whether uint8 was intended.
    Image.fromarray(mask.astype('int8')).save(dstfn)

In [38]:
# Folder holding the annotation XML files, and folder holding the slides.
src = r'\\fatherserverdw\kyuex\clue images\annotations\roi'
imsrc = r"\\fatherserverdw\kyuex\clue images"

start = time()
# Build a mask for every XML file found directly inside src.
for xmlpth in os.listdir(src):
    if not xmlpth.endswith('.xml'):
        continue
    xml2mask(os.path.join(src, xmlpth), imsrc)
print('readxml took {:.2f} sec'.format(time() - start))

2022-07-07 18.26.18.xml
readxml took 7.28 sec


In [39]:
xml_path = r"\\fatherserverdw\kyuex\clue images\annotations\roi\2022-06-07 13.29.45.xml"
tree = ET.parse(xml_path)
root = tree.getroot()

# Collect the 'Name' of every Attribute nested under an Annotation, then sort
# the names to obtain the class lookup list.
classlut = [
    Attrib.get('Name')
    for Annotation in root.iter('Annotation')
    for Attrib in Annotation.iter('Attribute')
]
classluts = sorted(classlut)
classluts

['tissue']