In [86]:
import xml.etree.ElementTree as ET
from time import time
import numpy as np
import cv2
import os
import pandas as pd
from openslide import OpenSlide
from PIL import Image
Image.MAX_IMAGE_PIXELS=None

In [87]:
# xml_path = absolute filepath of xml
# mdict = 2D coordinates of annotation by class and object

def xml2mask(xml_path,imsrc):
    try:
        fol,fn = os.path.split(xml_path)
        imfn = fn.replace('xml','ndpi')
        mskdst = os.path.join(fol,'labeledmask_10um')
        dstfn = os.path.join(mskdst, '{}.png'.format(imfn.replace('.ndpi','')))
        if os.path.exists(dstfn):
            return

        # if not imfn.replace('ndpi','tif') in os.listdir(r"\\fatherserverdw\kyuex\clue images\1um"):
        #     return

        print(os.path.basename(xml_path))
        # Open XML file
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # Read Class names and put into a list called classlut
        # classlut = []
        # for Annotation in root.iter('Annotation'):
        #     for Attrib in Annotation.iter('Attribute'):
        #         classlut.append(Attrib.attrib.get('Name'))
        # classluts = sorted(classlut)
        classluts = ['tissue']
        dfs = []
        for idx, Annotation in enumerate(root.iter('Annotation')): #iterate each class
            for Region in Annotation.iter('Region'): #iterate each circle
                x = np.array([float(Vertex.get('X')) for Vertex in Region.iter('Vertex')]).astype('int') #iterate each vertex
                y = np.array([float(Vertex.get('Y')) for Vertex in Region.iter('Vertex')]).astype('int')
                objid = np.array([int(Region.get('Id'))])
                classname = np.array([classluts[idx]])
                df = pd.DataFrame({'classname': classname,
                                   'objid': objid,
                                   'x': [x],
                                   'y': [y], })
                dfs.append(df)

        dff = pd.concat(dfs).reset_index(drop=True)

        if not os.path.exists(mskdst):
            os.mkdir(mskdst)



        slide = OpenSlide(os.path.join(imsrc,imfn))
        rgb_dim = slide.dimensions
        target_level = slide.get_best_level_for_downsample(20)
        target_dim = slide.level_dimensions[target_level]
        rsf = [x/y for x,y in zip(rgb_dim,target_dim)][0]
        TA = slide.read_region(location=(0,0),level=target_level,size=slide.level_dimensions[target_level])
        TA = np.array(TA)
        TA = TA[:,:,0]>230
        TA = np.sum(TA)

        # slide = OpenSlide(os.path.join(imsrc,imfn))
        # rsf = 10 #8um = 1.25x #4um = 2.5x, #2um=5x, 1um=10x, 0.5um=20x, 0.25um=40x
        # rsf = rsf/float(slide.properties['openslide.mpp-x'])
        # target_dim = slide.dimensions
        # target_dim = [round(np.ceil(_/rsf)) for _ in imdim]

        mask = np.zeros(target_dim[::-1], dtype = np.uint8) #white
        for idx,elem in dff.iterrows():
            contours = np.array([elem['x'],elem['y']])
            contours2 = (contours/rsf).astype(int)
            mask = cv2.fillPoly(mask, pts=[contours2.T], color=idx+1)

         # Image.fromarray(mask.astype('int8')).save(dstfn)

        ROIA = np.sum(mask)
        ratio = round(ROIA/TA*100)
    except:
        ratio = 0
        ROIA = 0
        TA = 0
        print('create roi for ',fn)
    return [fn,ROIA,TA,ratio]


In [88]:
src = r'\\fatherserverdw\kyuex\clue images\annotations\roi'
imsrc = r"\\fatherserverdw\kyuex\clue images"
LUT = r"\\fatherserverdw\kyuex\imlist_all.xlsx"
LUT = pd.read_excel(LUT)
xmlist = LUT['filename'][(LUT['student score']>1) & (LUT['Block or Slide?']=="Both")]
xmlist

74      2022-06-07 16.05.45.ndpi
87      2022-06-07 16.10.49.ndpi
96      2022-06-07 17.10.29.ndpi
102     2022-06-07 17.15.33.ndpi
110     2022-06-07 17.43.53.ndpi
                  ...           
1145    2022-07-07 19.56.55.ndpi
1150    2022-07-07 20.05.53.ndpi
1155    2022-07-07 20.21.33.ndpi
1156    2022-07-07 20.54.02.ndpi
1172    2022-07-07 22.11.00.ndpi
Name: filename, Length: 163, dtype: object

In [92]:
len(xmlist),np.array(list).shape

(163, (163, 4))

In [90]:
start=time()
# List Comprehension
xmlist = [os.path.join(src,xmlpth.replace('ndpi','xml')) for xmlpth in xmlist]
list = [xml2mask(os.path.join(src,xmlpth),imsrc) for xmlpth in xmlist if xmlpth.endswith('.xml')]
print('readxml took {:.2f} sec'.format(time() - start))
list

2022-06-07 16.05.45.xml
2022-06-07 16.10.49.xml
2022-06-07 17.10.29.xml
2022-06-07 17.15.33.xml
2022-06-07 17.43.53.xml
2022-06-07 17.48.30.xml
2022-06-08 16.52.37.xml
2022-06-08 17.04.20.xml
2022-06-08 17.14.34.xml
2022-06-08 17.25.47.xml
2022-06-08 17.33.23.xml
2022-06-08 17.38.55.xml
2022-06-08 17.54.57.xml
create roi for  2022-06-08 17.54.57.xml
2022-06-08 18.08.15.xml
2022-06-08 18.13.05.xml
2022-06-08 18.23.12.xml
2022-06-08 18.27.20.xml
2022-06-09 11.32.55.xml
2022-06-09 11.36.47.xml
2022-06-09 11.46.15.xml
2022-06-09 11.51.43.xml
2022-06-09 12.20.35.xml
2022-06-09 12.48.49.xml
2022-06-09 15.59.56.xml
2022-06-09 16.05.39.xml
2022-06-09 16.46.04.xml
2022-06-09 16.53.12.xml
2022-06-09 16.56.40.xml
2022-06-09 17.17.37.xml
2022-06-09 17.29.25.xml
2022-06-09 17.39.52.xml
2022-06-09 17.54.10.xml
2022-06-09 18.02.27.xml
2022-06-09 18.09.31.xml
2022-06-09 18.31.45.xml
2022-06-09 18.34.58.xml
2022-06-09 18.52.30.xml
2022-06-09 19.01.26.xml
2022-06-09 19.13.25.xml
2022-06-09 19.55.38.xml


[['2022-06-07 16.05.45.xml', 3552318, 8187079, 43],
 ['2022-06-07 16.10.49.xml', 424640, 3614237, 12],
 ['2022-06-07 17.10.29.xml', 238360, 7070978, 3],
 ['2022-06-07 17.15.33.xml', 2082174, 6046388, 34],
 ['2022-06-07 17.43.53.xml', 208008, 6490302, 3],
 ['2022-06-07 17.48.30.xml', 435544, 7798024, 6],
 ['2022-06-08 16.52.37.xml', 160063, 9343141, 2],
 ['2022-06-08 17.04.20.xml', 626975, 21432925, 3],
 ['2022-06-08 17.14.34.xml', 457731, 10306190, 4],
 ['2022-06-08 17.25.47.xml', 1577180, 7114900, 22],
 ['2022-06-08 17.33.23.xml', 570024, 6269849, 9],
 ['2022-06-08 17.38.55.xml', 518096, 6283731, 8],
 ['2022-06-08 17.54.57.xml', 0, 0, 0],
 ['2022-06-08 18.08.15.xml', 2517843, 5916652, 43],
 ['2022-06-08 18.13.05.xml', 2199510, 11297356, 19],
 ['2022-06-08 18.23.12.xml', 1085163, 7433300, 15],
 ['2022-06-08 18.27.20.xml', 1187989, 4588444, 26],
 ['2022-06-09 11.32.55.xml', 149663, 4244864, 4],
 ['2022-06-09 11.36.47.xml', 2306883, 9377548, 25],
 ['2022-06-09 11.46.15.xml', 1429091, 868

In [91]:
pd.DataFrame(np.array(list),columns=['fn','ROIA','TA','ratio']).to_csv(r"\\fatherserverdw\kyuex\ROITA_ratio.csv")