In [None]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd

In [None]:
def xml_to_df(xml_path):
    """Parse an annotation XML file into one DataFrame row per ``Region``.

    Every ``Annotation`` element found under the root is treated as one
    class; its 1-based position in document order becomes the class number.
    Each ``Region`` inside it contributes one row.

    Parameters
    ----------
    xml_path : str, path-like or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order:

        * ``ClassNames`` - 1-based index of the enclosing ``Annotation``.
        * ``X`` / ``Y`` - NumPy array of the ``X`` / ``Y`` attributes of the
          region's ``Vertex`` elements. The values are kept exactly as read
          from the XML (attribute strings); convert them to a numeric dtype
          before doing arithmetic.
        * ``ID`` - the region's ``Id`` attribute as an integer.

        The index is a fresh ``0..n-1`` range. When the file contains no
        regions an empty frame with the same columns is returned instead of
        raising.

    Raises
    ------
    TypeError, ValueError
        If a ``Region`` has a missing or non-integer ``Id`` attribute.
    """
    root = ET.parse(xml_path).getroot()

    # Accumulate plain column lists and build the frame once at the end;
    # creating a one-row DataFrame per region and concatenating is slow and
    # fails outright (``pd.concat([])``) when there are no regions.
    class_numbers, xs, ys, region_ids = [], [], [], []
    for class_number, annotation in enumerate(root.iter("Annotation"), start=1):
        for region in annotation.iter("Region"):
            # Walk the vertices a single time, then split into X and Y.
            vertices = [(v.get("X"), v.get("Y")) for v in region.iter("Vertex")]
            class_numbers.append(class_number)
            xs.append(np.array([vx for vx, _ in vertices]))
            ys.append(np.array([vy for _, vy in vertices]))
            region_ids.append(int(region.get("Id")))

    # Explicit dtypes keep the schema identical for empty and non-empty input;
    # dtype=object stops pandas/NumPy from trying to unpack the per-region
    # arrays into extra dimensions.
    coord_df = pd.DataFrame(
        {
            "ClassNames": pd.Series(class_numbers, dtype="int64"),
            "X": pd.Series(xs, dtype=object),
            "Y": pd.Series(ys, dtype=object),
            "ID": pd.Series(region_ids, dtype=int),
        }
    )
    return coord_df

In [None]:
# Next, pass xml_path through xml_to_df to get the X, Y coordinates of every annotated region, labelled by class:
def coord_to_multiclass_df(xml_path,save_pkl = False, dst_pth = r'\\fatherserverdw\Kevin\coord_df.pkl'):
    """Load region coordinates from an annotation XML, labelled by class id.

    Calls ``xml_to_df``, drops its ``ID`` column and maps any class *names*
    found in ``ClassNames`` to their numeric ids. Values that are not one of
    the listed names (for example the integer class numbers ``xml_to_df``
    produces) are left unchanged by the replacement.

    Parameters
    ----------
    xml_path : str, path-like or file object
        Forwarded unchanged to ``xml_to_df``.
    save_pkl : bool, default False
        When true, also pickle the resulting frame to ``dst_pth``.
    dst_pth : str or path-like
        Destination of the pickle file. Only used when ``save_pkl`` is true.
        Defaults to the network location this notebook was written against.

    Returns
    -------
    pandas.DataFrame
        Columns ``ClassNames``, ``X`` and ``Y``, one row per region.
    """
    # Named so that the builtin ``dict`` is not shadowed inside the function.
    class_name_to_id = {"corneum" : 1,"spinosum": 2,"hairshaft":3,"hairfollicle":4,"smoothmuscle":5,"oil":6,"sweat":7,"nerve":8,"bloodvessel":9,"ecm":10,"fat":11,"white":12}

    coord_df = xml_to_df(xml_path).drop(columns = ["ID"])
    coord_df = coord_df.replace({"ClassNames": class_name_to_id})
    if save_pkl:
        coord_df.to_pickle(dst_pth) #save pkl
    return coord_df

In [None]:
# Then pass the annotation XML path and the original image path to build a label mask in which every class is filled with its own value (1..N, N = 12 in this case):
def create_mask_multi_annot(xml_path, image_path, downsample_factor = 1):
    """Rasterise the annotated regions of a slide into a single label mask.

    The slide at ``image_path`` is opened only to find the pyramid level that
    best matches ``downsample_factor`` and the size of that level. Every
    region read from ``xml_path`` is then scaled from full-resolution
    coordinates to that level and filled into the mask with its class number.

    Classes are painted in the fixed order given by ``iter_order`` below, so
    where regions overlap the class drawn later wins. Rows whose class number
    is not listed in ``iter_order`` are not drawn.

    Parameters
    ----------
    xml_path : str, path-like or file object
        Annotation XML, forwarded to ``coord_to_multiclass_df``.
    image_path : str or path-like
        Slide image readable by OpenSlide.
    downsample_factor : float, default 1
        Desired downsample relative to full resolution; the closest pyramid
        level chosen by OpenSlide is used.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width)`` of the chosen level;
        0 is background, other values are class numbers.
    """
    # Imported here because the notebook's import cell does not provide them;
    # NOTE(review): move to the top-level import cell if these packages are
    # meant to be hard requirements of the notebook.
    import cv2
    import openslide

    with openslide.open_slide(image_path) as slide:
        target_level = slide.get_best_level_for_downsample(downsample_factor)
        # OpenSlide reports sizes as (width, height).
        target_width, target_height = slide.level_dimensions[target_level]
        full_width, full_height = slide.dimensions

    # Per-axis resize factor from full resolution to the target level.
    rsf = np.array([full_width / target_width, full_height / target_height])
    # Image arrays are indexed (row, column), i.e. (height, width).
    mask = np.zeros((target_height, target_width), dtype = np.uint8)

    iter_order = [2,10,5,4,6,11,7,9,8,12,3,1]
    coord_df = coord_to_multiclass_df(xml_path) #use function above

    for class_number in iter_order:
        class_rows = coord_df[coord_df.ClassNames == class_number]
        for _, row in class_rows.iterrows():
            # Coordinates may be stored as strings, possibly with decimals,
            # so go through float rather than casting straight to int.
            xx = np.asarray(row.X, dtype=float)
            yy = np.asarray(row.Y, dtype=float)
            if xx.size == 0:
                continue  # a region without vertices has nothing to fill
            contours = np.column_stack((xx, yy)) / rsf
            # fillPoly only accepts 32-bit integer points.
            mask = cv2.fillPoly(mask, pts=[contours.astype(np.int32)], color=int(class_number))
    return mask