In [4]:
import xml.etree.ElementTree as ET
from time import time
import numpy as np
from scipy import io
import os
import pandas as pd

In [5]:
# xml_path: absolute file path of the annotation XML file to convert
# mdict: dictionary written to the MAT file -- per-region x/y vertex coordinates, object IDs and class names

def xml2mat(xml_path):
    """Convert one annotation XML file into a MAT file and a DataFrame.

    Every ``Region`` found under every ``Annotation`` element becomes one row
    holding the class name of its annotation, the region ``Id`` and the
    integer X/Y coordinates of its ``Vertex`` elements.  The same data is
    written with ``scipy.io.savemat`` under the keys ``x``, ``y``, ``objID``
    and ``className``.

    Parameters
    ----------
    xml_path : str
        Path of the XML file to read.  The output path is derived with
        ``xml_path.replace('xml', 'mat')``, which substitutes *every*
        occurrence of ``xml`` in the path (folder names included), not only
        the file extension.

    Returns
    -------
    pandas.DataFrame
        Columns ``classname``, ``objid``, ``x`` and ``y``; one row per region.
        Empty (but with those columns) when the file contains no regions.
    """
    print(os.path.basename(xml_path))

    # Open XML file
    root = ET.parse(xml_path).getroot()

    records = []
    for annotation in root.iter('Annotation'):  # iterate each class
        # Take the class name from this annotation's own first Attribute.
        # Looking names up by position in an alphabetically sorted list would
        # mislabel regions whenever annotations are not stored in sorted order
        # or do not carry exactly one Attribute each.
        attribute = next(annotation.iter('Attribute'), None)
        classname = attribute.get('Name') if attribute is not None else None
        if classname is None:
            # Unnamed annotation: fall back to an empty label instead of None.
            classname = ''

        for region in annotation.iter('Region'):  # iterate each outlined object
            # Walk the vertices once and reuse them for both coordinates.
            vertices = list(region.iter('Vertex'))
            # Coordinates are parsed as floats, then truncated to integers.
            x = np.array([float(vertex.get('X')) for vertex in vertices]).astype('int')
            y = np.array([float(vertex.get('Y')) for vertex in vertices]).astype('int')
            records.append({'classname': classname,
                            'objid': int(region.get('Id')),
                            'x': x,
                            'y': y})

    # Build the frame in one go; passing the columns explicitly keeps the
    # layout stable even when no region was found.
    dff = pd.DataFrame(records, columns=['classname', 'objid', 'x', 'y'])

    # save as MAT file format
    mdict = {'x': dff['x'].tolist(),
             'y': dff['y'].tolist(),
             'objID': dff['objid'].tolist(),
             'className': dff['classname'].tolist()}
    io.savemat(xml_path.replace('xml', 'mat'), mdict=mdict)
    return dff

In [7]:
# Folder that holds the XML annotation files to convert.
src = r'\\fatherserverdw\Q\research\images\skin_aging\annotation\roi\xml'
start = time()
# Convert every file in the folder whose name ends with '167.xml'.
for xmlpth in os.listdir(src):
    if xmlpth.endswith('167.xml'):
        xml2mat(os.path.join(src, xmlpth))
print('readxml took {:.2f} sec'.format(time() - start))



167.xml
readxml took 0.02 sec
