# Parse an XML annotation file with X, Y coordinates and instance IDs into a DataFrame

In [1]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import os

In [2]:
# Folder that holds the annotation files; enable the line matching your machine.
# PC:
# src = r'\\fatherserverdw\kyuex\clue images'
# Mac:
src = r'//Volumes/kyuex/clue images'
# Keep every directory entry whose name ends in 'xml'.
xml = [entry for entry in os.listdir(src) if entry.endswith('xml')]
xml

['2022-06-07 13.18.40.xml']

In [3]:
# Build the full path to the first XML file listed in `xml`.
xml_path = os.path.join(src,xml[0])
# Parse the file and keep both the tree and its root element for the cells below.
tree = ET.parse(xml_path)
root = tree.getroot()
# Display the root element to confirm the file was parsed.
root

<Element 'Annotations' at 0x7fb2a2974b30>

In [4]:
# Walk every Region under every Annotation (here the two circles with Id 1 and 2)
# and print one table of vertex coordinates per region.
for annotation in root.iter("Annotation"):
    for region in annotation.iter('Region'):
        # Collect the vertices once so X and Y are read from the same elements.
        vertices = list(region.iter('Vertex'))
        # Attribute values come back as strings; convert them to numbers
        # (integers when possible, floats otherwise).
        x = pd.to_numeric([vertex.get('X') for vertex in vertices])
        y = pd.to_numeric([vertex.get('Y') for vertex in vertices])
        # Named region_id so the builtin id() is not overwritten in the kernel.
        region_id = region.get('Id')
        # Repeat the region Id once per vertex as plain values, not nested arrays.
        coord_dict = {"X": x, "Y": y, "ID": [region_id] * len(vertices)}
        df = pd.DataFrame(data=coord_dict)
        df["ID"] = df["ID"].astype(int)
        print(df)

         X      Y  ID
0     5615  10850   1
1     5725  10890   1
2     5810  10930   1
3     5865  10950   1
4     5900  10960   1
...    ...    ...  ..
996   5425  10785   1
997   5530  10825   1
998   5595  10855   1
999   5610  10855   1
1000  5615  10850   1

[1001 rows x 3 columns]
         X      Y  ID
0    24299  20025   2
1    24364  20068   2
2    24483  20133   2
3    24537  20176   2
4    24613  20209   2
..     ...    ...  ..
822  24244  20025   2
823  24266  20046   2
824  24288  20079   2
825  24288  20090   2
826  24299  20025   2

[827 rows x 3 columns]


### Recreating this as a function:


In [5]:
def xml_to_df(xml_filepath):
    """Parse an XML annotation file into one coordinate table per Region.

    Every ``Region`` element found under an ``Annotation`` element yields a
    DataFrame with one row per ``Vertex``.

    Parameters
    ----------
    xml_filepath : str or path-like
        Path of the XML annotation file to read.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per Region, in document order, with columns:
        ``X`` and ``Y`` (numeric vertex coordinates) and ``ID`` (the
        Region's ``Id`` attribute as an integer, repeated on every row).

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML.
    ValueError
        If a coordinate or a Region ``Id`` is not a valid number.
    """
    region_frames = []
    root = ET.parse(xml_filepath).getroot()
    for annotation in root.iter("Annotation"):
        for region in annotation.iter('Region'):
            # Collect the vertices once so X and Y are read from the same elements.
            vertices = list(region.iter('Vertex'))
            # Attribute values come back as strings; convert them to numbers
            # (integers when possible, floats otherwise).
            x_coords = pd.to_numeric([vertex.get('X') for vertex in vertices])
            y_coords = pd.to_numeric([vertex.get('Y') for vertex in vertices])
            # Repeat the region Id once per vertex as plain values; a column of
            # one-element arrays only converts to int through NumPy's deprecated
            # size-1-array-to-scalar behaviour.
            region_id = region.get('Id')
            frame = pd.DataFrame(
                {"X": x_coords, "Y": y_coords, "ID": [region_id] * len(vertices)}
            )
            frame["ID"] = frame["ID"].astype(int)
            region_frames.append(frame)
    return region_frames

### Run the function (using the Mac file path):

In [6]:
# Parse the annotation file; xml_to_df returns a list with one DataFrame per Region.
my_coord_df = xml_to_df('//Volumes/kyuex/clue images/2022-06-07 13.18.40.xml')
# Display the resulting list of DataFrames.
my_coord_df

[         X      Y  ID
 0     5615  10850   1
 1     5725  10890   1
 2     5810  10930   1
 3     5865  10950   1
 4     5900  10960   1
 ...    ...    ...  ..
 996   5425  10785   1
 997   5530  10825   1
 998   5595  10855   1
 999   5610  10855   1
 1000  5615  10850   1
 
 [1001 rows x 3 columns],
          X      Y  ID
 0    24299  20025   2
 1    24364  20068   2
 2    24483  20133   2
 3    24537  20176   2
 4    24613  20209   2
 ..     ...    ...  ..
 822  24244  20025   2
 823  24266  20046   2
 824  24288  20079   2
 825  24288  20090   2
 826  24299  20025   2
 
 [827 rows x 3 columns]]