# Parse XML annotation file with X,Y coordinates and instance ID into a DataFrame

In [1]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
#import os

In [2]:
def xml_to_df(xml_filepath):
    append_df = []
    tree = ET.parse(xml_filepath)
    root = tree.getroot()
    for Annotation in root.iter("Annotation"):
        for Region in Annotation.iter('Region'): #iterate over the Region so we can iterate over id 1 and 2 (two circles):
            x = np.array([Vertex.get('X') for Vertex in Region.iter('Vertex')])
            y = np.array([Vertex.get('Y') for Vertex in Region.iter('Vertex')])
            id = np.array([Region.get('Id')])
            coord_dict = {"X": [x], "Y": [y], "ID": [id]}
            df = pd.DataFrame(data = coord_dict)
            df.ID = df.ID.astype(int)
            append_df.append(df)
    coord_df = pd.concat(append_df)
    return(coord_df)

In [3]:
# # for pc:
# # src = r'\\fatherserverdw\kyuex\clue images'
# # for mac:
# src = r'//Volumes/kyuex/clue images'
# xml = [_ for _ in os.listdir(src) if _.endswith('xml')]
# xml

coord_df = xml_to_df('//Volumes/kyuex/clue images/2022-06-07 13.18.40.xml')
coord_df

Unnamed: 0,X,Y,ID
0,"[5615, 5725, 5810, 5865, 5900, 5915, 5930, 594...","[10850, 10890, 10930, 10950, 10960, 10970, 109...",1
0,"[24299, 24364, 24483, 24537, 24613, 24624, 246...","[20025, 20068, 20133, 20176, 20209, 20231, 202...",2


# Convert X,Y Coordinates to Binary Mask

In [4]:
import cv2

def df_to_mask(coord_df):
    dfs = []
    for i in np.arange(len(coord_df)):
        xx = coord_df.iloc[i].X
        yy = coord_df.iloc[i].Y
        xy = list(zip(xx,yy))
        x_dim = len(xy) #not sure how much "pixels" should this blank image have
        y_dim = x_dim #not sure how much "pixels" should this blank image have
        contours = np.array(xy)
        image = np.zeros([x_dim,y_dim])
        df = pd.DataFrame(cv2.drawContours(image,[contours.astype(int)],-1,(0,255,0),3)) #contourIdx = -1, draw all contours
        dfs.append(df)
    dfs = pd.concat(dfs)
    return dfs

mask = df_to_mask(coord_df)
mask
#is this because input isn't an image? should i feed an arbitrary white image?

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,1000
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
822,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
823,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
825,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
