Convert ICDAR2019 XML table annotations into Python objects

```xml
<document filename="16-0500_DK-82_1-34-9855_00006.jpg">
  <table id="Table_1542010323591_3033">
    <Coords points="38,36 4575,36 4575,3687 38,3687"/>
    <cell id="TableCell_1542010826285_4408" start-row="0" end-row="0" start-col="0" end-col="0">
      <Coords points="38,36 39,164 225,164 224,36"/>
    </cell>
```

In [131]:
class Table:
    """A table annotation: an identifier, an outline and the cells it holds.

    Every attribute is initialised in ``__init__`` so that a freshly created
    (still unpopulated) table can be printed without raising
    ``AttributeError``.

    Parameters
    ----------
    table_id : identifier of the table; stored as ``self.id``.
    coords : outline of the table; stored as-is and printed verbatim.
    cells : optional iterable of cell objects; copied into a new list.
    """

    def __init__(self, table_id=None, coords=None, cells=None):
        self.id = table_id
        self.coords = coords
        # Always build a fresh list so instances never share cell storage.
        self.cells = [] if cells is None else list(cells)

    def __str__(self):
        """Return a multi-line summary followed by one entry per cell."""
        header = (
            f"table_id={self.id}, \n"
            f"table_coords={self.coords}, \n"
            f"table_cells={len(self.cells)}: \n"
        )
        return header + "\n".join(str(cell) for cell in self.cells) + "\n"

class Cell:
    """A table-cell annotation: an identifier, an outline and a grid location.

    Every attribute is initialised in ``__init__`` so that a freshly created
    (still unpopulated) cell can be printed without raising
    ``AttributeError``.

    Parameters
    ----------
    cell_id : identifier of the cell; stored as ``self.id``.
    coords : outline of the cell; stored as-is and printed verbatim.
    location : grid position of the cell; stored as-is and printed verbatim.
    """

    def __init__(self, cell_id=None, coords=None, location=None):
        self.id = cell_id
        self.coords = coords
        self.location = location

    def __str__(self):
        """Return a tab-indented, multi-line summary of the cell."""
        return (
            f"\tcell_id={self.id}, \n"
            f"\tcell_coords={self.coords}, \n"
            f"\tcell_location={self.location} \n"
        )

In [168]:
def parse_tables_from_xml(xml_path):
    """Parse an ICDAR2019 xml annotation into a list of objects.

    Parameters
    ----------
    xml_path : path to the ICDAR2019 xml annotation.

    Returns
    -------
    tables : a list of :ref:`Table` objects from the xml annotation, one per
        ``<table>`` element in document order. Each table carries ``id``,
        ``coords`` (a tuple of int tuples, one per point) and ``cells``; each
        cell carries ``id``, ``coords`` and ``location`` as the int tuple
        ``(start_row, end_row, start_col, end_col)``.

    Raises
    ------
    IndexError : a ``<table>`` or ``<cell>`` has no direct ``<Coords>`` child.
    KeyError : a required attribute (``id``, ``points``, ``start-row``, ...)
        is missing.
    ValueError : a coordinate or location value is not an integer.
    """
    from xml.dom import minidom

    location_attrs = ("start-row", "end-row", "start-col", "end-col")

    tables = []
    for table_node in minidom.parse(xml_path).getElementsByTagName("table"):
        table_obj = Table()
        table_obj.id = table_node.attributes["id"].value
        table_obj.coords = _read_coords(table_node)

        for cell_node in table_node.getElementsByTagName("cell"):
            cell_obj = Cell()
            cell_obj.id = cell_node.attributes["id"].value
            cell_obj.location = tuple(
                int(cell_node.attributes[name].value) for name in location_attrs
            )
            cell_obj.coords = _read_coords(cell_node)
            table_obj.cells.append(cell_obj)

        tables.append(table_obj)

    return tables


def _read_coords(element):
    """Return the points of *element*'s direct ``<Coords>`` child as int tuples."""
    for child in element.childNodes:
        # Match on the tag name instead of taking whichever element comes
        # first, so a differently ordered child cannot be mistaken for the
        # outline. Only direct children are inspected: a table must not pick
        # up the <Coords> of one of its cells.
        if child.nodeType == child.ELEMENT_NODE and child.tagName == "Coords":
            points = child.attributes["points"].value
            return tuple(
                tuple(map(int, point.split(","))) for point in points.split()
            )
    # IndexError (not ValueError) so that callers already handling a missing
    # child element keep working.
    raise IndexError(f"<{element.tagName}> element has no <Coords> child")


In [162]:

# Parse the sample annotation and print the first table it contains.
print(parse_tables_from_xml('./samples/ICDAR2019/ground_truth/cTDaR_s001.xml')[0])

table_id=Table_1542010323591_3033, 
table_coords=((38, 36), (4575, 36), (4575, 3687), (38, 3687)), 
table_cells=286: 
	cell_id=TableCell_1542010826285_4408, 
	cell_coords=((38, 36), (39, 164), (225, 164), (224, 36)), 
	cell_location=(0, 0, 0, 0) 

	cell_id=TableCell_1542010826285_4406, 
	cell_coords=((224, 36), (225, 164), (333, 164), (333, 37)), 
	cell_location=(0, 0, 1, 1) 

	cell_id=TableCell_1542010819640_4318, 
	cell_coords=((333, 37), (333, 164), (1195, 160), (1198, 40)), 
	cell_location=(0, 0, 2, 2) 

	cell_id=TableCell_1542010811496_4230, 
	cell_coords=((1198, 40), (1195, 160), (1710, 159), (1709, 42)), 
	cell_location=(0, 0, 3, 3) 

	cell_id=TableCell_1542010601021_3824, 
	cell_coords=((1709, 42), (1710, 159), (2094, 159), (2089, 44)), 
	cell_location=(0, 0, 4, 4) 

	cell_id=TableCell_1542010592332_3736, 
	cell_coords=((2089, 44), (2094, 159), (2331, 158), (2331, 46)), 
	cell_location=(0, 0, 5, 5) 

	cell_id=TableCell_1542010583393_3648, 
	cell_coords=((2331, 46), (2331, 158),

```json
```