Author: Alejandro Herrera

December 2020

Use the online converter linked below to turn a CSV of image information into the XML file that this notebook splits:

https://www.convertcsv.com/csv-to-xml.htm

In [1]:
import xml.etree.ElementTree as ET
import os.path
from os import path

In [2]:
def split_xml(file='convertcsv.xml', out_dir='', image_type='.png'):
    """
    Splits an xml of image data into separate xmls for each unique image.
    
    Expects:
    <root>
    <row>
    <image> imagename.imagetype </image>
    <xmin> float </xmin>
    <ymin> float </ymin>
    <xmax> float </xmax>
    <ymax> float </ymax>
    <label> object class label </label>
    </row>
    .
    .
    </root>
    
    Where each <row></row> is bounding box info within the given image.
    
    Parameters:
        file       - xml file to split
        out_dir    - Directory to save the xml files to. A trailing path
                     separator is optional; the directory is created if it
                     does not exist. '' means the current directory.
        image_type - Image extension type ex: '.png'
        
    Results In:
        out_dir
        |-- imagename.xml ... for each unique image found in the given file
        
    Notes:
        - Rows with no <image> element, an empty <image>, or a name that
          does not end with image_type are skipped.
        - Whitespace surrounding the image name is ignored.
        - Only the final extension is removed from the image name, so
          'a.b.png' is written to 'a.b.xml'.
        - If an output xml already exists, the new rows are appended to it.
        
    Raises:
        xml.etree.ElementTree.ParseError - file (or an existing output xml)
                                           is not well-formed xml
        OSError                          - file cannot be read or an output
                                           cannot be written
    """
    tree = ET.parse(file)
    root = tree.getroot()

    # os.path.join(x, '') appends a separator only when one is missing and
    # leaves '' untouched, so both 'out' and 'out/' behave the same.
    prefix = os.path.join(out_dir, '')

    # Group the rows by output file first so each file is read and written
    # once instead of once per bounding box. Dict order keeps rows in
    # document order.
    rows_by_file = {}
    for row in root.iter('row'):
        image_name = (row.findtext('image') or '').strip()
        # not a valid image file
        if not image_name or not image_name.endswith(image_type):
            continue
        # splitext only drops the last extension, so dotted names are safe
        stem = os.path.splitext(image_name)[0]
        rows_by_file.setdefault(prefix + stem + ".xml", []).append(row)

    if rows_by_file and out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for filename, rows in rows_by_file.items():
        # if the xml file for these bounding boxes already exists, append
        if os.path.exists(filename):
            out_root = ET.parse(filename).getroot()
        # otherwise start a new xml to put the boxes into
        else:
            out_root = ET.Element("root")
        out_root.extend(rows)
        with open(filename, 'wb') as f:
            f.write(ET.tostring(out_root))

In [3]:
# Forward slashes avoid the invalid "\z" / "\c" escape sequences and work on
# every platform, matching the style of the output directory argument.
split_xml("./zergset2Cartooned/convertcsv.xml", './zergset2Cartooned/', '.png')