In [2]:
from pascal_voc_writer import Writer
import xml.etree.cElementTree as ET
import os
import pandas as pd
import numpy as np

### Function to write in Pascal VOC format

In [30]:
def write_pascal_voc(element_list, subelement_list, output_dir='Pascal_VOC_Annotations'):
    """
    Write one Pascal VOC annotation file per image.

    Input:
    element_list: list of dicts that form the header information, one per image
    subelement_list: list of dicts, one per labelled bounding box
    output_dir: folder that receives the '.xml' files; it is created when
                missing (defaults to 'Pascal_VOC_Annotations')

    Each element must provide the keys:
    name:   file name of the image
    width:  width of the image
    height: height of the image

    Each sub-element must provide the keys:
    name:  file name of the image the box belongs to
    label: the label in context of annotation
    xtl:   forwarded to the writer as the x-min attribute of the bounding box
    ytl:   forwarded to the writer as the y-min attribute of the bounding box
    xbr:   forwarded to the writer as the x-max attribute of the bounding box
    ybr:   forwarded to the writer as the y-max attribute of the bounding box

    Output:
    Nothing is returned.  For every element a file named
    '<output_dir>/<image name without extension>.xml' is saved; an image
    without any matching sub-element still gets a file (header only).
    """

    # Group the boxes by image name in a single pass instead of re-scanning
    # the whole sub-element list for every image.  Insertion order is kept,
    # so boxes are written in the order they were supplied.
    boxes_by_image = {}
    for box in subelement_list:
        boxes_by_image.setdefault(box['name'], []).append(box)

    for elem in element_list:
        filename = elem['name']

        # set the file name of the output file that holds the annotation;
        # splitext drops only the real extension, whatever it is
        stem = os.path.splitext(filename)[0]
        output_file = os.path.join(output_dir, stem + '.xml')

        # Create the parent of the output file (not just output_dir) so that
        # image names containing sub-folders can be saved as well.
        os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)

        # form the writer object and capture the header level information
        writer = Writer(filename, elem['width'], elem['height'])

        # write the bounding box details for each label of this image
        for box in boxes_by_image.get(filename, []):
            writer.addObject(box['label'], box['xtl'], box['ytl'], box['xbr'], box['ybr'])

        writer.save(output_file)


### Function to Parse XML (CVAT Annotation)

In [31]:
def parse_xml(xml_file_name):
    """
    Read an annotation XML file and flatten it into two lists of dicts.

    Input:
    xml_file_name: path of the XML file to parse

    Every direct child of the root that carries an 'id' attribute is treated
    as an image entry; every child of such an entry that carries a 'label'
    attribute is treated as one bounding box of that image.

    Output (tuple):
    elem_list:    one dict per image with the keys 'name', 'width', 'height'
    subelem_list: one dict per box with the keys 'name' (of the owning image),
                  'label', 'xtl', 'ytl', 'xbr', 'ybr'

    All values are the raw XML attribute values (None when an attribute is
    absent); no numeric conversion is performed.
    """
    images = []
    boxes = []

    for node in ET.parse(xml_file_name).getroot():
        # root children without an 'id' attribute are not image entries
        if node.attrib.get('id') is None:
            continue

        image_name = node.attrib.get('name')
        images.append({
            'name': image_name,
            'width': node.attrib.get('width'),
            'height': node.attrib.get('height'),
        })

        for child in node:
            # only children that carry a label describe a bounding box
            if child.attrib.get('label') is None:
                continue

            record = {'name': image_name, 'label': child.attrib.get('label')}
            for corner in ('xtl', 'ytl', 'xbr', 'ybr'):
                record[corner] = child.attrib.get(corner)
            boxes.append(record)

    return images, boxes


In [34]:
# Parse the annotation export once, then emit one Pascal VOC file per image.
# elem_list / subelem_list stay in the namespace for the cells further down.
elem_list, subelem_list = parse_xml(
    'CVAT-Annotations/construction_annotations_v2.xml'
)
write_pascal_voc(element_list=elem_list, subelement_list=subelem_list)

### Generate 'dataset.txt' file

In [35]:
# Index the boxes by image name once, so each image below is a dictionary
# lookup instead of a scan over the whole box list.
boxes_by_image = {}
for box in subelem_list:
    boxes_by_image.setdefault(box['name'], []).append(box)

# One line per image:
#   <file name> [<xtl> <ytl> <xbr> <ybr> <label>]...
# The context manager guarantees the file is closed even if a record is
# malformed (e.g. a missing attribute makes ' '.join raise).
with open("dataset.txt", "w") as dataset_file:
    for elem in elem_list:
        filename = elem['name']

        # capture contents to write to file, starting with the image name
        fields = [filename]

        # append the bounding box details for each label of this image
        for box in boxes_by_image.get(filename, []):
            fields.extend((box['xtl'], box['ytl'], box['xbr'], box['ybr'], box['label']))

        dataset_file.write(' '.join(fields) + '\n')

Sequence01_0.jpeg
Sequence01_1.jpeg
Sequence01_10.jpeg
Sequence01_11.jpeg
Sequence01_12.jpeg
Sequence01_13.jpeg
Sequence01_14.jpeg
Sequence01_15.jpeg
Sequence01_16.jpeg
Sequence01_17.jpeg
Sequence01_18.jpeg
Sequence01_19.jpeg
Sequence01_2.jpeg
Sequence01_20.jpeg
Sequence01_21.jpeg
Sequence01_22.jpeg
Sequence01_23.jpeg
Sequence01_24.jpeg
Sequence01_25.jpeg
Sequence01_26.jpeg
Sequence01_27.jpeg
Sequence01_28.jpeg
Sequence01_29.jpeg
Sequence01_3.jpeg
Sequence01_30.jpeg
Sequence01_31.jpeg
Sequence01_32.jpeg
Sequence01_33.jpeg
Sequence01_34.jpeg
Sequence01_35.jpeg
Sequence01_36.jpeg
Sequence01_37.jpeg
Sequence01_38.jpeg
Sequence01_39.jpeg
Sequence01_4.jpeg
Sequence01_40.jpeg
Sequence01_41.jpeg
Sequence01_42.jpeg
Sequence01_43.jpeg
Sequence01_44.jpeg
Sequence01_45.jpeg
Sequence01_46.jpeg
Sequence01_47.jpeg
Sequence01_48.jpeg
Sequence01_49.jpeg
Sequence01_5.jpeg
Sequence01_50.jpeg
Sequence01_51.jpeg
Sequence01_52.jpeg
Sequence01_53.jpeg
Sequence01_54.jpeg
Sequence01_55.jpeg
Sequence01_56.jpeg

Sequence09_437.jpeg
Sequence09_438.jpeg
Sequence09_439.jpeg
Sequence09_440.jpeg
Sequence09_441.jpeg
Sequence09_442.jpeg
Sequence09_443.jpeg
Sequence09_445.jpeg
Sequence09_446.jpeg
Sequence09_450.jpeg
Sequence09_451.jpeg
Sequence09_452.jpeg
Sequence09_453.jpeg
Sequence09_454.jpeg
Sequence09_455.jpeg
Sequence09_456.jpeg
Sequence09_457.jpeg
Sequence09_462.jpeg
Sequence09_463.jpeg
Sequence09_464.jpeg
Sequence09_466.jpeg
Sequence09_468.jpeg
Sequence09_469.jpeg
Sequence09_471.jpeg
Sequence09_472.jpeg
Sequence09_474.jpeg
Sequence09_475.jpeg
Sequence09_476.jpeg
Sequence09_477.jpeg
Sequence09_478.jpeg
Sequence09_479.jpeg
Sequence09_480.jpeg
Sequence09_481.jpeg
Sequence09_482.jpeg
Sequence09_483.jpeg
Sequence09_484.jpeg
Sequence09_485.jpeg
Sequence09_487.jpeg
Sequence09_488.jpeg
Sequence09_489.jpeg
Sequence09_490.jpeg
Sequence10_492.jpeg
Sequence10_493.jpeg
Sequence10_495.jpeg
Sequence10_496.jpeg
Sequence10_497.jpeg
Sequence10_498.jpeg
Sequence10_499.jpeg
Sequence10_500.jpeg
Sequence10_501.jpeg


Sequence20_1138.jpeg
Sequence20_1139.jpeg
Sequence20_1140.jpeg
Sequence20_1141.jpeg
Sequence20_1142.jpeg
