In [1]:
import xml.etree.ElementTree as ET
from pathlib import Path

In [4]:
# Directory containing the annotation files (one XML document per image).
xml_dir = Path('annotations')
# Keep only the *.xml entries: a plain directory listing can also return stray
# files or sub-directories (e.g. .DS_Store, .ipynb_checkpoints), which would
# shift positional indexes into the list and cannot be parsed as annotations.
# Sorting makes the order, and therefore the indexes, reproducible.
xml_list = sorted(xml_dir.glob('*.xml'))

Example of an XML annotation file:

"""<annotation>
	<folder>VOC2012</folder>
	<filename>2007_000032.jpg</filename>
	<source>
		<database>The VOC2007 Database</database>
		<annotation>PASCAL VOC2007</annotation>
		<image>flickr</image>
	</source>
	<size>
		<width>500</width>
		<height>281</height>
		<depth>3</depth>
	</size>
	<segmented>1</segmented>
	<object>
		<name>aeroplane</name>
		<pose>Frontal</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>104</xmin>
			<ymin>78</ymin>
			<xmax>375</xmax>
			<ymax>183</ymax>
		</bndbox>
	</object>
	<object>
		<name>aeroplane</name>
		<pose>Left</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>133</xmin>
			<ymin>88</ymin>
			<xmax>197</xmax>
			<ymax>123</ymax>
		</bndbox>
	</object>
	<object>
		<name>person</name>
		<pose>Rear</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>195</xmin>
			<ymin>180</ymin>
			<xmax>213</xmax>
			<ymax>229</ymax>
		</bndbox>
	</object>
	<object>
		<name>person</name>
		<pose>Rear</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>26</xmin>
			<ymin>189</ymin>
			<xmax>44</xmax>
			<ymax>238</ymax>
		</bndbox>
	</object>
</annotation>"""

In [12]:
# Parse the annotation file at index 1 of the listing straight from disk.
tree = ET.parse(xml_list[1])

root = tree.getroot()

# Build a second root from the same file's text, to compare both entry points.
# The encoding is passed explicitly: read_text() otherwise decodes with the
# platform's locale encoding, while the XML parser assumes UTF-8 for a file
# that declares no encoding, so the two could disagree on non-ASCII content.
root2 = ET.fromstring(xml_list[1].read_text(encoding='utf-8'))

# Element does not implement value equality, so `==` compares object identity:
# the result is False even when both trees hold the same content.
root == root2

False

In [13]:
# Show the repr of the root element obtained through ET.parse().getroot().
root

<Element 'annotation' at 0x1070e3130>

In [14]:
# Show the repr of the root element obtained through ET.fromstring().
root2

<Element 'annotation' at 0x107137860>

In [15]:
# List the tag and attribute dict of every direct child, first for `root`
# and then for `root2`, with one empty line separating the two listings.
for position, top in enumerate((root, root2)):
    if position > 0:
        print('')
    for node in top:
        print(node.tag, node.attrib)

folder {}
filename {}
source {}
size {}
segmented {}
object {}
object {}
object {}
object {}

folder {}
filename {}
source {}
size {}
segmented {}
object {}
object {}
object {}
object {}


In [23]:
# Text of every <name> element found anywhere below the root.
for label in (element.text for element in root.iter('name')):
    print(label)
# Then the tag of each direct child of every <bndbox> element.
for tag in (coord.tag for box in root.iter('bndbox') for coord in box):
    print(tag)

-----
aeroplane
-----
aeroplane
-----
person
-----
person
xmin
ymin
xmax
ymax
xmin
ymin
xmax
ymax
xmin
ymin
xmax
ymax
xmin
ymin
xmax
ymax


In [26]:
# Walk every <object> element and print its class name, followed by the tag
# and text of each coordinate element inside its <bndbox>.
dims = ['xmin', 'ymin']  # not read by the loop below; binding kept unchanged
for obj in root.iter('object'):
    name = obj.find('name').text
    print(name)
    bbox = obj.find('bndbox')
    # Iterating an Element yields its direct children.
    for child in bbox:
        print(child.tag)
        print(child.text)

aeroplane
xmin
104
ymin
78
xmax
375
ymax
183
aeroplane
xmin
133
ymin
88
xmax
197
ymax
123
person
xmin
195
ymin
180
xmax
213
ymax
229
person
xmin
26
ymin
189
xmax
44
ymax
238


In [30]:
# Category names; a name's position in this list is its integer class id.
# '__bgr__' takes id 0 (presumably the background class).
pascal_object_categories = [
    '__bgr__',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Map each category name to its position in the list above.
pascal_voc_classes = {
    label: class_id
    for class_id, label in enumerate(pascal_object_categories)
}

# Alias of the very same dict object (despite the name, it is a mapping).
list_of_classes = pascal_voc_classes

In [34]:
# Collect, for every <object> whose label is a known class, its class name
# and its bounding box. The two lists stay index-aligned.
classes = []
bboxes = []

# Coordinate elements read from each <bndbox>, in output order.
dims = ['xmin', 'ymin', 'xmax', 'ymax']
for obj in root.iter('object'):
    name = obj.find('name').text
    # Annotation files can carry labels that are not in the class table
    # (e.g. 'head'); those objects are skipped.
    if name not in list_of_classes:
        continue
    bndbox = obj.find('bndbox')
    # Build the whole box before appending anything, so a malformed
    # coordinate cannot leave `classes` one entry ahead of `bboxes`.
    bb = [float(bndbox.find(dim).text) for dim in dims]
    classes.append(name)
    bboxes.append(bb)

In [35]:
# Class names kept by the extraction loop, one entry per retained object.
classes

['aeroplane', 'aeroplane', 'person', 'person']

In [36]:
# Bounding boxes as [xmin, ymin, xmax, ymax] floats, index-aligned with `classes`.
bboxes

[[104.0, 78.0, 375.0, 183.0],
 [133.0, 88.0, 197.0, 123.0],
 [195.0, 180.0, 213.0, 229.0],
 [26.0, 189.0, 44.0, 238.0]]