In [1]:
import os, yaml
from PIL import Image
import xml.etree.ElementTree as ET

In [2]:
# Collect the annotation (XML) and image (PNG / JPG) files of every class folder
all_xml = []
all_png = []
all_jpg = []

# Absolute path of the current working directory (abspath('') resolves to it)
abs_path = os.path.abspath('')
data_dir = 'data'
paths = ['red', 'green', 'yellow']

for path in paths:
    # os.listdir returns entries in arbitrary order; sort them so the lists
    # (and every artefact generated from them) are identical on every run
    files = [os.path.join(data_dir, path, name)
             for name in sorted(os.listdir(os.path.join(data_dir, path)))]

    # Bucket the files by extension (case-sensitive, lower-case extensions only)
    all_xml += [name for name in files if name.endswith('.xml')]
    all_png += [name for name in files if name.endswith('.png')]
    all_jpg += [name for name in files if name.endswith('.jpg')]

# Sanity check: number of annotation, PNG and JPG files found
print(len(all_xml), len(all_png), len(all_jpg))

1659 0 1659


# Create JPG from PNG and Delete PNG

PNG files take up a lot of space. Because the TFRecord we eventually build stores the raw binary data of each image, we convert all PNG files to JPG ahead of time to keep the record small.

In [3]:
# Convert every PNG found by the listing cell to JPG, then delete the PNG
for png_file in all_png:
    # all_png is a snapshot taken when the listing cell ran; skip files that a
    # previous run of this cell already converted and removed so that the
    # cell can safely be executed more than once
    if not os.path.exists(png_file):
        continue

    # Same location and base name, only the extension changes
    jpg_file = os.path.splitext(png_file)[0] + '.jpg'

    # The context manager closes the PNG's file handle before it is deleted
    with Image.open(png_file) as image:
        # Convert to plain RGB first (JPG cannot store alpha / palette data)
        image.convert('RGB').save(jpg_file)

    # Remove the PNG only after the JPG has been written
    os.remove(png_file)

# Create YAML File

In [4]:
def _image_dir_name(annotated_path, xml_file):
    """Return the name of the folder that contains the annotated image.

    Parameters
    ----------
    annotated_path : str or None
        Text of the annotation's ``<path>`` element. It may use either
        ``\\`` or ``/`` as separator, depending on where it was written.
    xml_file : str
        Location of the annotation file itself; used as a fallback.

    Returns
    -------
    str
        The second-to-last component of ``annotated_path``. When the element
        is missing, empty or has no parent folder, the name of the folder
        holding ``xml_file`` is returned instead (this assumes annotations
        are stored next to their images).
    """
    if annotated_path:
        # Normalise the separators instead of guessing the OS from the first
        # character, so 'D:\\...' and 'C:/...' paths are handled as well
        parts = annotated_path.replace('\\', '/').split('/')
        if len(parts) >= 2 and parts[-2]:
            return parts[-2]
    return os.path.basename(os.path.dirname(xml_file))


def _parse_boxes(root):
    """Return one dict per ``<object>`` element found under ``root``.

    Each dict holds the class label plus the box as top-left corner
    (``xmin``, ``ymin``) and size (``x_width``, ``y_height``), all computed
    from the integer ``<bndbox>`` corner coordinates.
    """
    boxes = []
    for obj in root.findall('object'):
        boundingbox = obj.find('bndbox')
        xmin = int(boundingbox.findtext('xmin'))
        ymin = int(boundingbox.findtext('ymin'))
        xmax = int(boundingbox.findtext('xmax'))
        ymax = int(boundingbox.findtext('ymax'))

        boxes.append({
            'class': obj.findtext('name'),
            'xmin': xmin,
            'ymin': ymin,
            'x_width': xmax - xmin,
            'y_height': ymax - ymin
        })
    return boxes


all_parsed_xml = []

for xml_file in all_xml:
    root = ET.parse(xml_file).getroot()

    # Always reference the JPG image. splitext copes with file names that
    # carry no extension or one that is not exactly three characters long
    filename = os.path.splitext(root.findtext('filename'))[0] + '.jpg'

    # Rebuild the image location relative to the data folder
    dirpath = 'data/' + _image_dir_name(root.findtext('path'), xml_file)

    # Create the image object and append to list
    all_parsed_xml.append({
        'annotations': _parse_boxes(root),
        'class': 'image',
        'filename': os.path.join(dirpath, filename)
    })

In [5]:
# Destination of the combined annotation index
output_file = 'data.yml'

# Serialise all parsed annotations as a single block-style YAML document
with open(output_file, 'w') as outfile:
    outfile.write(yaml.dump(all_parsed_xml, default_flow_style=False))