In [None]:
import pandas as pd
import xmltodict
import os
import cv2
from datetime import datetime
import json

## File parsing (image and labels)

In [None]:
# Directory that is scanned for VOC XML annotation files.
dataset_path = '../data/CommodityImg_Dataset_20200831_bottle_splitted/images/'
# Directory that receives the generated YOLO label files.
output_path = '../data/CommodityImg_Dataset_20200831_bottle_splitted/images_yolo/'
# makedirs(exist_ok=True) also creates missing parent directories and is a
# no-op when the directory already exists, so the cell can be re-run safely.
os.makedirs(output_path, exist_ok=True)

Scan the VOC XML annotation files in the dataset folder and convert each one to a YOLO label file.

In [None]:
def parse_one_voc_object(obj):
    """Extract the class name and box geometry from one VOC object entry.

    Parameters
    ----------
    obj : mapping
        Must provide ``obj['name']`` and ``obj['bndbox']`` with the keys
        ``xmin``, ``xmax``, ``ymin`` and ``ymax``; each coordinate is passed
        through ``float()``.

    Returns
    -------
    tuple
        ``(name, (center_x, center_y), (width, height))`` expressed in the
        same units as the input coordinates (no normalisation is applied).
    """
    label = obj['name']
    box = obj['bndbox']

    # Corner coordinates, converted once and reused below.
    left = float(box['xmin'])
    right = float(box['xmax'])
    top = float(box['ymin'])
    bottom = float(box['ymax'])

    center = ((left + right) * 0.5, (top + bottom) * 0.5)
    extent = (right - left, bottom - top)
    return label, center, extent

In [None]:
# Maps class name -> integer class id; ids are handed out in order of first
# appearance while the annotation files are scanned.
dict_classes = {}

name_token_desc = ['obj_name','side_name','obj_idx','unknown1','unknown2','unknown3','unknown4','unknown5']

# Scan the dataset folder in sorted order: os.listdir() returns entries in
# arbitrary order, which would make the class ids differ from run to run.
for filename in sorted(os.listdir(dataset_path)):
    # only XML (VOC annotation) files are converted
    if not filename.lower().endswith('.xml'):
        continue
    print(filename)

    # read the VOC annotation file; the with-block closes the handle
    with open(os.path.join(dataset_path, filename), 'r') as f:
        xml_string = f.read()

    # parse xml file (extract annotations)
    voc_object = xmltodict.parse(xml_string, dict_constructor=dict)['annotation']
    img_size = (float(voc_object['size']['width']), float(voc_object['size']['height']))

    # xmltodict gives a dict for a single <object>, a list for several, and
    # no 'object' key at all when the image has no annotation; normalise all
    # three cases to a list so one code path handles them.
    voc_anno_list = voc_object.get('object') or []
    if isinstance(voc_anno_list, dict):
        voc_anno_list = [voc_anno_list]

    bbox_list = []
    for one_bbox in voc_anno_list:
        obj_name, obj_bbox_center, obj_bbox_size = parse_one_voc_object(one_bbox)
        # register unseen classes with the next free id
        obj_idx = dict_classes.setdefault(obj_name, len(dict_classes))
        # YOLO boxes are (center_x, center_y, width, height) relative to the image size
        obj_bbox = (obj_bbox_center[0]/img_size[0], obj_bbox_center[1]/img_size[1],
                    obj_bbox_size[0]/img_size[0], obj_bbox_size[1]/img_size[1])
        bbox_list.append({'obj_name':obj_name, 'obj_idx':obj_idx, 'obj_bbox':obj_bbox})

    # write to yolo file: one "<class_id> <cx> <cy> <w> <h>" line per box
    # (the file is left empty when the image has no annotated object)
    label_path = os.path.join(output_path, os.path.splitext(filename)[0] + '.txt')
    with open(label_path, 'w') as fo:
        for row in bbox_list:
            fo.write(' '.join(str(value) for value in (row['obj_idx'], *row['obj_bbox'])) + '\n')

# write classes file: one class name per line, line number == class id
sorted_classes_list = sorted(dict_classes.items(), key=lambda x: x[1])
with open(os.path.join(output_path, 'classes.txt'), 'w') as fo:
    for class_name, class_idx in sorted_classes_list:
        fo.write(class_name + '\n')