In [16]:
import os
from tqdm import tqdm
import argparse
import xml.etree.ElementTree as ET

In [17]:
class VOCParser(object):
    """Read one Pascal VOC style XML annotation and return its boxes as parallel lists."""

    def __init__(self, norm_bbox=False):
        """
        Parse VOC style XML data; box coordinates are normalized into (0, 1) or kept in pixels.

        :param norm_bbox: when true, x values are divided by the image width and
            y values by the image height read from the annotation's ``size`` node.
        """
        self.norm_bbox = norm_bbox

    def parse(self, anno_file, data_base_dir, class_map, return_img=True):
        """
        Parse a single annotation file.

        :param anno_file: path of the XML annotation to read.
        :param data_base_dir: dataset root; the image is looked up under
            ``<data_base_dir>/JPEGImages/<filename from the XML>``.
        :param class_map: mapping from class name to class index.
        :param return_img: when true the first returned item is the raw bytes of
            the image file, otherwise it is the image path.
        :return: tuple ``(img, xmin, ymin, xmax, ymax, classes, classes_name)``
            where every item after the first is a list with one entry per kept object.
        :raises KeyError: if an object name is not a key of ``class_map``.
        """
        tree = ET.parse(anno_file)

        file_name = tree.findtext("filename")
        img_dir = os.path.join(data_base_dir, 'JPEGImages', file_name)
        if return_img:
            # Context manager so the handle is closed even when read() fails.
            with open(img_dir, 'rb') as img_file:
                img_dir = img_file.read()

        height = float(tree.findtext("./size/height"))
        width = float(tree.findtext("./size/width"))
        xmin, ymin, xmax, ymax = [], [], [], []
        classes, classes_name = [], []

        for obj in tree.findall('object'):
            # A missing or empty <difficult> tag is treated as "not difficult"
            # instead of raising AttributeError on a None element.
            difficult = (obj.findtext('difficult') or '0').strip()
            if difficult == '1':
                continue
            name = obj.find('name').text
            bbox = obj.find('bndbox')
            xmin_ = float(bbox.find('xmin').text.strip())
            ymin_ = float(bbox.find('ymin').text.strip())
            xmax_ = float(bbox.find('xmax').text.strip())
            ymax_ = float(bbox.find('ymax').text.strip())
            if self.norm_bbox:
                xmin_ /= width
                ymin_ /= height
                xmax_ /= width
                ymax_ /= height
            classes_name.append(name)
            classes.append(class_map[name])

            xmin.append(xmin_)
            ymin.append(ymin_)
            xmax.append(xmax_)
            ymax.append(ymax_)
        return img_dir, xmin, ymin, xmax, ymax, classes, classes_name

In [18]:
class COCOParser(object):
    """Placeholder for a COCO annotation parser; parsing is not implemented."""

    def __init__(self, norm_bbox=False):
        """Remember the ``norm_bbox`` flag; nothing in this class reads it yet."""
        self.norm_bbox = norm_bbox

    def parse(self, anno_file):
        """Stub: accepts an annotation path, does no work and returns ``None``."""
        return None

In [19]:
class VOCPrepare(object):
    """Convert a VOC dataset's XML annotations into a one-line-per-image text file."""

    def __init__(self, data_dir, class_name_dir, output_dir):
        """
        :param data_dir: dataset root containing an ``Annotations`` folder.
        :param class_name_dir: path of a text file with one class name per line;
            the line number (starting at 0) becomes the class index.
        :param output_dir: path of the text file that ``write`` appends to.
        """
        self.parser = VOCParser()

        anno_dir = os.path.join(data_dir, 'Annotations')
        # Keep only XML files (stray entries would crash the XML parser) and
        # sort them, because os.listdir returns entries in arbitrary order.
        self.xml_files = [
            os.path.join(anno_dir, entry)
            for entry in sorted(os.listdir(anno_dir))
            if entry.lower().endswith('.xml')
        ]

        self.data_dir = data_dir
        self.output_dir = output_dir
        # Context manager so the class-names file handle is closed promptly.
        with open(class_name_dir) as names_file:
            self.class_map = {name: idx for idx, name in enumerate(names_file.read().splitlines())}

    def write(self):
        """Parse every annotation and append one line per image to ``output_dir``.

        The file is opened in append mode, so running this twice against the
        same output path duplicates the lines.
        """
        all_objects = self.get_objects()

        with open(self.output_dir, 'a+', encoding='UTF-8') as f:
            for objects in tqdm(all_objects):
                self.write_single(f, objects)
        print('Text generated, samples: {}'.format(len(all_objects)))

    def write_single(self, f, objects):
        """Write one image record as ``<image> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...``.

        :param f: writable text file object.
        :param objects: parser result; item 0 is the image reference and items
            1-5 are the parallel xmin, ymin, xmax, ymax and class-index lists.
        """
        # Items 1..5 are column lists; pick element n_gt of each to form one box.
        gt = [','.join([str(i[n_gt]) for i in objects[1:6]]) for n_gt in range(len(objects[1]))]
        objects_new = str(objects[0]) + ' ' + ' '.join(gt)
        f.write(objects_new + "\n")

    def get_objects(self):
        """Parse all collected annotation files and return the non-``None`` results as a list."""
        all_objects = []
        for xml in self.xml_files:
            objects = self.parser.parse(xml, self.data_dir, self.class_map, return_img=False)
            if objects is not None:
                all_objects.append(objects)
        return all_objects

In [20]:
# data_prepare = VOCPrepare(r'C:\yolo\VOC2012',
#                           r'C:\yolo\yolo v5\Yolov5-main\data\sample\voc.names',
#                           r'C:\yolo\yolo v5\output.txt')
# data_prepare.write()

In [21]:
# Build the annotation list for VOC2007. write() opens the output in append
# mode, so re-running this cell adds the same lines to test.txt again.
data_prepare = VOCPrepare(
    data_dir=r'C:\yolo\VOC2007',
    class_name_dir=r'C:\yolo\yolo v5\Yolov5-main\data\sample\voc.names',
    output_dir=r'C:\yolo\yolo v5\test.txt',
)
data_prepare.write()

100%|███████████████████████████████████████████████████████████████████████████| 4952/4952 [00:00<00:00, 29134.75it/s]

Text generated, samples: 4952



