In [1]:

import argparse
import os
import time
import xml.etree.ElementTree as ET
from collections import OrderedDict

import numpy as np

In [2]:
# (year, split) pairs of the VOC image sets.
sets = [
    ('2007', 'train'),
    ('2007', 'val'),
    ('2007', 'test'),
]

# Class names; the position of a name in this list is used as its class id.
classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

In [3]:
# Running tally of boxes per class name; convert_annotation updates it.
class_count = dict()

In [4]:
def convert_annotation(dataset_path, year, image_id, list_file, include_difficult):
    '''
    Append the boxes of one VOC annotation file to ``list_file``.

    Reads ``<dataset_path>/VOC<year>/Annotations/<image_id>.xml`` and, for
    every ``object`` element whose name is in the module-level ``classes``
    list, writes `` xmin,ymin,xmax,ymax,class_id`` (note the leading space)
    to ``list_file``. The module-level ``class_count`` tally of that class is
    incremented once per box written.

    Args:
        dataset_path: directory that contains the ``VOC<year>`` folder.
        year: dataset year as it appears in the folder name, e.g. ``'2007'``.
        image_id: annotation file name without the ``.xml`` extension.
        list_file: writable text file-like object that receives the boxes.
        include_difficult: when false, objects whose ``difficult`` flag is 1
            are skipped.

    Raises:
        OSError: if the annotation file cannot be opened.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    '''
    annotation_path = '%s/VOC%s/Annotations/%s.xml' % (dataset_path, year, image_id)
    # Parse inside the context manager so the handle is released right away.
    with open(annotation_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        # A missing <difficult> tag is treated as "not difficult".
        difficult_node = obj.find('difficult')
        difficult = '0' if difficult_node is None else difficult_node.text

        class_name = obj.find('name').text
        if class_name not in classes:
            continue
        if not include_difficult and int(difficult) == 1:
            continue
        class_id = classes.index(class_name)

        # parse box coordinate to (xmin, ymin, xmax, ymax) format;
        # int(float(...)) accepts coordinates stored as e.g. "48.0"
        xml_box = obj.find('bndbox')
        box = tuple(
            int(float(xml_box.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        # write box info to txt
        list_file.write(" " + ",".join(str(item) for item in box) + ',' + str(class_id))
        # .get() lets the tally start from an empty dict without a KeyError.
        class_count[class_name] = class_count.get(class_name, 0) + 1
        

In [5]:
# Which VOC release and which split the following cells work on.
year, image_set = '2007', 'test'

# Root of the local VOCdevkit folder, plus its resolved real path.
dataset_path = r'C:\Users\xia\Documents\datasets\VOCdevkit'
dataset_realpath = os.path.realpath(dataset_path)

# Output directory (not referenced by the cells visible here).
output_path = r'C:\Users\xia\Documents\codes\20210403_目标检测\target_detection\2_yolov4\yolov4_pycharm\outputs'

In [6]:
# Load the whitespace-separated image ids of the selected split. The ids file
# is closed as soon as it has been read instead of leaking the handle.
image_set_file = '%s/VOC%s/ImageSets/Main/%s.txt' % (dataset_realpath, year, image_set)
with open(image_set_file) as ids_file:
    image_ids = ids_file.read().strip().split()
image_ids[:5]

['000001', '000002', '000003', '000004', '000006']

In [7]:
# NOTE(review): the annotation list is opened read-only here and the handle is
# not closed in any cell shown in this notebook. Writing to it (as
# convert_annotation does via list_file.write) would raise
# io.UnsupportedOperation — confirm whether mode 'w' was intended.
list_file = open('2007_test.txt', 'r')
list_file

<_io.TextIOWrapper name='2007_test.txt' mode='r' encoding='cp936'>

In [8]:
def annotation_parse(annotation_lines, class_names):
    '''
    parse annotation lines to get image dict and ground truth class dict

    Each line has the form ``image_path box box ...`` with space-separated
    tokens, where every box is ``xmin,ymin,xmax,ymax,class_id``. Blank lines
    are skipped and empty tokens caused by repeated, leading or trailing
    spaces are ignored.

    image dict would be like:
    annotation_records = {
        '/path/to/000001.jpg': {'100,120,200,235':'dog', '85,63,156,128':'car', ...},
        ...
    }

    ground truth class dict would be like:
    classes_records = {
        'car': [
                ['000001.jpg','100,120,200,235'],
                ['000002.jpg','85,63,156,128'],
                ...
               ],
        ...
    }

    Args:
        annotation_lines: iterable of annotation strings as described above.
        class_names: sequence of class names indexed by ``class_id``.

    Returns:
        (annotation_records, classes_records), both ``OrderedDict``.
        ``classes_records`` has an entry (possibly an empty list) for every
        name in ``class_names``.

    Raises:
        ValueError: if the last field of a box is not an integer.
        IndexError: if a class id is outside the index range of ``class_names``.
    '''
    annotation_records = OrderedDict()
    classes_records = OrderedDict((class_name, []) for class_name in class_names)

    for line in annotation_lines:
        # Drop the empty strings that split(' ') yields for extra spaces.
        tokens = [token for token in line.split(' ') if token]
        if not tokens:
            # blank line: nothing to record
            continue
        image_name, boxes = tokens[0], tokens[1:]
        image_basename = os.path.basename(image_name)

        box_records = {}
        for box in boxes:
            # strip box coordinate and class: the last comma separates them
            coordinate, _, class_id = box.rpartition(',')
            class_name = class_names[int(class_id)]
            box_records[coordinate] = class_name
            # append or add ground truth class item
            classes_records.setdefault(class_name, []).append([image_basename, coordinate])
        annotation_records[image_name] = box_records

    return annotation_records, classes_records

In [9]:
def get_dataset(annotation_file, shuffle=True):
    '''
    Read an annotation list file and return its lines.

    Args:
        annotation_file: path of a text file with one annotation record per
            line.
        shuffle: when true (the default), NumPy's global random generator is
            re-seeded from the current wall-clock time and the lines are
            shuffled, so the order differs between runs. Note that this
            overwrites any seed set earlier on the global generator.

    Returns:
        list of str: the file's lines with surrounding whitespace removed.
    '''
    with open(annotation_file) as f:
        lines = [line.strip() for line in f]

    if shuffle:
        np.random.seed(int(time.time()))
        np.random.shuffle(lines)

    return lines

In [10]:
# Load the annotation list in file order (no shuffling) and preview the
# first two records.
annotation_lines = get_dataset(annotation_file='2007_test.txt', shuffle=False)
annotation_lines[:2]

['C:\\Users\\xia\\Documents\\datasets\\VOCdevkit/VOC2007/JPEGImages/000001.jpg 48,240,195,371,11 8,12,352,498,14',
 'C:\\Users\\xia\\Documents\\datasets\\VOCdevkit/VOC2007/JPEGImages/000002.jpg 139,200,207,301,18']

In [11]:
# Build the per-image and per-class ground truth lookups from the loaded lines.
annotation_records, gt_classes_records = annotation_parse(
    annotation_lines=annotation_lines,
    class_names=classes,
)

In [None]:
# Preview only the first two images; displaying the whole mapping would print
# one entry per annotated image.
list(annotation_records.items())[:2]

In [20]:
# Peek at the box tokens of the first annotation line only; the one-element
# slice stops after the first record and prints nothing for an empty list.
for line in annotation_lines[:1]:
    tokens = line.split(' ')
    box_records = {}
    image_name, boxes = tokens[0], tokens[1:]
    print(boxes)

['48,240,195,371,11', '8,12,352,498,14']
