 # Table of Contents
<div class="toc" style="margin-top: 1em;"><ul class="toc-item" id="toc-level0"></ul></div>

In [7]:
# %load ../pascal_voc_parser.py
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
def get_data(input_path, visualise=False):
    """Load data from an input file.
      https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19
      
    """
    all_imgs = []

    classes_count = {}

    class_mapping = {}

    data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]


    print('Parsing annotation files....')

    for data_path in data_paths:

        annot_path = os.path.join(data_path, 'Annotations')
        imgs_path = os.path.join(data_path, 'JPEGImages')
        imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')
        imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')

        trainval_files = []
        test_files = []
        try:
            with open(imgsets_path_trainval) as f:
                for line in f:
                    trainval_files.append(line.strip() + '.jpg')
        except Exception as e:
            print(e)

        try:
            with open(imgsets_path_test) as f:
                for line in f:
                    test_files.append(line.strip() + '.jpg')
        except Exception as e:
            if data_path[-7:] == 'VOC2012':
                # this is expected, most pascal voc distibutions dont have the test.txt file
                pass
            else:
                print(e)

        annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]
        idx = 0
        for annot in annots:
            try:
                idx += 1

                et = ET.parse(annot)
                element = et.getroot()

                element_objs = element.findall('object')
                element_filename = element.find('filename').text
                element_width = int(element.find('size').find('width').text)
                element_height = int(element.find('size').find('height').text)

                if len(element_objs) > 0:
                    annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,
                                       'height': element_height, 'bboxes': []}

                    if element_filename in trainval_files:
                        annotation_data['imageset'] = 'trainval'
                    elif element_filename in test_files:
                        annotation_data['imageset'] = 'test'
                    else:
                        annotation_data['imageset'] = 'trainval'

                for element_obj in element_objs:
                    class_name = element_obj.find('name').text
                    if class_name not in classes_count:
                        classes_count[class_name] = 1
                    else:
                        classes_count[class_name] += 1

                    if class_name not in class_mapping:
                        class_mapping[class_name] = len(class_mapping)

                    obj_bbox = element_obj.find('bndbox')
                    x1 = int(round(float(obj_bbox.find('xmin').text)))
                    y1 = int(round(float(obj_bbox.find('ymin').text)))
                    x2 = int(round(float(obj_bbox.find('xmax').text)))
                    y2 = int(round(float(obj_bbox.find('ymax').text)))
                    difficulty = int(element_obj.find('difficult').text) == 1
                    annotation_data['bboxes'].append(
                        {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
                all_imgs.append(annotation_data)

                if visualise:
                    img = cv2.imread(annotation_data['filepath'])
                    for bbox in annotation_data['bboxes']:
                        cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[
                                        'x2'], bbox['y2']), (0, 0, 255))
                    cv2.imshow('img', img)
                    cv2.waitKey(0)

            except Exception as e:
                print(e)
                continue
                
    print("Parsing annotation files Finished without error!")
    return all_imgs, classes_count, class_mapping


In [8]:
all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')

Parsing annotation files....
Parsing annotation files Finished without error!


In [9]:
classes_count

{'aeroplane': 1002,
 'bicycle': 837,
 'bird': 1271,
 'boat': 1059,
 'bottle': 1561,
 'bus': 685,
 'car': 2492,
 'cat': 1277,
 'chair': 3056,
 'cow': 771,
 'diningtable': 800,
 'dog': 1598,
 'horse': 803,
 'motorbike': 801,
 'person': 17401,
 'pottedplant': 1202,
 'sheep': 1084,
 'sofa': 841,
 'train': 704,
 'tvmonitor': 893}

In [10]:
class_mapping

{'aeroplane': 16,
 'bicycle': 2,
 'bird': 9,
 'boat': 14,
 'bottle': 7,
 'bus': 8,
 'car': 5,
 'cat': 17,
 'chair': 3,
 'cow': 10,
 'diningtable': 4,
 'dog': 12,
 'horse': 18,
 'motorbike': 13,
 'person': 0,
 'pottedplant': 19,
 'sheep': 1,
 'sofa': 11,
 'train': 15,
 'tvmonitor': 6}

In [11]:
type(all_imgs)

list

In [12]:
all_imgs[0]

{'bboxes': [{'class': 'person',
   'difficult': False,
   'x1': 184,
   'x2': 272,
   'y1': 47,
   'y2': 149}],
 'filepath': '/home/abanihi/Documents/deep-data/VOCdevkit/VOC2012/JPEGImages/2012_003050.jpg',
 'height': 334,
 'imageset': 'trainval',
 'width': 500}

In [14]:
if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

In [15]:
classes_count

{'aeroplane': 1002,
 'bg': 0,
 'bicycle': 837,
 'bird': 1271,
 'boat': 1059,
 'bottle': 1561,
 'bus': 685,
 'car': 2492,
 'cat': 1277,
 'chair': 3056,
 'cow': 771,
 'diningtable': 800,
 'dog': 1598,
 'horse': 803,
 'motorbike': 801,
 'person': 17401,
 'pottedplant': 1202,
 'sheep': 1084,
 'sofa': 841,
 'train': 704,
 'tvmonitor': 893}