In [1]:
import os
import json
import cv2 as cv
from PIL import Image
import xml.etree.ElementTree as ET

# Pathing

In [2]:
# imagenet bbox
bbox_path = 'E:/CV_Project/imagenet/bbox/Annotation'
# imagenet
image_cats = 'C:/Users/Belda/Documents/Harvard/Comp_Vision/Project/imageNetCatNumber.txt'
imgnet_imgs = 'E:/CV_Project/imagenet/ILSVRC/Data/DET/train/ILSVRC2013_train'
# coco as pascal voc
bear_xml = 'F:/Det_Project/train_bears'
person_xml = 'F:/Det_Project/train_person'

In [3]:
# this is to map old ids to my new categories
# 0:background
# 1:fox
# 2:bear
# 3:hog
# 4:dog
# 5:person
label_map = {'background':0, 'fox':1, 'bear':2, 'n02119789':1, 'n02119022':1,
            'n02120505':1, 'n02133161':2, 'n02395406':3, 'n02110185':4, 
             'n02110063':4, 'n02109047':4, 'person':5}
# getting counts for images to balance training across all cats
# ~1K images to train
count_map = {1:75, 2:100, 3:200, 4:67, 5:200}
output_folder = 'F:/Det_Project/full_dataset'

In [1]:
# this is from project in utils.py
# it will parse the xml and return new id map along with boxes
def parse_annotation(annotation_path):
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    boxes = list()
    labels = list()
    difficulties = list()
    for object in root.iter('object'):
        # there are some errors in the XML files
        # for the coco format annotations that were ported
        try:
            difficult = int(object.find('difficult').text == '1')
            label = object.find('name').text.lower().strip()
            bbox = object.find('bndbox')
            xmin = int(bbox.find('xmin').text)
            ymin = int(bbox.find('ymin').text)
            xmax = int(bbox.find('xmax').text)
            ymax = int(bbox.find('ymax').text)
        except: 
            continue
        if label not in label_map:
            continue
        if not bbox or not xmin or not ymin or not xmax or not ymax:
            continue

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
        difficulties.append(difficult)

    return {'boxes':boxes, 'labels':labels, 'difficulties':difficulties}

# coco as pascal voc

In [5]:
def getImgObjFromCOCO(path_dir, image_count):
    files = os.listdir(path_dir)
    train_images = list()
    train_objects = list()
    test_images = list()
    test_objects = list()
    img_index = 0
    for file in files:
        if '.xml' in file:
            # checking for bad parsing,
            # empty labels or boxes
            bad_parse = 0
            try:
                annote = parse_annotation(os.path.join(path_dir, file))
            except: bad_parse = 1
            if bad_parse:
                continue
            if not annote['boxes'] or not annote['labels']:
                continue
                
            filename = file.split('.xml')[0]
            if img_index <= image_count:
                train_images.append(os.path.join(path_dir, filename+'.jpg'))
                train_objects.append(annote)
            else:
                test_images.append(os.path.join(path_dir, filename+'.jpg'))
                test_objects.append(annote)
            img_index = img_index + 1
    return train_images, train_objects, test_images, test_objects

In [6]:
image_count = count_map[label_map['bear']]
bear_train_images, bear_train_objects, bear_test_images, bear_test_objects = getImgObjFromCOCO(bear_xml, image_count)

In [7]:
print('There are {} bear images to train with.'.format(len(bear_train_images)))

There are 101 bear images to train with.


In [8]:
image_count = count_map[label_map['person']]
person_train_images, person_train_objects, person_test_images, person_test_objects = getImgObjFromCOCO(person_xml, image_count)

In [9]:
print('There are {} person images to train with.'.format(len(person_train_images)))

There are 198 person images to train with.


# Imagenet images

In [10]:
with open (image_cats) as f:
    categories = f.read().splitlines()

# the image list will just be absolute filepaths
imgnet_train_images = list()
imgnet_train_objects = list()
imgnet_test_images = list()
imgnet_test_objects = list()
flag = 0

for cat in categories:
    files = os.listdir(os.path.join(bbox_path, cat))
    image_count = count_map[label_map[cat]]
    img_index = 0
    for file in files:
        filename = file.split('.xml')[0]
        img_path = os.path.join(imgnet_imgs, cat, filename+'.JPEG')
        # need to check for correlated image
        # there is a discrepency between DET dataset and bounding box dataset
        if os.path.exists(img_path):
            # need to check for bad parsing there's bad XML in Imagenet
            bad_parse = 0
            try:
                annote = parse_annotation(os.path.join(bbox_path, cat, file))
            except: bad_parse = 1
            if bad_parse:
                continue
            if img_index <= image_count:
                imgnet_train_images.append(img_path)
                imgnet_train_objects.append(annote)
            else:
                imgnet_test_images.append(img_path)
                imgnet_test_objects.append(annote)
        img_index = img_index + 1

In [11]:
print('There are {} images from Imagenet.'.format(len(imgnet_train_images)))

There are 720 images from Imagenet.


# Combining COCO and Imagenet sets

In [12]:
training_images = imgnet_train_images
training_images.extend(bear_train_images[:count_map[label_map['bear']]])
training_images.extend(person_train_images[:count_map[label_map['person']]])

training_objects = imgnet_train_objects
training_objects.extend(bear_train_objects[:count_map[label_map['bear']]])
training_objects.extend(person_train_objects[:count_map[label_map['person']]])

testing_images = imgnet_test_images
testing_images.extend(bear_test_images[count_map[label_map['bear']]:])
testing_images.extend(person_test_images[count_map[label_map['person']]:])

testing_objects = imgnet_test_objects
testing_objects.extend(bear_test_objects[count_map[label_map['bear']]:])
testing_objects.extend(person_test_objects[count_map[label_map['person']]:])

# writing dataset to JSON

In [13]:
with open(os.path.join(output_folder, 'label_map.json'), 'w') as j:
    json.dump(label_map, j)
with open(os.path.join(output_folder, 'TRAIN_images.json'), 'w') as j:
    json.dump(training_images, j)
with open(os.path.join(output_folder, 'TRAIN_objects.json'), 'w') as j:
    json.dump(training_objects, j)
with open(os.path.join(output_folder, 'TEST_images.json'), 'w') as j:
    json.dump(testing_images, j)
with open(os.path.join(output_folder, 'TEST_objects.json'), 'w') as j:
    json.dump(testing_objects, j)

In [14]:
print('There are {} images to train from.'.format(len(training_images)))
print('There are {} images to test from.'.format(len(testing_images)))

There are 1018 images to train from.
There are 3811 images to test from.
