In [19]:
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
from pycococreatortools import pycococreatortools
import pathlib
from matplotlib import pylab as pl
import cv2

# Dataset layout: the images and their mask annotations live in two
# sub-folders of ROOT_DIR.
ROOT_DIR = 'train'
IMAGE_DIR, ANNOTATION_DIR = (
    os.path.join(ROOT_DIR, subdir)
    for subdir in ('crestedgecko_train2018', 'annotations')
)

# Building blocks for the generated image file names.
DATASET_NAME_PREFIX = 'CG'
TRAIN_DATASET, VAL_DATASET = 'train2018', 'val2018'

In [20]:
def resize_image(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array-like image whose ``shape[:2]`` is ``(height, width)``.
        width: Target width in pixels. When given, it takes precedence and
            ``height`` is ignored; the new height is derived from the
            aspect ratio.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself (not a copy) when neither ``width`` nor ``height``
        is given, otherwise the resized image returned by ``cv2.resize``.
    """
    (h, w) = image.shape[:2]

    if width is None and height is None:
        return image

    # The derived dimension is truncated to an int, which can reach 0 for
    # very elongated images; clamp to 1 px because cv2.resize rejects an
    # empty target size.
    if width is None:
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(image, dim, interpolation=inter)

# Rename every image in IMAGE_DIR to the dataset naming scheme
# (<prefix>_<dataset>_<12-digit index><ext>) and resize it to a common width.
# NOTE: this cell rewrites the files in IMAGE_DIR in place.
RESIZE_WIDTH = 640

# os.listdir() returns entries in arbitrary order; sort them so the numbering
# is reproducible between runs, and ignore sub-directories.
files = sorted(
    name for name in os.listdir(IMAGE_DIR)
    if os.path.isfile(os.path.join(IMAGE_DIR, name))
)

count = 1

for name in files:
    old_path = os.path.join(IMAGE_DIR, name)

    # Decode before renaming: cv2.imread returns None for files it cannot
    # read, and such files (e.g. hidden metadata files) are left untouched.
    imgdata = cv2.imread(old_path)
    if imgdata is None:
        print("Skipping unreadable image: {}".format(old_path))
        continue

    ext = pathlib.PurePosixPath(name).suffix
    new_name = DATASET_NAME_PREFIX + "_" + TRAIN_DATASET + "_{0:012d}".format(count) + ext
    new_path = os.path.join(IMAGE_DIR, new_name)

    if new_path != old_path:
        # os.rename can silently replace an existing file; refuse to clobber
        # an image that has not been processed yet.
        if os.path.exists(new_path):
            raise RuntimeError(
                "Cannot rename {} to {}: target already exists".format(old_path, new_path))
        os.rename(old_path, new_path)

    # resize image
    resize_data = resize_image(imgdata, width=RESIZE_WIDTH)
    if not cv2.imwrite(new_path, resize_data):
        raise IOError("Failed to write resized image: {}".format(new_path))

    count += 1

print("Renaming image file name is done!")
print("Dataset:{}".format(count - 1))


Renaming image file name is done!
Dataset:1


In [21]:
# Dataset-level metadata stored under the "info" key of the output JSON.
INFO = dict(
    description="Crested Gecko Dataset",
    url="https://github.com/asyncbridge/pycococreator",
    version="0.0.1",
    year=2018,
    contributor="asyncbridge",
    date_created=datetime.datetime.utcnow().isoformat(' '),
)

# Licenses stored under the "licenses" key of the output JSON.
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# One category per morph name; ids are assigned 1..N in the order listed, and
# all of them share the same supercategory.
CATEGORIES = list(
    {'id': category_id, 'name': category_name, 'supercategory': 'crestedgecko'}
    for category_id, category_name in enumerate(
        (
            'patternless',
            'dalmatian',
            'lilywhite',
            'tricolor',
            'bicolor',
            'axanthic',
            'harlequin',
        ),
        1,
    )
)

def filter_for_jpeg(root, files):
    """Return the paths of the JPEG files among ``files``.

    Args:
        root: Directory that contains ``files``; joined in front of each name.
        files: Iterable of file names.

    Returns:
        List of ``os.path.join(root, name)`` for every name ending in
        ``.jpeg`` or ``.jpg``, in the input order. The extension check is
        case-insensitive, so files such as ``IMG_0001.JPG`` are kept.
    """
    jpeg_suffixes = ('.jpeg', '.jpg')
    return [
        os.path.join(root, f)
        for f in files
        if f.lower().endswith(jpeg_suffixes)
    ]

def filter_for_annotations(root, files, image_filename):
    """Return the PNG annotation paths that belong to ``image_filename``.

    An annotation belongs to an image when it ends in ``.png`` and its base
    name (without extension) starts with the image's base name (without
    extension).

    Args:
        root: Directory that contains ``files``; joined in front of each name.
        files: Iterable of file names.
        image_filename: Path of the image whose annotations are wanted.

    Returns:
        List of matching ``os.path.join(root, name)`` paths, in input order.
    """
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]

    matches = []
    for f in files:
        path = os.path.join(root, f)
        if not path.endswith('.png'):
            continue
        annotation_stem = os.path.splitext(os.path.basename(path))[0]
        # Literal prefix comparison: the image name must not be interpreted
        # as a regular expression, otherwise characters such as "(", "." or
        # "+" in a file name change what is matched or raise re.error.
        if annotation_stem.startswith(image_stem):
            matches.append(path)

    return matches

def _category_id_for(annotation_filename):
    """Return the id of the first CATEGORIES entry named in the annotation's base name."""
    # Only the base name is inspected so that directory names containing a
    # category name cannot cause a false match.
    annotation_basename = os.path.basename(annotation_filename)
    for category in CATEGORIES:
        if category['name'] in annotation_basename:
            return category['id']
    raise ValueError(
        "No known category name found in annotation file name: {}".format(annotation_filename))


def main():
    """Build the COCO-style annotation file for the training images.

    Walks IMAGE_DIR for JPEG images, pairs each image with the PNG masks in
    ANNOTATION_DIR whose file name starts with the image's base name, and
    writes the collected image and annotation records to
    ``<ROOT_DIR>/instances_crestedgecko_train2018.json``.

    Raises:
        ValueError: If an annotation file name contains no category name
            from CATEGORIES.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id = 1
    segmentation_id = 1

    # The annotation tree is the same for every image, so walk it only once.
    annotation_dirs = [
        (annotation_root, annotation_names)
        for annotation_root, _, annotation_names in os.walk(ANNOTATION_DIR)
    ]

    # filter for jpeg images
    for image_root, _, image_names in os.walk(IMAGE_DIR):
        # Directory listings come back in arbitrary order; sort so that ids
        # are assigned reproducibly.
        image_files = sorted(filter_for_jpeg(image_root, image_names))

        # go through each image
        for image_filename in image_files:
            # Only the size is needed; close the file handle right away.
            with Image.open(image_filename) as image:
                image_size = image.size

            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(image_filename), image_size)
            coco_output["images"].append(image_info)

            # filter for associated png annotations
            for annotation_root, annotation_names in annotation_dirs:
                annotation_files = sorted(filter_for_annotations(
                    annotation_root, annotation_names, image_filename))

                # go through each associated annotation
                for annotation_filename in annotation_files:

                    print(annotation_filename)
                    class_id = _category_id_for(annotation_filename)

                    # NOTE(review): the crowd flag is derived from the image
                    # path, not the annotation file name - confirm this is
                    # the intended naming convention.
                    category_info = {'id': class_id, 'is_crowd': 'crowd' in image_filename}

                    binary_mask = np.asarray(Image.open(annotation_filename)
                        .convert('1')).astype(np.uint8)

                    annotation_info = pycococreatortools.create_annotation_info(
                        segmentation_id, image_id, category_info, binary_mask,
                        image_size, tolerance=2)

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    # The id advances even when no annotation record was
                    # produced for this mask.
                    segmentation_id = segmentation_id + 1

            image_id = image_id + 1

    with open('{}/instances_crestedgecko_train2018.json'.format(ROOT_DIR), 'w') as output_json_file:
        json.dump(coco_output, output_json_file)


if __name__ == "__main__":
    main()


train/annotations/CG_train2018_000000000001_patternless_0.png
