In [4]:
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import pickle
import os
import imageio

## Setup
In order to proceed with dataset creation, you should download COCO 2014 Train/Val annotations (241mb).  
The instructions are for Ubuntu and require wget and unzip. It works on bash Windows as well.  
wget --directory-prefix=downloads http://images.cocodataset.org/annotations/annotations_trainval2014.zip  
mkdir -p dataset/annotations  
unzip downloads/annotations_trainval2014.zip -d dataset/annotations/

In [6]:
# initialize COCO api to handle instance annotations
dataType='train2014'
annFile='downloads/dataset/annotations/instances_{}.json'.format(dataType)
coco=COCO(annFile)

loading annotations into memory...


FileNotFoundError: [Errno 2] No such file or directory: 'downloads/dataset/annotations/instances_train2014.json'

## Dataset creation
COCO 2014 train annotations has 90 stuff classes and 82783 images
The full list of classes can be obtained by calling coco.cats

In [None]:
# Image segmentation by category/obj in gray scale/RGB
# paths
out_dir_cat='ann/cat/'
out_dir_obj='ann/obj/'
out_dir_rgb_cat='ann/rgb_cat/'
out_dir_rgb_obj='ann/rgb_obj/'
# Set between cat and obj image creation, categories has each object related with their pixels corresponding ID,
# obj are random coloured, making it easier for visualization
cat_save=True
# Set to true if you want showroom RGB images, useless for training data as
# it's does not contains pixels corresponding categories IDs 
rgb_save=False
# Get all imageIDs and categories
allImgIds = coco.getImgIds()
allImgCat=coco.cats
# Iterate in all images notations to create segmentation the corresponding segmentation image 
for IM in range(0,len(allImgIds)):
#     Get all image IDs
    imgIds=coco.getImgIds(imgIds = allImgIds)
#     Load corresponding image by it's ID
    img = coco.loadImgs(imgIds)[IM]
#     Get annotations by it's image's ID
    annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
#     Load annotations by it's corresponding IDs
    anns = coco.loadAnns(annIds)
#     Create matrix with the same size as original image to store classes in both gray and RGB scales 
    seg_imageGray=np.zeros((img['height'],img['width'])).astype(np.uint8)
    seg_imageRGB=np.zeros((img['height'],img['width'],3)).astype(np.uint8)
#     For each image annotation, extract their corresponding pixels objects
# each image can have multiple annotations
    for i in range(len(anns)):
#         Get object mask
        seg_image=coco.annToMask(anns[i])
#     Check for pixels in common and remove from image as solution to overlapping images
        seg_image=(seg_image-(seg_image&seg_imageGray))
#     Handle the creation of RGB images, just for showroom 
        if rgb_save==True:
#         Create a matrix with the same size as image, but with 3 channels 
            imgRGB = np.zeros((img['height'],img['width'],3))
#     Get random values for color mask [0,1] and store objects pixels in the matrix
            color_mask = np.random.random((1, 3)).tolist()[0]
            imgRGB[:,:,0]=((seg_imageGray|seg_image)==1)*color_mask[0]
            imgRGB[:,:,1]=((seg_imageGray|seg_image)==1)*color_mask[1]
            imgRGB[:,:,2]=((seg_imageGray|seg_image)==1)*color_mask[2]
#             Add coloured object into the main RGB image
            seg_imageRGB=seg_imageRGB+imgRGB
#     save images (categories/objects and RGB/gray)
        if cat_save==True:
            seg_imageGray=(seg_imageGray+((seg_imageGray|seg_image)==1)*(anns[i]['category_id']+50))
        else:
            seg_imageGray=(seg_imageGray+((seg_imageGray|seg_image)==1)*(50+anns[i]['category_id']+(110//len(anns))*i))
    if cat_save==True:
        imageio.imsave(out_dir_cat+img['file_name'], seg_imageGray.astype(np.uint8))
        if rgb_save==True:
            imageio.imsave(out_dir_rgb_cat+img['file_name'], Norm(seg_imageRGB,0,255).astype(np.uint8))
    else:
        imageio.imsave(out_dir_obj+img['file_name'], seg_imageGray.astype(np.uint8))
        if rgb_save==True:
            imageio.imsave(out_dir_rgb_obj+img['file_name'], Norm(seg_imageRGB,0,255).astype(np.uint8))