## FILTERING OBJECT CATEGORIES
This notebook shows how to walk through the COCO dataset with its API and extract every image that contains at least one object of the desired categories. It also writes the bounding boxes of those objects to one txt file per image, in the label format darknet uses for training and testing.

In [None]:
%matplotlib inline
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pylab
import skimage.io as io
from pycocotools.coco import COCO
# default figure size used by every plot drawn in this notebook
pylab.rcParams.update({'figure.figsize': (8.0, 10.0)})

In [None]:
# Root folder of the COCO checkout and the dataset split to work on.
dataDir = '..'
dataType = 'val2014'

# Instance-annotation file of that split, located under <dataDir>/annotations/.
annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)

In [None]:
# create the COCO API handle from the instance-annotation file
coco = COCO(annotation_file=annFile)

In [None]:
# display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
catNames = [cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(catNames)))

# `nms` holds the distinct supercategory names as a set. It is sorted for
# printing only, because the iteration order of a set of strings is not stable
# from one kernel session to the next.
nms = {cat['supercategory'] for cat in cats}
print('COCO supercategories: \n{}'.format(' '.join(sorted(nms))))

In [None]:
# get all images containing at least one of given categories
catNms = ['sheep','bottle']

# Resolve each name on its own so that a typo is caught here: a name that
# matches nothing yields an empty id list, and asking getImgIds() for an empty
# category filter returns every image of the dataset instead of none.
catIdLists = [coco.getCatIds(catNms=[catNm]) for catNm in catNms]
unresolved = [catNm for catNm, ids in zip(catNms, catIdLists) if len(ids) != 1]
if unresolved:
    raise ValueError('no unique COCO category id for: {}'.format(', '.join(unresolved)))

# array of shape (len(catNms), 1): one single-element row per requested category
catIds = np.array(catIdLists)

# COCO category id -> zero-based class index, following the order of catNms
catIdsDict = {catId: classIdx for classIdx, catId in enumerate(catIds.flatten().tolist())}

# union of the image ids of all requested categories; the set drops images
# that contain more than one of the categories
imgIdSet = set()
for catId in catIds.flatten().tolist():
    imgIdSet.update(coco.getImgIds(catIds=[catId]))
imgIds = sorted(imgIdSet)

imgJSON = coco.loadImgs(imgIds)
print('total images: ',len(imgJSON))

In [None]:
# create destination folders for the copied image files and their label files
# (the trailing '/' is kept because later cells append file names to these strings)
subsetName = dataType + '_' + '_'.join(catNms)
destDir = '%s/images/%s/' % (dataDir, subsetName)
destLabelDir = '%s/labels/%s/' % (dataDir, subsetName)

# makedirs also creates missing parent folders and, with exist_ok, does not
# fail when the cell is run a second time
for folder in (destDir, destLabelDir):
    os.makedirs(folder, exist_ok=True)

In [None]:
import cv2

# Preview a single image with the boxes of the first requested category drawn on it.
imgidx = 0  # position in imgJSON of the image to preview
previewImg = imgJSON[imgidx]
firstCatId = int(catIds.flatten()[0])

# all annotations of the preview image; only those of the first category are drawn
anns = coco.loadAnns(coco.getAnnIds(imgIds=previewImg['id'], iscrowd=None))

previewPath = '%s/images/%s/%s'%(dataDir,dataType,previewImg['file_name'])
image = cv2.imread(previewPath)
if image is None:
    raise FileNotFoundError('could not read image: ' + previewPath)
# OpenCV loads pixels as BGR while matplotlib displays RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

for an in anns:
    if an['category_id'] == firstCatId:
        # COCO bbox is [x, y, width, height]; cv2.rectangle needs integer corners
        x,y,w,h = [int(i) for i in an['bbox']]
        cv2.rectangle(image,(x,y),(x+w,y+h),[255,0,0],2)
plt.imshow(image)

In [None]:
# Copy every selected image into destDir and write one label file per image
# into destLabelDir.
# Each label line follows the darknet layout
#     <class> <x_center> <y_center> <width> <height>
# separated by spaces, with the box centre and size expressed as fractions of
# the image width / height. COCO stores a bbox as [x_min, y_min, width, height]
# in pixels, so the centre is derived from the corner plus half the size.
wantedCatIds = [int(catId) for catId in np.asarray(catIds).flatten()]

# One randomly chosen image gets its boxes drawn and displayed as a visual check.
showBox = False
showThis = np.random.randint(len(imgJSON)) if len(imgJSON) > 0 else -1

for i,img in enumerate(imgJSON):
    print('\r{} of {}'.format(i + 1,len(imgJSON)),end='')
    imgPath = '%s/images/%s/%s'%(dataDir,dataType,img['file_name'])
    if not os.path.isfile(imgPath):
        # without the image there is nothing to copy, so no label file is written either
        print('\nimage file not found, skipped: ',imgPath)
        continue

    annIds = coco.getAnnIds(imgIds=img['id'],catIds=wantedCatIds,iscrowd=None)
    if len(annIds) == 0:
        # skip only this image; the remaining images are still exported
        print('\nannotation IDS IS EMPTY for ',img['file_name'])
        continue
    anns = coco.loadAnns(annIds)

    showBox = (i == showThis)
    if showBox:
        image = cv2.imread(imgPath)
        if image is None:
            # unreadable image: export it anyway, just without the preview
            showBox = False
        else:
            # OpenCV loads pixels as BGR while matplotlib displays RGB
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # label file shares the image's base name; splitext keeps names with extra dots intact
    labelName = os.path.join(destLabelDir, os.path.splitext(img['file_name'])[0] + '.txt')
    with open(labelName,'w') as f:
        for an in anns:
            if an['category_id'] not in catIdsDict:
                continue
            boxX, boxY, boxW, boxH = an['bbox']
            if showBox:
                x,y,w,h = [int(v) for v in an['bbox']]
                cv2.rectangle(image,(x,y),(x+w,y+h),[255,0,0],2)

            labelLine = [str(catIdsDict[an['category_id']]),
                         '%.4f' % ((boxX + boxW / 2) / img['width']),
                         '%.4f' % ((boxY + boxH / 2) / img['height']),
                         '%.4f' % (boxW / img['width']),
                         '%.4f' % (boxH / img['height'])]
            f.write(' '.join(labelLine) + '\n')

    if showBox:
        print('\nshowThis = ',showThis)
        plt.imshow(image)
        showBox = False

    # copy the image next to the other selected images
    shutil.copy(imgPath, destDir)