In [12]:
! pip install git+https://github.com/philferriere/cocoapi.git#egg=pycocotools^&subdirectory=PythonAPI



You are using pip version 19.0.3, however version 20.1 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [13]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import pickle
# Default (width, height) used for every matplotlib figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (8.0, 10.0)

In [14]:
# Build the path to the COCO "stuff" (background) annotation file.
# NOTE: this cell depends on where the annotations live on the local disk;
# both the directory and the split name are 'train2017' here.
dataDir = 'train2017'
dataType = 'train2017'
stuff_annotation_template = '{}/annotations/stuff_{}.json'
annFile = stuff_annotation_template.format(dataDir, dataType)

In [15]:
# Initialise the COCO API on the *stuff* annotation file built in the previous cell.
# NOTE: the name `coco` is rebound to the instance annotations further down,
# so the cells that query stuff categories must run before that rebinding.
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=71.45s)
creating index...
index created!


In [16]:
# Stuff supercategory names, grouped by whether they describe outdoor or indoor scenes.
outdoor_stuff = {
    "water",
    "ground",
    "solid",
    "sky",
    "plant",
    "structural",
    "building",
}
indoor_stuff = {
    "food-stuff",
    "textile",
    "furniture-stuff",
    "window",
    "floor",
    "ceiling",
    "wall",
    "raw-material",
}

### Generate a Set of Image IDs for Each Stuff Category

In [17]:
# Every stuff supercategory of interest, indoor and outdoor together.
stuff = outdoor_stuff | indoor_stuff

# getCatIds(supNms=...) returns the ids of the *categories* whose supercategory
# is in `stuff`, so the list below holds one image-id set per matching category.
supercategory_ids = coco.getCatIds(supNms=stuff)

list_of_supercategory_img_ids = [
    set(coco.getImgIds(catIds=category_id))
    for category_id in supercategory_ids
]

### Generate Person / No Person Combinations

In [18]:
# Switch to the "thing" (object instance) annotations.
# NOTE: this cell depends on where the annotations live on the local disk, and it
# rebinds both `annFile` and `coco`, replacing the stuff annotations loaded earlier.
instances_annotation_template = '{}/annotations/instances_{}.json'
annFile = instances_annotation_template.format(dataDir, dataType)
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=23.09s)
creating index...
index created!


In [19]:
# Split all image ids into those that contain at least one 'person' annotation
# and those that contain none.
person_id = coco.getCatIds(catNms=['person'])
all_img_ids = set(coco.getImgIds())
people_img_id_set = set(coco.getImgIds(catIds=person_id))

# Both results are exposed as lists so they can be sliced later on.
no_people_imgs = list(all_img_ids - people_img_id_set)
only_people_imgs = list(people_img_id_set)

In [20]:
# Get the people and no people img ids for EACH supercategory.
#
# For every image-id set in `list_of_supercategory_img_ids` this produces two
# parallel entries (same index, same order):
#   list_supercategory_people_img_ids[i]    -> ids in that set that contain a person
#   list_supercategory_no_people_img_ids[i] -> ids in that set that contain no person

# Build the lookup sets once instead of on every loop iteration.
people_img_id_set = set(only_people_imgs)
no_people_img_id_set = set(no_people_imgs)

list_supercategory_people_img_ids = list()
list_supercategory_no_people_img_ids = list()
for supercategory_img_ids in list_of_supercategory_img_ids:
    supercategory_img_id_set = set(supercategory_img_ids)

    # Fix: append the person-filtered ids. The previous code appended the whole,
    # unfiltered `supercategory_img_ids` set here.
    supercategory_people_imgs = list(supercategory_img_id_set & people_img_id_set)
    list_supercategory_people_img_ids.append(supercategory_people_imgs)

    supercategory_no_people_imgs = list(supercategory_img_id_set & no_people_img_id_set)
    list_supercategory_no_people_img_ids.append(supercategory_no_people_imgs)

### Convert Img Id Sets to Images in .pkl format

In [21]:
# Maximum number of images packaged for each group (people / no people).
MAX_IMGS_PER_GROUP = 1000


def _iter_labelled_images(coco_api, img_ids, presence_label):
    """Yield one ``(image, labels)`` tuple per image id.

    Args:
        coco_api: COCO API object used to look up annotations, categories and
            image metadata.
        img_ids: iterable of COCO image ids to package.
        presence_label: label that seeds every label set, e.g. "person" or
            "no-person".

    Yields:
        ``(image, labels)`` where ``image`` is whatever ``io.imread`` returns for
        the image's ``coco_url`` and ``labels`` is a set containing
        ``presence_label`` plus the supercategory of every annotation on the
        image, except the supercategory "other", which is skipped.
    """
    for img_id in img_ids:
        labels = {presence_label}
        for annotation in coco_api.loadAnns(coco_api.getAnnIds(img_id)):
            category = coco_api.loadCats(annotation["category_id"])[0]
            supercategory = category["supercategory"]
            if supercategory != "other":
                labels.add(supercategory)

        # The image itself is fetched from the URL stored in the image record.
        img_info = coco_api.loadImgs(img_id)[0]
        yield io.imread(img_info['coco_url']), labels


all_imgs = list()
supercategory_to_count = dict()

# No-people images first, then people images. Extending from the generator
# appends images one at a time as they are read.
all_imgs.extend(
    _iter_labelled_images(coco, no_people_imgs[:MAX_IMGS_PER_GROUP], "no-person")
)
all_imgs.extend(
    _iter_labelled_images(coco, only_people_imgs[:MAX_IMGS_PER_GROUP], "person")
)
    

In [22]:
# Convert in memory images to .pkl to be used by demo
with open('federated-learning-data.pkl', 'wb') as f:
    pickle.dump(all_imgs, f)