# File Overview

Dictionary structures included in this notebook:

annotations[id]
  * list of dictionaries containing:
    * 'class_id' : ID associated with annotation class
    * 'height' : height
    * 'left': (start pixel of bbox on left side)
    * 'top': (start pixel of bbox on top)
    * 'width': width

images[id]
  * 'url': image_url
  * 'image_size'
    * list of one dictionary containing:
      * 'depth': number of color channels (3 for the frames shown in this notebook)
      * 'height': height of image
      * 'width': width of image

classes[classID]
  * category/label name

# Import libraries and mount Drive

In [1]:
!pip install xmltodict



In [12]:
import json
import xmltodict
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
from io import BytesIO
from PIL import Image
import pandas as pd
import pickle
import urllib
import os
import gzip
from datetime import datetime,timezone,timedelta

In [None]:
# uncomment if using colab
# from google.colab import drive
# drive.mount('/content/drive')

/content/drive/My Drive/LangCog


# Load object labels

In [13]:
def loadObjList(txtFileLink):
    """Download a line-delimited text file and return its distinct lines, sorted.

    Args:
        txtFileLink: URL of a UTF-8 encoded text file with one entry per line.

    Returns:
        Alphabetically sorted list of the unique, non-empty lines with their
        line terminators removed.
    """
    # Imported locally because a bare `import urllib` does not guarantee that
    # the `urllib.request` submodule has been loaded.
    import urllib.request

    words = set()

    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(txtFileLink) as txtFile:
        for line in txtFile:
            # Strip only the line terminator ("\n" or "\r\n"). Slicing off the
            # last character would eat a real letter when the final line has
            # no trailing newline.
            currentWord = line.decode("utf-8").rstrip("\r\n")

            # Blank lines would otherwise become an empty label.
            if currentWord:
                words.add(currentWord)

    return sorted(words)  # de-duplicated and alphabetized

In [14]:
# Load the label list in this cell so it runs on a fresh kernel; `labels` is
# otherwise only assigned in the final "Dump" section of the notebook.
labels = loadObjList('https://raw.githubusercontent.com/brialorelle/headcam-objects/master/data/category_lists/categories.txt')

# dictionary mapping labels to ids (ids start at 1 and follow the sorted label order)
categories = {cat: catId for catId, cat in enumerate(labels, start=1)}

In [15]:
# Display the label -> id mapping built in the previous cell.
categories

{'bag': 1,
 'ball': 2,
 'basket': 3,
 'block': 4,
 'book': 5,
 'bottle': 6,
 'box': 7,
 'can': 8,
 'cat': 9,
 'computer': 10,
 'crayon': 11,
 'cup': 12,
 'door': 13,
 'drum': 14,
 'fruit': 15,
 'guitar': 16,
 'ipad': 17,
 'marker': 18,
 'other food': 19,
 'other toy': 20,
 'paper': 21,
 'phone': 22,
 'photograph': 23,
 'piano': 24,
 'plant': 25,
 'plate': 26,
 'play dough': 27,
 'play gym': 28,
 'puzzle': 29,
 'remote': 30,
 'shoe': 31,
 'sippy cup': 32,
 'spoon': 33,
 'stuffed animal': 34,
 'table': 35,
 'toilet': 36,
 'xylophone': 37}

So now, we have a list of labels from the object list and a mapping of category names to ids (this is for putting the data in COCO JSON format)

# Load JSONs

Download the output manifest and parse it into a list of frame dictionaries (the manifest stores one JSON object per line).

In [39]:
manifestFileName = "https://raw.githubusercontent.com/brialorelle/headcam-objects/master/data/annotations/broad_category_segmentations/24K/output.manifest"

# Use a timeout so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
manifestFile = requests.get(manifestFileName, timeout=60)
manifestFile.raise_for_status()

# The manifest holds one JSON object per line. Parsing line by line tolerates
# blank lines and "\r\n" line endings.
data = [json.loads(line) for line in manifestFile.text.split("\n") if line.strip()]

In [None]:
# Preview only the first few frames; printing every frame floods the output.
for frame in data[:3]:
  print(frame)

{'source-ref': 's3://interestingsaycam/images/11125-A_20150127_2814_01.mp4-38000.jpg', 'saycam-objects-3-workers-test': {'image_size': [{'width': 640, 'height': 480, 'depth': 3}], 'annotations': [{'class_id': 5, 'top': 38, 'left': 278, 'height': 148, 'width': 53}, {'class_id': 5, 'top': 176, 'left': 293, 'height': 28, 'width': 28}, {'class_id': 5, 'top': 166, 'left': 273, 'height': 23, 'width': 26}, {'class_id': 5, 'top': 0, 'left': 260, 'height': 60, 'width': 105}, {'class_id': 6, 'top': 2.6666666666666856, 'left': 264, 'height': 201.66666666666663, 'width': 84.66666666666669}, {'class_id': 0, 'top': 136.33333333333331, 'left': 477.6666666666667, 'height': 343, 'width': 161.66666666666669}, {'class_id': 13, 'top': 106, 'left': 5, 'height': 369, 'width': 556}, {'class_id': 12, 'top': 173, 'left': 492, 'height': 301, 'width': 148}]}, 'saycam-objects-3-workers-test-metadata': {'objects': [{'confidence': 0.09}, {'confidence': 0.09}, {'confidence': 0.09}, {'confidence': 0.09}, {'confidence

In [None]:
taskName = 'saycam-objects-3-workers-test'
taskNameMetadata = 'saycam-objects-3-workers-test-metadata'

images = {}       # frame id -> {'url': ..., 'image_size': ...}
annotations = {}  # frame id -> list of bounding-box dictionaries
classes = {}      # integer class id -> class name

# enumerate() supplies the arbitrary per-frame id without shadowing the builtin id().
for frameId, frame in enumerate(data):
  imagename = frame['source-ref'].split('/')[-1]
  image_url = 'http://langcog.stanford.edu/expts/saycam/frames/' + imagename
  images[frameId] = {'url': image_url, 'image_size': frame[taskName]['image_size']}
  annotations[frameId] = frame[taskName]['annotations'] # ignore confidence for now: 'confidence': frame[taskNameMetadata]['objects']}
  for key, className in frame[taskNameMetadata]['class-map'].items():
    # JSON object keys are strings while `classes` is keyed by int, so convert
    # before checking; the first name seen for an id is kept.
    classes.setdefault(int(key), className)

We now have 3 dictionaries.
- <b>images</b> stores the URL and image size of each frame, keyed by an arbitrary integer id
- <b>annotations</b> stores the list of bounding-box annotations for each frame, keyed by the same id as the image
- <b>classes</b> maps each class id to its class name, based on Sagemaker's id assignment

In [None]:
# Preview the first three entries; displaying the whole dictionary floods the output.
{frameId: annotations[frameId] for frameId in list(annotations)[:3]}

{0: [{'class_id': 5, 'height': 148, 'left': 278, 'top': 38, 'width': 53},
  {'class_id': 5, 'height': 28, 'left': 293, 'top': 176, 'width': 28},
  {'class_id': 5, 'height': 23, 'left': 273, 'top': 166, 'width': 26},
  {'class_id': 5, 'height': 60, 'left': 260, 'top': 0, 'width': 105},
  {'class_id': 6,
   'height': 201.66666666666663,
   'left': 264,
   'top': 2.6666666666666856,
   'width': 84.66666666666669},
  {'class_id': 0,
   'height': 343,
   'left': 477.6666666666667,
   'top': 136.33333333333331,
   'width': 161.66666666666669},
  {'class_id': 13, 'height': 369, 'left': 5, 'top': 106, 'width': 556},
  {'class_id': 12, 'height': 301, 'left': 492, 'top': 173, 'width': 148}],
 1: [{'class_id': 12,
   'height': 371.5,
   'left': 3.5,
   'top': 105,
   'width': 330.5},
  {'class_id': 12, 'height': 93, 'left': 63, 'top': 314, 'width': 100},
  {'class_id': 13, 'height': 477, 'left': 2, 'top': 2, 'width': 636}],
 2: [{'class_id': 0,
   'height': 131.33333333333334,
   'left': 24,
   '

In [None]:
# Display the class id -> class name mapping collected from the manifest's class-map entries.
classes

{0: 'Furniture',
 1: 'Vehicle (real)',
 3: 'Animal (real)',
 5: 'Clothing',
 6: 'Person',
 8: 'Food',
 9: 'Utensils',
 10: 'Toy (large)',
 11: 'Toy (small)',
 12: 'Other small object',
 13: 'Building',
 14: 'Plant'}

# Convert JSONs to COCO JSON format

In [None]:
import os
def get_filename_as_int(filename):
    """Return the leading id portion of a frame filename or URL.

    The directory part is dropped, then everything from the first '.' onward,
    then everything from the first '-' onward. For example
    '.../11125-A_20150127_2814_01.mp4-38000.jpg' yields '11125'.

    Args:
        filename: file name, path, or URL of the frame.

    Returns:
        The id prefix as a string. Despite the function's name it is not
        converted to int.

    Raises:
        ValueError: if `filename` is not a str (or path-like) value.
    """
    try:
        # basename() already removes every directory component, so no extra
        # split on '/' is needed.
        base = os.path.basename(filename)
        return base.split('.')[0].split('-')[0]
    except (TypeError, AttributeError) as exc:
        # Catch only the errors a bad argument can cause, and keep the cause
        # attached for debugging.
        raise ValueError("Could not derive an image id from filename %r." % (filename,)) from exc

In [None]:
# Show the image_size entry (depth, height, width) stored for the first image.
images[0]['image_size'][0]

{'depth': 3, 'height': 480, 'width': 640}

In [None]:
# Conversion function taking the dictionaries built above and writing them to a file in COCO JSON format
def convert(categories, images, annotations, json_file):
    """Write images, bounding boxes and categories to `json_file` as COCO-style JSON.

    Args:
        categories: mapping of category id -> category name.
        images: mapping of frame id -> {'url': ..., 'image_size': [{'width': ...,
            'height': ...}]}.
        annotations: mapping of frame id -> list of dictionaries with the keys
            'class_id', 'left', 'top', 'width' and 'height'. Every key must
            also be present in `images`.
        json_file: path of the JSON file to write. Missing parent directories
            are created.

    NOTE(review): image ids come from get_filename_as_int(url). Frames whose
    URLs map to the same id share a single "images" entry (the first one seen),
    while all of their boxes are still written. Confirm that the helper yields
    a unique id per frame.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    bnd_id = 1  # running bounding box id, unique across the whole file
    seen_image_ids = set()  # set gives O(1) membership tests

    for frame_key, boxes in annotations.items():
        im = images[frame_key]
        filename = im['url']
        # Computed once per frame; it is the same for every box of the frame.
        image_id = get_filename_as_int(filename)

        if image_id not in seen_image_ids:
            size = im['image_size'][0]
            json_dict["images"].append({
                "file_name": filename,
                "height": size['height'],
                "width": size['width'],
                "id": image_id})
            seen_image_ids.add(image_id)

        for an in boxes:
            box_width = an['width']
            box_height = an['height']
            json_dict["annotations"].append({
                "area": box_width * box_height,
                "iscrowd": 0,
                "image_id": image_id,
                # COCO bbox layout: [x_min, y_min, width, height]
                "bbox": [an['left'], an['top'], box_width, box_height],
                "category_id": an['class_id'],
                "id": bnd_id,
                "ignore": 0,
                "segmentation": [],
            })
            bnd_id += 1

    for cid, cate in categories.items():
        json_dict["categories"].append({"supercategory": "none", "id": cid, "name": cate})

    # dirname() is '' for a bare file name, and os.makedirs('') raises, so only
    # create the directory when there is one.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # `with` closes the file even if serialization fails part-way.
    with open(json_file, "w") as json_fp:
        json.dump(json_dict, json_fp)

In [None]:
# Write the COCO-style JSON to ./output.json, using the class id -> name mapping `classes` as the categories.
convert(classes, images, annotations, "./output.json")

# Load COCO JSON

# Dump

In [None]:
# Fetch the label list and save a local copy with one label per line.
labels = loadObjList('https://raw.githubusercontent.com/brialorelle/headcam-objects/master/data/category_lists/categories.txt')
with open('categories.txt', 'w') as labelFile:
    labelFile.writelines("%s\n" % label for label in labels)

In [None]:
import json
from pprint import pprint

# Reads a local copy of the manifest named 'output.manifest' from the working
# directory. The file holds one JSON object per line, so each non-blank line
# is parsed on its own.
with open('output.manifest') as f:
    data = [json.loads(line) for line in f if line.strip()]

# Preview the first few frames instead of printing the whole dataset.
pprint(data[:3])