In [54]:
import glob
import ujson 
import os
import re
import sys
from PIL import Image

In [55]:
def write_json(readpath, grep_pattern="*.jpeg", write_metadata=True):
    """
    readpath: the location of the dataset. We expect to see jpegs and corresponding human annotations in a json file
    grep_pattern: "*.jpeg" pulls all jpegs in the first level. Do "*/*.jpeg" if your data is structured as "product_type"/"*.jpeg"
    write_metadata: Decorates each jpeg with available metadata. Expects each jpeg to have a -metadata.json
    """
    anns = []
    files = glob.glob('{}/{}'.format(readpath,grep_pattern))
    for f in files:
        f_dict = {"filepath": f}
        
        # get image dimensions, we need this later to unnormalize boxes if needed
        with Image.open(f) as img:
            width, height = img.size
            f_dict['width'] = width
            f_dict['height'] = height
        
        # get the corresponding annotations
        annpath = re.sub(".jpeg", "-annotation.json", f)
        if not os.path.exists("{}".format(annpath)):
            print("Warning: Annotation does not exist for {}".format(f))
            continue
        with open(annpath,'r') as af:
            f_dict["annotations"] = ujson.load(af)
            
        # get the corresponding metadata
        metapath = re.sub(".jpeg", "-metadata.json", f)
        if write_metadata and os.path.exists("{}".format(metapath)):
            with open(metapath, 'r') as mf:
                f_dict["metadata"] = ujson.load(mf)
        anns.append(f_dict)
    return anns

In [56]:
json_ann = write_json('/data/furniture_stock_data_24pt_filtered/Rocking_Chairs')



In [60]:
json_ann

[{'annotations': [{'class': 'Rocking_Chairs',
    'xmax': 1.0,
    'xmin': 0.0,
    'ymax': 0.746666666666666,
    'ymin': 0.0}],
  'filepath': '/data/furniture_stock_data_24pt_filtered/Rocking_Chairs/6841c20aac6c53009f2cb920311c927d34e267ef.jpeg',
  'height': 3264,
  'metadata': [{'asset_rank': 1,
    'item_id': '47628372',
    'product_id': '52SV03WV22FB',
    'url': 'https://i5.walmartimages.com/asr/5206c586-ea57-4ec1-b2ad-0fe8646dc9f5_1.6813c24b6d0bda85c85a4084c5842bdb.jpeg'}],
  'width': 2448},
 {'annotations': [{'class': 'Rocking_Chairs',
    'xmax': 0.9466666666666661,
    'xmin': 0.943333333333333,
    'ymax': 0.963333333333333,
    'ymin': 0.96}],
  'filepath': '/data/furniture_stock_data_24pt_filtered/Rocking_Chairs/082f4c06ca96361dcb98172e061279d395d9f4eb.jpeg',
  'height': 1500,
  'metadata': [{'asset_rank': 3,
    'item_id': '402423247',
    'product_id': '6IZHTKPQ4DBH',
    'url': 'https://i5.walmartimages.com/asr/27658f90-73aa-412c-865f-4c7c7a44dfd2_1.e87557d2d24864f4d8d

In [62]:
with open('/data/tmp/test.json','w') as tf:
    ujson.dump(json_ann, tf)