In [74]:
import numpy as np
from PIL import Image
import requests
import io
import json
import datetime as dt
import logging
from shapely import wkt
import requests
from PIL import Image
import cv2

In [36]:
ENV = {}
with open(".env", 'r') as f:
    lines = [row.rstrip('\n') for row in f]
    for line in lines:
        key, value = line.split("=")
        ENV[key] = value
headers = {'Authorization': f"Bearer {ENV['LABELBOX_API_KEY']}"}

In [61]:
def url_to_mask(url):
    ndjson_response = requests.get(url, headers=headers)
    stream = io.BytesIO(ndjson_response.content)
    img = Image.open(stream).convert('1')
    pixels = np.asarray(img)
    return np.argwhere(pixels)

### Building the actual dataset

In [72]:
with open('data.json', 'r') as f:
    lines = f.read()
    label_data = json.loads(lines)

In [64]:
coco = {
    'info': None,
    'images': [],
    'annotations': [],
    'licenses': [],
    'categories': []
}

coco['info'] = {
    'year': dt.datetime.now(dt.timezone.utc).year,
    'version': None,
    'description': label_data[0]['Project Name'],
    'contributor': label_data[0]['Created By'],
    'url': 'labelbox.com',
    'date_created': dt.datetime.now(dt.timezone.utc).isoformat()
}


In [73]:
categories = set()
for data in label_data:
    for obj in data['Label']['objects']:
        categories.add(obj['value'])
categories

{'mct'}

In [65]:
for data in label_data:
    # Download and get image name
    try:
        response = requests.get(data['Labeled Data'], stream=True)
    except requests.exceptions.MissingSchema as e:
        logging.exception(('"Labeled Data" field must be a URL. '
                            'Support for local files coming soon'))
        continue
    except requests.exceptions.ConnectionError as e:
        logging.exception('Failed to fetch image from {}'
                            .format(data['Labeled Data']))
        continue

    response.raw.decode_content = True
    im = Image.open(response.raw)
    width, height = im.size
    import pdb; pdb.set_trace()

    image = {
        "id": data['ID'],
        "width": width,
        "height": height,
        "file_name": data['Labeled Data'],
        "license": None,
        "flickr_url": data['Labeled Data'],
        "coco_url": data['Labeled Data'],
        "date_captured": None,
        "categories": categories,
    }

    coco['images'].append(image)
    
    objects = data['Label']['objects']
    categories = set()
    for obj in objects:
        categories.add(obj['
    # convert WKT multipolygon to COCO Polygon format
    for cat in data['Label'].keys():

        try:
            # check if label category exists in 'categories' field
            cat_id = [c['id'] for c in coco['categories'] if c['supercategory'] == cat][0]
        except IndexError as e:
            cat_id = len(coco['categories']) + 1
            category = {
                'supercategory': cat,
                'id': len(coco['categories']) + 1,
                'name': cat
            }
            coco['categories'].append(category)
        import pdb; pdb.set_trace()
        multipolygon = wkt.loads(data['Label'][cat])
        for m in multipolygon:
            segmentation = []
            for x, y in m.exterior.coords:
                segmentation.extend([x, height-y])

            annotation = {
                "id": len(coco['annotations']) + 1,
                "image_id": data['ID'],
                "category_id": cat_id,
                "segmentation": [segmentation],
                "area": m.area,  # float
                "bbox": [m.bounds[0], m.bounds[1],
                            m.bounds[2]-m.bounds[0],
                            m.bounds[3]-m.bounds[1]],
                "iscrowd": 0
            }

            coco['annotations'].append(annotation)


> [0;32m<ipython-input-65-207c419c7eb0>[0m(20)[0;36m<module>[0;34m()[0m
[0;32m     18 [0;31m[0;34m[0m[0m
[0m[0;32m     19 [0;31m    image = {
[0m[0;32m---> 20 [0;31m        [0;34m"id"[0m[0;34m:[0m [0mdata[0m[0;34m[[0m[0;34m'ID'[0m[0;34m][0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     21 [0;31m        [0;34m"width"[0m[0;34m:[0m [0mwidth[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     22 [0;31m        [0;34m"height"[0m[0;34m:[0m [0mheight[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  im


<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1093x1080 at 0x7F5AECD96EB0>


ipdb>  im.width


1093


ipdb>  im.height


1080


ipdb>  data


{'ID': 'cl8ob1whs3vfh07y59k43df1u', 'DataRow ID': 'cl8n71piq25w60736fvy30qw4', 'Labeled Data': 'https://storage.labelbox.com/cl88r8eyw0de307z08t5wcqw5%2F5f29bb71-2ca1-c540-a597-f78d68d9f4ab-20160217_112545.jpg?Expires=1666900288736&KeyName=labelbox-assets-key-3&Signature=y62sbL1NsE43Tu9AJwrcvnJBMX4', 'Label': {'objects': [{'featureId': 'cl8ob270d0000356k2rwbkdr5', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob270d0000356k2rwbkdr5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob4bwi0007356kfon154oj', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob4bwi0007356kfon1

ipdb>  data['Label']


{'objects': [{'featureId': 'cl8ob270d0000356k2rwbkdr5', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob270d0000356k2rwbkdr5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob4bwi0007356kfon154oj', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob4bwi0007356kfon154oj?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob5e28000d356ki05ojijv', 'schemaId': 'cl8

ipdb>  multipolygon


*** NameError: name 'multipolygon' is not defined


ipdb>  data['Label'].keys()


dict_keys(['objects', 'classifications', 'relationships'])


ipdb>  c


> [0;32m<ipython-input-65-207c419c7eb0>[0m(47)[0;36m<module>[0;34m()[0m
[0;32m     45 [0;31m            [0mcoco[0m[0;34m[[0m[0;34m'categories'[0m[0;34m][0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mcategory[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     46 [0;31m        [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 47 [0;31m        [0mmultipolygon[0m [0;34m=[0m [0mwkt[0m[0;34m.[0m[0mloads[0m[0;34m([0m[0mdata[0m[0;34m[[0m[0;34m'Label'[0m[0;34m][0m[0;34m[[0m[0mcat[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     48 [0;31m        [0;32mfor[0m [0mm[0m [0;32min[0m [0mmultipolygon[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     49 [0;31m            [0msegmentation[0m [0;34m=[0m [0;34m[[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  wkt.loads(data['Label'][cat])


*** TypeError: Only str is accepted.


ipdb>  data['Label'][cat]


[{'featureId': 'cl8ob270d0000356k2rwbkdr5', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob270d0000356k2rwbkdr5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob4bwi0007356kfon154oj', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob4bwi0007356kfon154oj?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob5e28000d356ki05ojijv', 'schemaId': 'cl896c5ki0bfx07

ipdb>  image


{'id': 'cl8ob1whs3vfh07y59k43df1u', 'width': 1093, 'height': 1080, 'file_name': 'https://storage.labelbox.com/cl88r8eyw0de307z08t5wcqw5%2F5f29bb71-2ca1-c540-a597-f78d68d9f4ab-20160217_112545.jpg?Expires=1666900288736&KeyName=labelbox-assets-key-3&Signature=y62sbL1NsE43Tu9AJwrcvnJBMX4', 'license': None, 'flickr_url': 'https://storage.labelbox.com/cl88r8eyw0de307z08t5wcqw5%2F5f29bb71-2ca1-c540-a597-f78d68d9f4ab-20160217_112545.jpg?Expires=1666900288736&KeyName=labelbox-assets-key-3&Signature=y62sbL1NsE43Tu9AJwrcvnJBMX4', 'coco_url': 'https://storage.labelbox.com/cl88r8eyw0de307z08t5wcqw5%2F5f29bb71-2ca1-c540-a597-f78d68d9f4ab-20160217_112545.jpg?Expires=1666900288736&KeyName=labelbox-assets-key-3&Signature=y62sbL1NsE43Tu9AJwrcvnJBMX4', 'date_captured': None}


ipdb>  data


{'ID': 'cl8ob1whs3vfh07y59k43df1u', 'DataRow ID': 'cl8n71piq25w60736fvy30qw4', 'Labeled Data': 'https://storage.labelbox.com/cl88r8eyw0de307z08t5wcqw5%2F5f29bb71-2ca1-c540-a597-f78d68d9f4ab-20160217_112545.jpg?Expires=1666900288736&KeyName=labelbox-assets-key-3&Signature=y62sbL1NsE43Tu9AJwrcvnJBMX4', 'Label': {'objects': [{'featureId': 'cl8ob270d0000356k2rwbkdr5', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob270d0000356k2rwbkdr5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob4bwi0007356kfon154oj', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob4bwi0007356kfon1

ipdb>  data['label']


*** KeyError: 'label'


ipdb>  data['Label']


{'objects': [{'featureId': 'cl8ob270d0000356k2rwbkdr5', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob270d0000356k2rwbkdr5?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob4bwi0007356kfon154oj', 'schemaId': 'cl896c5ki0bfx07xk9rro1h99', 'color': '#FF34FF', 'title': 'MCT', 'value': 'mct', 'instanceURI': 'https://api.labelbox.com/masks/feature/cl8ob4bwi0007356kfon154oj?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbDg4cjhlenAwZGU0MDd6MGJmOXMwMHdvIiwib3JnYW5pemF0aW9uSWQiOiJjbDg4cjhleXcwZGUzMDd6MDh0NXdjcXc1IiwiaWF0IjoxNjY1NjkwNjg3LCJleHAiOjE2NjgyODI2ODd9.r0dcr1bwcI8v8mzzSKv6CFR952Uh6d3l_0H7WcYkIN4'}, {'featureId': 'cl8ob5e28000d356ki05ojijv', 'schemaId': 'cl8

ipdb>  data['Label'].keys()


dict_keys(['objects', 'classifications', 'relationships'])


ipdb>  data['Label']['classifications']


[]


ipdb>  data['Label']['relationships']


[]


ipdb>  c


TypeError: Only str is accepted.

In [None]:
with open('coco.json', 'w+') as f:
    f.write(json.dumps(coco))