# Converting COCO annotation into JSON Line format

Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

version 0.2

## Description

- This sample notebook converts a COCO annotation file into JSON Lines format, which can be registered as an Azure Machine Learning (AML) Labeled Dataset.

## Setup
- None
  
## Usage

- Set the input parameters in the first code cell as needed, then run all the cells in order.

## Parameters

| Name | Description | Example |
| --- | --- | --- |
| task_type | The type of annotation:<br> Single label: 'IMAGE_CLASSIFICATION'<br> Multi-Label: 'IMAGE_MULTI_LABEL_CLASSIFICATION'<br>  BoundingBox: 'OBJECT_DETECTION'<br> Polygon: 'IMAGE_INSTANCE_SEGMENTATION' | 'IMAGE_INSTANCE_SEGMENTATION' |
| input_coco_file_path | The full path of the COCO annotation file | 'C:/Sample/Polygon_COCO.json' |
| output_dir | The name of the output sub-directory (created under the current working directory) | 'JsonLines' |


In [None]:
import os

# Type of annotation to convert; selects which converter is used further down.
# One of: 'IMAGE_CLASSIFICATION', 'IMAGE_MULTI_LABEL_CLASSIFICATION',
# 'OBJECT_DETECTION', 'IMAGE_INSTANCE_SEGMENTATION'.
task_type = 'OBJECT_DETECTION'

# Full path of the COCO annotation file to convert.
input_coco_file_path = r"C:\Demo\LabelImport\MyCOCO.json"

# Sub-directory of the current working directory that receives the output file.
output_dir = 'JsonLines'

# Default output file name.
json_line_file_name = 'LabeledData.jsonl'
output_jsonl_file_path = os.path.join(os.getcwd(), output_dir, json_line_file_name)

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

## Define Converters

In [None]:
import json

class CocoToJSONLinesConverter:
    """Common interface for the COCO-to-JSON-Lines converters."""

    def convert(self):
        """Return the converted records; concrete converters must override this."""
        raise NotImplementedError


class MultiClassConverter(CocoToJSONLinesConverter):
    """Convert COCO data into single-label image classification records.

    Each record has the keys 'image_url', 'label' (a category name) and
    'label_confidence' (always 1.0).

    Args:
        coco_data: Parsed COCO document. The keys read are 'images' (each entry
            with 'id' and 'coco_url'), 'categories' (each with 'id' and 'name')
            and 'annotations' (each with 'image_id' and 'category_id').
    """

    def __init__(self, coco_data):
        self.coco_data = coco_data
        # One placeholder per image; convert() replaces this list with the real records.
        self.json_lines_data = [{} for _ in coco_data['images']]
        # category id -> category name
        self.categories = {
            category['id']: category['name'] for category in coco_data['categories']
        }

    def _populate_image_url(self, json_line, coco_image_url):
        """Store the image URL on the record and return the record."""
        json_line['image_url'] = coco_image_url
        return json_line

    def _populate_label(self, json_line, label_id):
        """Store the category name for `label_id` on the record and return the record."""
        json_line['label'] = self.categories[label_id]
        return json_line

    def _populate_label_confidence(self, json_line):
        """Store the fixed confidence of 1.0 on the record and return the record."""
        json_line['label_confidence'] = 1.0
        return json_line

    def convert(self):
        """Build one record per labeled image and return the list of records.

        Labels are matched to images through the annotation's 'image_id', so
        the order of the 'annotations' list does not matter. If an image has
        several annotations, the first one listed is used. Images without any
        annotation have no label to emit and are left out of the result.

        Raises:
            KeyError: If an annotation refers to a category id that is not
                listed in 'categories', or a required key is missing.
        """
        # image id -> category id of the first annotation seen for that image.
        category_by_image = {}
        for annotation in self.coco_data['annotations']:
            category_by_image.setdefault(annotation['image_id'], annotation['category_id'])

        self.json_lines_data = []
        for image in self.coco_data['images']:
            if image['id'] not in category_by_image:
                # Unlabeled image: nothing to write for a single-label task.
                continue
            json_line = self._populate_image_url({}, image['coco_url'])
            json_line = self._populate_label(json_line, category_by_image[image['id']])
            json_line = self._populate_label_confidence(json_line)
            self.json_lines_data.append(json_line)
        return self.json_lines_data


class MultiLabelConverter(CocoToJSONLinesConverter):
    """Convert COCO data into multi-label image classification records.

    Each record has the keys 'image_url', 'label' (list of category names) and
    'label_confidence' (list holding one 1.0 per label).

    Args:
        coco_data: Parsed COCO document; the 'images', 'categories' and
            'annotations' entries are read.
    """

    def __init__(self, coco_data):
        self.coco_data = coco_data
        # image id -> position of that image's record in json_lines_data
        self.image_id_to_data_index = {}
        # One empty record per image, filled in by convert().
        self.json_lines_data = [
            {'image_url': "", 'label': [], 'label_confidence': []}
            for _ in coco_data['images']
        ]
        # category id -> category name
        self.categories = {
            category['id']: category['name'] for category in coco_data['categories']
        }

    def _populate_image_url(self, index, coco_image):
        """Record the image URL and remember which record belongs to the image id."""
        record = self.json_lines_data[index]
        record['image_url'] = coco_image['coco_url']
        self.image_id_to_data_index[coco_image['id']] = index

    def _populate_label(self, annotation):
        """Append the annotation's category name, plus a confidence, to its image's record."""
        index = self.image_id_to_data_index[annotation['image_id']]
        category_name = self.categories[annotation['category_id']]
        self.json_lines_data[index]['label'].append(category_name)
        self._populate_label_confidence(index)

    def _populate_label_confidence(self, index):
        """Append the fixed confidence of 1.0 to the record at `index`."""
        self.json_lines_data[index]['label_confidence'].append(1.0)

    def convert(self):
        """Fill in every record and return the list of records.

        All images are registered first so that each annotation can be routed
        to its record through 'image_id'.
        """
        for index, coco_image in enumerate(self.coco_data['images']):
            self._populate_image_url(index, coco_image)
        for annotation in self.coco_data['annotations']:
            self._populate_label(annotation)
        return self.json_lines_data


class BoundingBoxConverter(CocoToJSONLinesConverter):
    """Convert COCO data into object detection (bounding box) records.

    Each record has the keys 'image_url', 'label' (list of dicts with 'label',
    'topX', 'topY', 'bottomX', 'bottomY') and 'label_confidence' (list holding
    one 1.0 per box).

    Args:
        coco_data: Parsed COCO document; the 'images', 'categories' and
            'annotations' entries are read.
    """

    def __init__(self, coco_data):
        self.coco_data = coco_data
        # image id -> position of that image's record in json_lines_data
        self.image_id_to_data_index = {}
        # One empty record per image, filled in by convert().
        self.json_lines_data = [
            {'image_url': "", 'label': [], 'label_confidence': []}
            for _ in coco_data['images']
        ]
        # category id -> category name
        self.categories = {
            category['id']: category['name'] for category in coco_data['categories']
        }

    def _populate_image_url(self, index, coco_image):
        """Record the image URL and remember which record belongs to the image id."""
        record = self.json_lines_data[index]
        record['image_url'] = coco_image['coco_url']
        self.image_id_to_data_index[coco_image['id']] = index

    def _populate_bbox_in_label(self, label, annotation):
        """Write the box corners into `label`.

        The annotation's 'bbox' is treated as [x, y, width, height]: the bottom
        corner is the top corner plus the width/height.
        """
        # NOTE(review): values are copied in whatever units the input uses;
        # confirm whether the consumer expects pixel or normalized coordinates.
        bbox = annotation['bbox']
        label['topX'] = bbox[0]
        label['topY'] = bbox[1]
        label['bottomX'] = bbox[0] + bbox[2]
        label['bottomY'] = bbox[1] + bbox[3]

    def _populate_label(self, annotation):
        """Append one labeled box, plus a confidence, to the annotation's image record."""
        index = self.image_id_to_data_index[annotation['image_id']]
        category_name = self.categories[annotation['category_id']]
        label = {'label': category_name}
        self._populate_bbox_in_label(label, annotation)
        self.json_lines_data[index]['label'].append(label)
        self._populate_label_confidence(index)

    def _populate_label_confidence(self, index):
        """Append the fixed confidence of 1.0 to the record at `index`."""
        self.json_lines_data[index]['label_confidence'].append(1.0)

    def convert(self):
        """Fill in every record and return the list of records.

        All images are registered first so that each annotation can be routed
        to its record through 'image_id'.
        """
        for index, coco_image in enumerate(self.coco_data['images']):
            self._populate_image_url(index, coco_image)
        for annotation in self.coco_data['annotations']:
            self._populate_label(annotation)
        return self.json_lines_data


class PolygonConverter(CocoToJSONLinesConverter):
    """Convert COCO data into instance segmentation (polygon) records.

    Each record has the keys 'image_url', 'label' (list of dicts with 'label',
    'isCrowd', 'polygon' and 'bbox') and 'label_confidence' (list holding one
    1.0 per polygon).

    Args:
        coco_data: Parsed COCO document; the 'images', 'categories' and
            'annotations' entries are read.
    """

    def __init__(self, coco_data):
        self.coco_data = coco_data
        # image id -> position of that image's record in json_lines_data
        self.image_id_to_data_index = {}
        # One empty record per image, filled in by convert().
        self.json_lines_data = [
            {'image_url': "", 'label': [], 'label_confidence': []}
            for _ in coco_data['images']
        ]
        # category id -> category name
        self.categories = {
            category['id']: category['name'] for category in coco_data['categories']
        }

    def _populate_image_url(self, index, coco_image):
        """Record the image URL and remember which record belongs to the image id."""
        record = self.json_lines_data[index]
        record['image_url'] = coco_image['coco_url']
        self.image_id_to_data_index[coco_image['id']] = index

    def _populate_bbox_in_label(self, label, annotation):
        """Store the box in `label['bbox']` as [top_x, top_y, bottom_x, bottom_y].

        The annotation's 'bbox' is treated as [x, y, width, height]: the bottom
        corner is the top corner plus the width/height.
        """
        coco_bbox = annotation['bbox']
        left, top = coco_bbox[0], coco_bbox[1]
        right = left + coco_bbox[2]
        bottom = top + coco_bbox[3]
        label['bbox'] = [left, top, right, bottom]

    def _populate_label(self, annotation):
        """Append one labeled polygon, plus a confidence, to the annotation's image record."""
        index = self.image_id_to_data_index[annotation['image_id']]
        # 'isCrowd' is always written as False; the annotation's 'segmentation'
        # value is passed through unchanged as the polygon.
        label = {
            'label': self.categories[annotation['category_id']],
            'isCrowd': False,
            'polygon': annotation['segmentation'],
        }
        self._populate_bbox_in_label(label, annotation)
        self.json_lines_data[index]['label'].append(label)
        self._populate_label_confidence(index)

    def _populate_label_confidence(self, index):
        """Append the fixed confidence of 1.0 to the record at `index`."""
        self.json_lines_data[index]['label_confidence'].append(1.0)

    def convert(self):
        """Fill in every record and return the list of records.

        All images are registered first so that each annotation can be routed
        to its record through 'image_id'.
        """
        for index, coco_image in enumerate(self.coco_data['images']):
            self._populate_image_url(index, coco_image)
        for annotation in self.coco_data['annotations']:
            self._populate_label(annotation)
        return self.json_lines_data


## Convert COCO Annotation to JSON Line file

In [None]:

def read_coco_file(coco_file):
    """Load a COCO annotation file and return its parsed JSON content.

    Args:
        coco_file: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly instead of relying on the platform's default encoding.
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which json.load would otherwise reject.
    with open(coco_file, encoding='utf-8-sig') as f_in:
        return json.load(f_in)

def write_json_lines(converter, filename):
    """Run the converter and write its records to `filename`, one JSON object per line.

    Args:
        converter: Object whose convert() method returns an iterable of
            JSON-serializable records.
        filename: Path of the output file; an existing file is overwritten.
    """
    # Convert before opening the file so a failing conversion leaves any
    # existing output untouched.
    records = converter.convert()
    compact = (',', ':')  # no whitespace between JSON tokens
    with open(filename, 'w') as jsonl_file:
        for record in records:
            json.dump(record, jsonl_file, separators=compact)
            jsonl_file.write('\n')

print('AML Data Labeling Task type: {}'.format(task_type))
print('Converting COCO annotation file "{}" into JSON Line file "{}"'.format(input_coco_file_path, output_jsonl_file_path))

# Converter class for each supported task type.
converter_classes = {
    'IMAGE_CLASSIFICATION': MultiClassConverter,
    'IMAGE_MULTI_LABEL_CLASSIFICATION': MultiLabelConverter,
    'OBJECT_DETECTION': BoundingBoxConverter,
    'IMAGE_INSTANCE_SEGMENTATION': PolygonConverter,
}

# Validate the task type before touching the input file, and only report
# success when a conversion actually ran.
converter_class = converter_classes.get(task_type)
if converter_class is None:
    print("ERROR: Invalid Task Type")
else:
    coco_data = read_coco_file(input_coco_file_path)
    converter = converter_class(coco_data)
    write_json_lines(converter, output_jsonl_file_path)
    print('Done.')

## End of notebook