# Convert annotation file to COCO format

## Imports

In [34]:
import os 
from glob import glob
import sys
import json
import cv2

# Directory holding the extracted frames; the cells below expect files named
# "<frame index>.jpg" in it (e.g. "0.jpg").
FRAMES_DIR = os.path.join("data","frames")

## COCO template

In [118]:
# COCO layout reference:
# https://towardsdatascience.com/getting-started-with-coco-dataset-82def99fa0b8
#
# Each template below holds only the *content* of a JSON object, i.e. without
# the surrounding braces. Callers wrap the formatted text in '{' ... '}' and
# hand it to json.loads, so every placeholder must be filled with valid JSON.

# Top-level COCO document: one placeholder per COCO section.
COCO_JSON_CONTENT_TEMPLATE = '''
  "info": {info_dict},
  "licenses": {licenses_list},
  "images": {images_list},
  "categories": {categories_list},
  "annotations": {annotations_list}'''

# One entry of the "images" section. `file_name` is quoted by the template,
# all other placeholders are inserted verbatim (numbers).
IMAGE_DICT_CONTENT_TEMPLATE = '''
    "id": {id},
    "width": {width},
    "height": {height},
    "file_name": "{file_name}",
    "license": {license}'''

# One entry of the "annotations" section. The bbox is written in COCO order:
# [x of top-left corner, y of top-left corner, width, height].
BBOX_ANNOT_DICT_CONTENT_TEMPLATE = '''
    "image_id": {image_id},
    "bbox":
    [
        {x1},
        {y1},
        {width},
        {height}
    ],
    "category_id": {category_id},
    "id": {id},
    "iscrowd":0,
    "area": {area}
'''

# NOTE: `coco` is deliberately not bound in this cell. That name belongs to the
# final dataset dict built further down; pointing it at the raw template string
# here meant that re-running this cell after the build step (and then saving)
# would silently write the template text instead of the dataset.

## Read original annotations, number of extracted frames, and image dimensions

In [10]:
# Load the original annotations: a JSON object that the cells below iterate as
# {frame index: record}, where each record carries a "bbox" entry.
annotations_src_fp = os.path.join(
    "data",
    "top-100-shots-rallies-2018-atp-season-scoreboard-annotations.json",
)
with open(annotations_src_fp) as json_file:
    data = json.load(json_file)

In [35]:
# Count the extracted frames; used later to check that no frame was skipped
# while building the images list.
frames_glob_pattern = os.path.join(FRAMES_DIR, "*.jpg")
frames_n = len(glob(frames_glob_pattern))
print(f"Frames number: {frames_n}")

Frames number: 43245


In [36]:
# Read frame 0 as the reference frame: its resolution is reused for every
# entry of the images list, i.e. all frames are assumed to share one size.
reference_frame_fp = os.path.join(FRAMES_DIR, "0.jpg")
img = cv2.imread(reference_frame_fp)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on `.shape`.
if img is None:
    raise FileNotFoundError(f"Could not read reference frame: {reference_frame_fp}")
# shape is (rows, cols[, channels]); slicing avoids rebinding `_`, which
# IPython uses for the last cell output.
height, width = img.shape[:2]
print(f"Image dimensions: {height} x {width}")

Image dimensions: 1080 x 1920


## Write data to template

### Basic data

In [37]:
# Static COCO sections that do not depend on the frames or annotations.

# "info": free-form dataset description.
info_dict = {
    "description": "Sportsradar interview task",
}

# "licenses": single placeholder licence; every image entry refers to id 1.
licenses_list = [{"id": 1, "name": "TODO", "url": "TODO"}]

# "categories": the dataset has exactly one object class.
categories_list = [
    {
        "supercategory": "scoreboard",
        "id": 1,
        "name": "scoreboard",
    },
]

## Build images list

In [90]:
# Build the COCO "images" section: one entry per extracted frame.
# The frame index encoded in the file name ("<index>.jpg") becomes the image id.
# Width/height are those of the reference frame read earlier, so all frames are
# assumed to share that resolution.
images_list = []
frame_filepaths = glob(os.path.join(FRAMES_DIR, "*.jpg"))
for frame_filepath in frame_filepaths:
    filename = os.path.basename(frame_filepath)
    # splitext strips only the final extension, so a stem containing dots is
    # reported by the assert below rather than by an unpacking error.
    frame_stem, _ext = os.path.splitext(filename)
    # isdecimal() matches exactly the digit characters int() can parse.
    assert frame_stem.isdecimal(), f"Unexpected frame file name: {filename!r}"
    # Build the record directly: no JSON text is involved, so file names that
    # would need escaping (quotes, backslashes) are handled correctly.
    images_list.append(
        {
            "id": int(frame_stem),
            "width": width,
            "height": height,
            "file_name": filename,
            "license": 1,
        }
    )
# glob() returns files in arbitrary order; sort by id for reproducible output.
images_list.sort(key=lambda image: image["id"])
assert len(images_list) == frames_n

## Build annotations list

In [92]:
# Build the COCO "annotations" section: one scoreboard box per annotated frame.
# Source boxes are corner pairs (x1, y1, x2, y2); COCO expects
# [x, y, width, height] with (x, y) being the top-left corner.
annotations_list = []
for frame_key, record in data.items():
    # JSON object keys are strings; the frame index doubles as the image id
    # and, with one box per frame, as the annotation id.
    frame_idx = int(frame_key)
    x1, y1, x2, y2 = record["bbox"]
    assert x2 >= x1, f"Frame {frame_key}: x2 < x1 in bbox {record['bbox']}"
    assert y2 >= y1, f"Frame {frame_key}: y2 < y1 in bbox {record['bbox']}"
    w, h = x2 - x1, y2 - y1
    # Build the record directly instead of formatting and re-parsing JSON text.
    anno_data = {
        "image_id": frame_idx,
        "bbox": [x1, y1, w, h],
        "category_id": 1,
        "id": frame_idx,
        "iscrowd": 0,
        "area": w * h,
    }
    # Custom (non-COCO) keys can be added to `anno_data` here if needed.
    annotations_list.append(anno_data)

In [119]:
# Preview the first two lines of the rendered top-level template. Note that the
# Python values are inserted via their repr (single-quoted strings).
template_fields = {
    "info_dict": info_dict,
    "licenses_list": licenses_list,
    "images_list": images_list,
    "categories_list": categories_list,
    "annotations_list": annotations_list,
}
print(("{" + COCO_JSON_CONTENT_TEMPLATE.format(**template_fields) + "}").splitlines()[:2])

['{', '  "info": {\'description\': \'Sportsradar interview task\'},']


## Build COCO dict.

In [122]:
# Assemble the final COCO document directly from the Python objects.
# The previous approach formatted the repr() of each object into a text
# template and replaced every ' with " before parsing it as JSON. That breaks
# as soon as any string contains an apostrophe or a double quote (e.g. in a
# file name or description), or a value is True/False/None, and it needlessly
# serialises and re-parses tens of thousands of records.
coco = {
    "info": info_dict,
    "licenses": licenses_list,
    "images": images_list,
    "categories": categories_list,
    "annotations": annotations_list,
}

In [124]:
# Sanity check: the assembled document must contain every image and annotation
# record that was built above.
for section_name, built_records in (
    ("images", images_list),
    ("annotations", annotations_list),
):
    assert len(coco[section_name]) == len(built_records)

In [127]:
# Write the COCO document next to the original annotation file.
dest_fp = os.path.join(
    "data",
    "top-100-shots-rallies-2018-atp-season-scoreboard-annotations_coco.json",
)
with open(dest_fp, mode="w") as coco_file:
    json.dump(coco, coco_file)
    print(f"Saved COCO-style dict. to {dest_fp}")

Saved COCO-style dict. to data/top-100-shots-rallies-2018-atp-season-scoreboard-annotations_coco.json
