Convert AlphaPilot JSON labels to the [VoTT](https://github.com/Microsoft/VoTT) Custom Vision Service JSON format. This allows images and their labels to be visualized in VoTT, which is a convenient way to inspect them and is useful for assessing the accuracy of ground truths and predictions.

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import os
import shutil
import pathlib
import random

In [3]:
# root data directory: ~/.fastai/data
path = pathlib.Path.home() / '.fastai' / 'data'

In [4]:
# copy labels from git
! mkdir -p $path/labels
! cp -r labels $path/
# copy results from git
! cp -r results $path/

In [49]:
# directory holding the label files, under the data root
labels_dir = path.joinpath('labels')

In [5]:
# Load the label/prediction file to convert into a dataframe.
# Alternative inputs are kept below, commented out, for quick switching.
#df = pd.read_json(labels_dir/'training_GT_labels_v2.json')
#df = pd.read_json(labels_dir/'training_combined_labels.json')
#df = pd.read_json(labels_dir/'fixed_9299.json')
#df = pd.read_json(labels_dir/'good_community_labels/trainingdata_9000_to_9499_good_2.json')
df = pd.read_json(path.joinpath('results/results_v20run1_stage26_test.json'))

In [6]:
# Image names to convert: one per column of the loaded dataframe.
# (A short explicit list can be used instead for a quick trial.)
#fnames = ["IMG_0005.JPG","IMG_0006.JPG","IMG_0007.JPG"]
fnames = df.columns

In [7]:
# display how many image names will be converted
len(fnames)

1161

In [8]:
def body(fname, df, width=1296, height=864):
    """Return the VoTT ``frames`` entry for one image as a JSON text fragment.

    The fragment has the form ``"<fname>":[...],`` and always ends with a
    comma, so the caller must strip the comma after the last entry.

    Args:
        fname: Image file name; used both as the lookup key into ``df`` and
            as the JSON key of the returned entry.
        df: Mapping-like object (e.g. a DataFrame) where ``df[fname][0]`` is
            the flat coordinate sequence ``x1, y1, x2, y2, x3, y3, x4, y4``,
            optionally followed by a ninth confidence value (predictions).
        width: Image width written into each shape (default 1296).
        height: Image height written into each shape (default 864).

    Returns:
        A string. With 8 or 9 values the entry holds two shapes: an all-zero
        polyline with no points, and a polygon through the four corners whose
        box runs from corner 1 to corner 3. Any other length gives an empty
        shape list.
    """
    import json  # local import: the file's import cell does not provide json

    coords = df[fname][0]
    # json.dumps quotes the name and escapes characters such as '"' or '\\'
    # that would otherwise break the generated JSON.
    key = json.dumps(str(fname), ensure_ascii=False)

    if len(coords) not in (8, 9):
        return f'{key}:[],'

    # The optional ninth value (confidence) is not part of the VoTT output.
    x1, y1, x2, y2, x3, y3, x4, y4 = tuple(coords)[:8]

    # Random 32-bit hex identifiers, one per shape.
    uid1 = hex(random.randint(0, 4294967295))[2:]
    uid2 = hex(random.randint(0, 4294967295))[2:]

    empty_polyline = (
        f'{{"x1":0,"y1":0,"x2":0,"y2":0,"width":{width},"height":{height},'
        f'"box":{{"x1":0,"y1":0,"x2":0,"y2":0}},"points":[],"UID":"{uid1}",'
        f'"id":0,"type":"polyline","tags":["gate"],"name":1}}'
    )
    gate_polygon = (
        f'{{"x1":{x1},"y1":{y1},"x2":{x3},"y2":{y3},'
        f'"width":{width},"height":{height},'
        f'"box":{{"x1":{x1},"y1":{y1},"x2":{x3},"y2":{y3}}},'
        f'"points":[{{"x":{x1},"y":{y1}}},{{"x":{x2},"y":{y2}}},'
        f'{{"x":{x3},"y":{y3}}},{{"x":{x4},"y":{y4}}}],'
        f'"UID":"{uid2}","id":1,"type":"polygon","tags":["gate"],"name":2}}'
    )
    return f'{key}:[{empty_polyline},{gate_polygon}],'

In [9]:
# Assemble the VoTT JSON document as text: fixed header, one frames entry
# per image, then a fixed footer listing every image as a visited frame.

# body() ends every entry with a comma; drop the one after the last entry.
# rstrip leaves an empty string untouched, so the header's opening brace
# survives even when there are no frames.
frame_entries = ''.join(body(key, df) for key in fnames).rstrip(',')

# every image name, quoted and comma-separated
visited_frames = ','.join(f'"{fname}"' for fname in fnames)

output = (
    # fixed file header
    '{"frames":{'
    + frame_entries
    # fixed footer beginning
    + '},"framerate":"1","inputTags":"gate","suggestiontype":"track","scd":false,"visitedFrames":['
    + visited_frames
    # fixed footer ending
    + '],"tag_colors":["#990bb0"]}'
)

In [10]:
# preview the first 1000 characters of the generated JSON text
output[:1000]

'{"frames":{"IMG_0008.JPG":[{"x1":0,"y1":0,"x2":0,"y2":0,"width":1296,"height":864,"box":{"x1":0,"y1":0,"x2":0,"y2":0},"points":[],"UID":"eec730c9","id":0,"type":"polyline","tags":["gate"],"name":1},{"x1":153.7985839844,"y1":87.9038772583,"x2":523.7808227539,"y2":533.2077026367,"width":1296,"height":864,"box":{"x1":153.7985839844,"y1":87.9038772583,"x2":523.7808227539,"y2":533.2077026367},"points":[{"x":153.7985839844,"y":87.9038772583},{"x":526.2696533203,"y":159.6353149414},{"x":523.7808227539,"y":533.2077026367},{"x":109.8680953979,"y":556.0162963867}],"UID":"a4e216a6","id":1,"type":"polygon","tags":["gate"],"name":2}],"IMG_0023.JPG":[{"x1":0,"y1":0,"x2":0,"y2":0,"width":1296,"height":864,"box":{"x1":0,"y1":0,"x2":0,"y2":0},"points":[],"UID":"dcec1558","id":0,"type":"polyline","tags":["gate"],"name":1},{"x1":412.3900756836,"y1":287.9408569336,"x2":698.8715209961,"y2":576.2043457031,"width":1296,"height":864,"box":{"x1":412.3900756836,"y1":287.9408569336,"x2":698.8715209961,"y2":576.

In [11]:
# save the generated JSON text as vott.json under the data directory
with (path / 'vott.json').open(mode='w') as f:
    f.write(output)

```
"framerate":"1","inputTags":"gate","suggestiontype":"track","scd":false,"visitedFrames":["IMG_0005.JPG","IMG_0006.JPG","IMG_0007.JPG"],"tag_colors":["#990bb0"]}
```

In [12]:
# Build a fresh image directory containing only the images that have an
# entry in the labels dataframe.
#orig_img_path = path/'Data_Training'
orig_img_path = path.joinpath('Data_LeaderboardTesting/')
img_path = path.joinpath('vott')

# start from an empty destination: delete any previous copy, then recreate it
if img_path.is_dir():
    shutil.rmtree(img_path)
img_path.mkdir()

# copy each labelled image across under the same file name
for fname in df.keys():
    shutil.copyfile(orig_img_path.joinpath(fname), img_path.joinpath(fname))

## DEBUG

In [None]:
# ONLY RUN TO VISUALIZE BAD LABELS


# load bad labels from file, keeping only the lines that start with 'IMG'
bad_labels_path = labels_dir/'poor_community_labels.txt'
with open(bad_labels_path, 'r') as f:
    bad_labels = [line.strip('\n') for line in f if line.startswith('IMG')]

# Keep only the bad-label columns in the dataframe.  All other columns are
# dropped in a single call with the `columns=` keyword: passing the axis
# positionally (df.drop(name, 1)) is rejected by pandas 2.x.  A set makes
# each membership test O(1).
bad_label_names = set(bad_labels)
df = df.drop(columns=[name for name in df.keys() if name not in bad_label_names])

len(bad_labels)


In [None]:
# load the cleaned community labels into the dataframe
df = pd.read_json(path.joinpath('cleaned_community_labels.json'))

In [None]:
# display the dataframe
df

In [None]:
# round values from vott
# For every column, the values in row 0 are rounded to whole numbers and
# written back wrapped in one extra outer list.
# NOTE(review): `df[key][0] = ...` is chained indexing; whether the write
# reaches `df` (and how the nested list is stored in the cell) depends on the
# pandas version's copy semantics -- confirm the result with the display below.
for key in df.keys():
    df[key][0] = [[round(i) for i in df[key][0]]]

In [None]:
# display the dataframe again
df

In [None]:
# write out clean json file with all the community labels

# convert dataframe to JSON string and strip out outer '[]'
json_str = df.to_json(orient='records')[1:-1]

# write to file; the with-block closes the file even if the write raises
with open(path/'cleaned_community_labels2.json', 'w') as f:
    f.write(json_str)