In [1]:
import json
import os
import pandas as pd
from pprint import pprint

In [2]:
## input
########

# pos       name                    description
# 1      frame number           frame in which the object is present
# 2      identity number        trajectory id (-1 default for no track)
# 3      bounding box x         x value from top left of bounding box
# 4      bounding box y         y value from top left of bounding box
# 5      bounding box width     width of bounding box in pixels
# 6      bounding box height    height of bounding box in pixels
# 7      confidence score*      class detection confidence (gt: 1 or 0)
# 8      class*                 type of class (1 for pedestrian)
# 9      visibility*            percent visible (percent occluded = 1-visibility)

#        *no need to output these values; they will be ignored by the evaluator

# example

# 1,1,1363,569,103,241,1,1,0.86014
# 2,1,1362,568,103,241,1,1,0.86173
# 1,2,371,410,80,239,1,1,1
# 2,2,371,408,80,239,1,1,1

In [3]:
## output
#########

# The frames list contains a list of annotated frames. Frames from groundtruth 
# and hypothesis are synchronized by the timestamp. Each annotation in the 
# annotation list consists of bounding box values (x, y, width, height), and an id. 
# The dco flag stands for do not care and can be used to mark hard to track targets,
# e.g. because of occlusion. Thus, a tracker which does not find the target will not
# be penalized, whereas a tracker which finds the target won't be punished (with a 
# false positive) either.
# (source: https://github.com/Videmo/pymot)

# example

# [
#     {
#         "frames": [
#             {
#                 "timestamp": 1.0,
#                 "num": 1,
#                 "class": "frame",
#                 "annotations": [
#                     {
#                         "dco": true, 
#                         "height": 241.0, 
#                         "width": 103.0, 
#                         "id": "1", 
#                         "y": 569.0, 
#                         "x": 1363.0 
#                     },
#                     {
#                         "dco": true, 
#                         "height": 239.0, 
#                         "width": 80.0, 
#                         "id": "2",
#                         "y": 410.0, 
#                         "x": 371.0 
#                     }
#                 ]
#             },
#             {
#                 "timestamp": 2.0,
#                 "num": 2,
#                 "class": "frame",
#                 "annotations": [
#                     {
#                         "dco": true, 
#                         "height": 241.0, 
#                         "width": 103.0, 
#                         "id": "1",
#                         "y": 568.0, 
#                         "x": 1362.0 
#                     },
#                     {
#                         "dco": true, 
#                         "height": 239.0, 
#                         "width": 80.0, 
#                         "id": "2",
#                         "y": 408.0, 
#                         "x": 371.0 
#                     }
#                 ]
#             }
#         ],
#         "class": "video", 
#         "filename": "/MOT17-04-FRCNN.mp4"
#     }   
# ]

In [4]:
# Base file name shared by the input and the output, plus their extensions.
fname = "gt"
in_ext = "txt"
out_ext = "json"

# The text file is read from the "gt" directory and the converted JSON is
# written to the "output" directory; both paths end in "<fname>.<ext>".
in_path = os.path.join("gt", "%s.%s" % (fname, in_ext))
out_path = os.path.join("output", "%s.%s" % (fname, out_ext))

In [5]:
# Column names, in file order, for the header-less comma-separated input.
header_list = ['frame','trajectory','x','y','w','h','dco','class','visibility']

# Every column is parsed as an integer except 'visibility', which is a float.
dtype = {column: int for column in header_list}
dtype['visibility'] = float

# Load the file selected by `in_path` into a typed DataFrame.
df = pd.read_csv(
    in_path,
    names=header_list,
    dtype=dtype,
)

In [13]:
def _frames_from_tracks(df, annotations_key):
    """Group the rows of ``df`` by frame number into a list of frame dicts.

    Args:
        df: DataFrame with at least the columns 'frame', 'trajectory', 'x',
            'y', 'w', 'h' and 'dco'.
        annotations_key: Name of the key under which each frame stores its
            list of boxes (e.g. "hypotheses").

    Returns:
        One dict per frame, ordered by ascending frame number.

    Raises:
        Exception: If the frame numbers present in ``df`` do not form a
            gap-free range.
    """
    first_frame_id = df["frame"].min()
    last_frame_id = df["frame"].max()
    total_frames = len(df["frame"].unique())

    if total_frames != last_frame_id - first_frame_id + 1:
        raise Exception("Missing frames")

    frames = []
    # A single groupby pass hands each frame its own rows (keys come out
    # sorted), instead of re-filtering the whole table once per frame.
    for frame_id, rows in df.groupby("frame", sort=True):
        # groupby keys are numpy integers, which json cannot serialize.
        frame_id = int(frame_id)
        boxes = [
            {
                "dco": row.dco,
                "height": row.h,
                "width": row.w,
                "id": row.trajectory,
                "x": row.x,
                "y": row.y
            }
            for row in rows.itertuples(index=False)
        ]
        frames.append({
            annotations_key: boxes,
            "num": frame_id,
            "timestamp": float(frame_id),
            "class": "frame"
        })
    return frames


def convert_output(target_dir="output", target_ext="txt", dest_dir="output", dest_ext="json"):
    """Convert every tracker output file in ``target_dir`` to a JSON file.

    Each file in ``target_dir`` whose extension equals ``target_ext`` is read
    as a header-less CSV with the columns
    frame, trajectory, x, y, w, h, dco, class, visibility, and written to
    ``dest_dir`` as ``<same base name>.<dest_ext>``. The JSON document is a
    one-element list holding a "video" dict whose frames carry their boxes
    under the "hypotheses" key. Files whose name contains "gt.txt" are
    skipped.

    Args:
        target_dir: Directory that is scanned for input files.
        target_ext: Extension (without the dot) of the files to convert.
        dest_dir: Directory the JSON files are written to; created if missing.
        dest_ext: Extension (without the dot) of the written files.

    Raises:
        Exception: If the frame numbers in a file do not form a gap-free range.
    """
    header_list = ['frame','trajectory','x','y','w','h','dco','class','visibility']
    dtype = {
        'frame':int,
        'trajectory':int,
        'x':int,
        'y':int,
        'w':int,
        'h':int,
        'dco':int,
        'class':int,
        'visibility':float}

    hypotheses = [fn for fn in os.listdir(target_dir) if fn.split('.')[-1] == target_ext]

    if hypotheses and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    for hypothesis in hypotheses:

        # The ground-truth file is not a hypothesis; leave it alone.
        if "gt.txt" in hypothesis:
            continue

        # Read the file currently being converted (not a global input path).
        path = os.path.join(target_dir, hypothesis)
        df = pd.read_csv(path, names=header_list, dtype=dtype)

        sloth = {
            "frames": _frames_from_tracks(df, "hypotheses"),
            "class": "video",
            "filename": "/MOT17-04-FRCNN.mp4"
        }

        fname = '.'.join(hypothesis.split('.')[:-1])
        out_path = os.path.join(dest_dir, "%s.%s" % (fname, dest_ext))

        with open(out_path, 'w') as outfile:
            json.dump([sloth], outfile)

# Run the conversion with the default arguments: every "*.txt" file in the
# "output" directory is written back to "output" as "*.json".
convert_output()

In [6]:
# Build the "video" structure for the rows loaded into `df`.
first_frame_id = df["frame"].min()
last_frame_id = df["frame"].max()
total_frames = len(df["frame"].unique())

# The frame numbers must form a gap-free range, otherwise frames are missing.
if total_frames != last_frame_id - first_frame_id + 1:
    raise Exception("Missing frames")

frames = []
# Walk the frame numbers that are actually present; the sequence is not
# assumed to start at 1. int() keeps the loop variable a plain Python int so
# that it can be serialized by json.
for frame_id in range(int(first_frame_id), int(last_frame_id) + 1):

    annotations = []

    # Only the rows that belong to the current frame become its annotations.
    for row in df[df['frame'] == frame_id].itertuples(index=True, name='Pandas'):
        annotation = {
            "dco": row.dco,
            "height": row.h,
            "width": row.w,
            "id": row.trajectory,
            "x": row.x,
            "y": row.y
        }
        annotations.append(annotation)

    frame = {
        "annotations": annotations,
        "num": frame_id,
        "timestamp": frame_id*1.0,
        "class": "frame"
    }
    frames.append(frame)


# Top-level "video" entry that wraps all frames.
sloth = {
    "frames": frames,
    "class": "video", 
    "filename": "/MOT17-04-FRCNN.mp4"
}

In [7]:
# Write the ground truth as a one-element list of video dicts, the same
# top-level shape as the documented example and as convert_output() writes.
# `out_path` is the "output/gt.json" path built in the configuration cell.
with open(out_path, 'w') as outfile:
    json.dump([sloth], outfile)