In [1]:
import os
import json
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np

In [2]:
# Round-2 dataset locations: `video_train` is listed to discover the video
# names, and `video_annos` is where the per-video JSON annotations are read from.
root = "/raid/ly/Dataset/tianchi2021/user_data/tmp_data/round-2/"
vid_root = os.path.join(root, 'video_train')
ann_root = os.path.join(root, 'video_annos')
# os.listdir returns entries in arbitrary order; sort so that every run visits
# the videos (and therefore emits annotation rows) in the same order.
# Entries whose name ends in 'zip' are skipped.
vid_nms = sorted(x for x in os.listdir(vid_root) if not x.endswith('zip'))

In [3]:
# Flatten the per-video track annotations into one row per annotated box:
# [img_nm, fid, pid, x1, y1, x2, y2, imgH, imgW, occ].
gt_anns = []
for vid_nm in tqdm(vid_nms, total=len(vid_nms)):
    ann_dir = os.path.join(ann_root, vid_nm)
    seqinfo_path = os.path.join(ann_dir, 'seqinfo.json')
    tracks_path = os.path.join(ann_dir, 'tracks.json')

    with open(seqinfo_path) as seq_fp:
        seqinfo = json.load(seq_fp)
    with open(tracks_path) as trk_fp:
        tracks = json.load(trk_fp)

    imgH = seqinfo['imHeight']
    imgW = seqinfo['imWidth']

    for track in tracks:
        pid = track['track id']
        for bbox in track['frames']:
            fid = bbox['frame id']
            # Frame ids are used as 1-based positions into the image list.
            img_nm = os.path.join(vid_nm, seqinfo['imUrls'][fid - 1])
            top_left = bbox['rect']['tl']
            bottom_right = bbox['rect']['br']
            # Corner coordinates are scaled by the image size
            # (presumably stored normalised to [0, 1] -- TODO confirm).
            x1, y1 = top_left['x'] * imgW, top_left['y'] * imgH
            x2, y2 = bottom_right['x'] * imgW, bottom_right['y'] * imgH
            gt_anns.append(
                [img_nm, fid, pid, x1, y1, x2, y2, imgH, imgW, bbox['occlusion']]
            )

  0%|          | 0/10 [00:00<?, ?it/s]

In [4]:
# One column per field of the rows collected in gt_anns, in the same order.
gt_columns = ['img_nm', 'fid', 'pid', 'x1', 'y1', 'x2', 'y2', 'imgH', 'imgW', 'occ']
gt_df = pd.DataFrame(gt_anns, columns=gt_columns)

In [5]:
# Normalise the occlusion label: missing or empty values become 'disappear'.
# The result is assigned back to the column rather than calling inplace methods
# on `gt_df.occ`: under pandas Copy-on-Write such a chained inplace call only
# updates a temporary object and leaves gt_df unchanged.
gt_df['occ'] = gt_df['occ'].fillna('disappear').replace('', 'disappear')

# Clamp boxes that spill outside the image.
# The right-hand side has to be a Series. Assigning a one-column DataFrame
# labelled 'imgH'/'imgW' into column 'y2'/'x2' makes .loc align on column
# labels, find no match and write NaN -- those boxes would then be removed by
# the area filter below instead of being clipped.
over_bottom = gt_df['y2'] > gt_df['imgH']
gt_df.loc[over_bottom, 'y2'] = gt_df.loc[over_bottom, 'imgH'] - 1
gt_df.loc[gt_df['y1'] < 0, 'y1'] = 0
over_right = gt_df['x2'] > gt_df['imgW']
gt_df.loc[over_right, 'x2'] = gt_df.loc[over_right, 'imgW'] - 1
gt_df.loc[gt_df['x1'] < 0, 'x1'] = 0

# Box size is derived after clamping.
gt_df['h'] = gt_df['y2'] - gt_df['y1']
gt_df['w'] = gt_df['x2'] - gt_df['x1']
gt_df['area'] = gt_df['h'] * gt_df['w']

# Keep only boxes whose area is strictly greater than 25.
gt_df = gt_df[gt_df['area'] > 25]

# Written without index and without a header row; readers must supply the
# column names themselves.
gt_df.to_csv("round2_gt.csv", index=False, header=False)

In [6]:
# Reload the cleaned boxes (the CSV has no header row, so names are given here)
# and keep only the rows whose occlusion label is 'normal' or 'hide'.
gt_csv_columns = [
    'img_nm', 'fid', 'pid',
    'x1', 'y1', 'x2', 'y2',
    'imgH', 'imgW', 'occ',
    'h', 'w', 'area',
]
gt_df = pd.read_csv('round2_gt.csv', names=gt_csv_columns)
gt_df = gt_df[gt_df.occ.isin(['normal', 'hide'])]

In [7]:
# Path of the COCO-style annotation JSON that the export cell writes.
# It is relative, so it resolves against the kernel's current working directory.
tgtfile = '../../user_data/tmp_data/detection/annotations/panda_coco_round2.json'

In [9]:
# Convert gt_df into a COCO-style "instances" dictionary and write it to tgtfile.
attrDict = dict()
# Only one category is emitted; every annotation below uses category_id 1.
attrDict["categories"] = [
    {"supercategory": "none", "id": 1, "name": 'full body'},
]
images = list()
annotations = list()
imageids = list()

objid = 1  # annotation ids start at 1 and keep increasing across images

imgid = 0  # image ids start at 0, one per distinct img_nm
for img_nm, sub_df in gt_df.groupby('img_nm'):
    imageids.append(imgid)
    # Image size is read from the first row of the group.
    images.append({
        'file_name': img_nm,
        'height': int(sub_df.imgH.iloc[0]),
        'width': int(sub_df.imgW.iloc[0]),
        'id': imgid,
    })

    # itertuples avoids building a full Series per box, which iterrows does.
    for box in sub_df.itertuples(index=False):
        # Coordinates are truncated to integers; bbox is [x, y, width, height].
        x, y, w, h = int(box.x1), int(box.y1), int(box.w), int(box.h)
        annotations.append({
            "image_id": imgid,
            "ignore": 0,
            "iscrowd": 0,
            "bbox": [x, y, w, h],
            "area": float(w * h),
            "category_id": 1,
            "id": objid,
            # Rectangle outline: the four corners of the box as one polygon.
            "segmentation": [[x, y, x, (y + h), (x + w), (y + h), (x + w), y]],
        })
        objid += 1
    imgid += 1

attrDict["images"] = images
attrDict["annotations"] = annotations
attrDict["type"] = "instances"

# Create the destination directory first so the write does not fail with
# FileNotFoundError when it does not exist yet.
out_dir = os.path.dirname(tgtfile)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

jsonString = json.dumps(attrDict, indent=2)
with open(tgtfile, "w") as f:
    f.write(jsonString)