# Create INCAR and INVICON MOLA JSON
version: 1

info:
- Create standard MOLA JSON

author: nuno costa

## MOLA Annotations Data Format

If you wish to combine multiple datasets, it is often useful to convert them into a unified data format. 

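Objective: convert the INCAR and INVICON ground-truth labels into one COCO-based MOLA JSON annotation file per dataset.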

In [2]:

 #ANNOTATIONS FORMAT (BASED ON COCO)

 #Annotations format keys:

{ "info": None, 
"licenses": [], #TODO
"categories": [], 
"images": [],
"annotations": [],
"videos": [], 
"video_annotations": [], #TODO
"tracks": [], #TODO - only for Object Tracking
"segment_info": [], #TODO
"datasets": [{'name': 'INCAR', 'id': 1}, {'name': 'INVICON', 'id': 2}] 
}

#1 object definition:

info: {
 "year": int, 
 "version": str, 
 "description": str, 
 "contributor": str, 
 "url": str, 
 "date_created": datetime,
}

license: {
 "id": int, 
 "name": str, 
 "url": str,
}

category: {
 "id": int, 
 "name": str, 
 "supercategory": str,
 "dataset": int, #dataset_id
}

image: {
 "id" : int,
 "video_id": int, 
 "file_name" : str,
 "license" : int,
 "dataset": int, #dataset_id
 # Redundant fields for COCO-compatibility
 "width": int,
 "height": int,
 "frame_index": int, #frame index from original video_id
 "date_captured": datetime,
}

annotation: { #rawframes annotation
 "category_id": int,
 "image_id": int,
 #"track_id": int, # NOT FOR ACTION, ONLY FOR OBJECT TRACKING
 "bbox": [x,y,width,height],
 "area": float,
 "dataset": int, #dataset_id
 # Redundant field for compatibility with COCO scripts
 "id": int,
 "iscrowd": 0 or 1,  # iscrowd=1 is used to label large groups of objects (e.g. a crowd of people)
 "segmentation": RLE(iscrowd=1) or [polygon](iscrowd=0), 

}

video: { 
 "id": int,
 "name": str,
 "width" : int,
 "height" : int,
 "total_frames": int, # TOTAL NUMBER OF FRAMES OF THE VIDEO
 "fps": int,
 "dataset": int, #dataset_id
 #"metadata": dict,  # Metadata about the video - NOT NECESSARY ADDITIONAL DICT
}

video_annotation: { #TODO
 "id": int,
 "category_id": int, #label
 "video_id": int,
 "frame_start": int, #in frames, then it can be converted using the fps
 "frame_end":int, #in frames
 "label_frames": int, # TOTAL NUMBER OF FRAMES OF LABEL category_id
 "dataset": int, #dataset_id
}



segment: { #TODO
 "id": int, 
 "category_id": int, 
 "area": int, 
 "bbox": [x,y,width,height], 
 # Redundant field for compatibility with COCO scripts
 "iscrowd": 0 or 1,
}


track: { #DOES IT MAKE SENSE TO TRACK ACTIONS INSIDE THE VIDEO? NO - ONLY OBJECTS
 "id": int,
 "category_id": int,
 "video_id": int
}

SyntaxError: invalid syntax (<ipython-input-2-580a7c10cdc0>, line 7)

## SETUP

In [8]:
from annotate_v5 import *
import platform 
import json
import os
from itertools import groupby

In [9]:
# Choose the datasets root directory according to the host operating system.
# The default is the Windows location; on Linux hosts the server mount is used instead.
rdir = 'D:/external_datasets/MOLA/'  # WARNING: must be the root folder of the datasets
print('OS: {}'.format(platform.platform()))
if 'LINUX' in str(platform.platform()).upper():
    rdir = "/home/administrator/server_data_ext/Recordings/EASYRIDE/P19/"  # alternative: '/mnt/d/external_datasets/'
print('root dir: {}'.format(rdir))
print('OS separator: {}'.format(os.path.sep))

OS: Linux-5.4.0-80-generic-x86_64-with-glibc2.10
root dir: /home/administrator/server_data_ext/Recordings/EASYRIDE/P19/
OS separator: /


# SINGLE CASE STUDY

### INIT motionLAB JSON
- uses annotate.init_json() function

In [46]:
# Per-sensor frame resolution as [width, height] in pixels, plus the frame rate
# stored under 'fps'. The import helpers index this dict by sensor name.
res = dict(
    rgb=[2048, 1536],
    thermal=[640, 512],
    pointcloud=[640, 576],
    fps=30,
)

In [44]:
def init_json(file='mola.json'):
    """Write an empty MOLA annotation skeleton to ``file``.

    The skeleton holds every top-level key of the MOLA (COCO-based) format with
    an empty list as value, plus an ``info`` record describing the dataset.

    Args:
        file: path of the JSON file to create (overwritten if it exists).

    Returns:
        None. The result is the file written to disk; a confirmation is printed.
    """
    # Local import: `datetime` is not imported explicitly in this notebook, so
    # the function should not depend on another module happening to export it.
    import datetime

    # Naive UTC timestamp. `datetime.utcnow()` is deprecated since Python 3.12;
    # dropping tzinfo keeps the stored string free of a "+00:00" suffix.
    created = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    output = {
        "info": None,
        "licenses": [],
        "categories": [],
        "videos": [],
        "images": [],
        "tracks": [],
        "segment_info": [],
        "annotations": [],
        "video_annotations": [],
        "datasets": [] #[{'name': 'COCO', 'id': 1}, {'name': 'TAO', 'id': 2}]
    }
    output['info'] = {
        "description": "MOLA Dataset",
        "url": "",
        "version": "1",
        "year": 2021,
        "date_created": created.isoformat(' ')
    }

    with open(file, 'w') as f:
        json.dump(output, f)
    print("JSON INITIATED : {}".format(file))

In [45]:
# Create the empty MOLA file for INCAR, register the dataset entry and save it back.
molafile=rdir+'INCAR/'+'mola.json'
init_json(file=molafile)
with open(molafile) as f:  # context manager so the read handle is closed
    molajson = json.load(f)
molajson['datasets']=[{'name': 'INCAR', 'id': 1}]
with open(molafile, 'w') as f:
    json.dump(molajson, f)
# Summary: number of entries under each top-level key
for k in molajson:
    print(k, len(molajson[k]))

JSON INITIATED : /home/administrator/server_data_ext/Recordings/EASYRIDE/P19/INCAR/mola.json
info 5
licenses 0
categories 0
videos 0
images 0
tracks 0
segment_info 0
annotations 0
video_annotations 0
datasets 1


### IMPORT JSON LABELS

In [18]:
# Ground-truth labels of one INCAR scenario (single case study).
file=rdir+"INCAR/20210506/Session 1/C8_P6_P5_1/gt/gt2.json"
with open(file) as f:  # context manager so the handle is closed
    gt=json.load(f)
# EXAMPLES: an already merged COCO/TAO annotation file, used only for the display() cells below.
# os.path.join avoids the doubled '/' that plain concatenation produced (rdir already ends with '/').
with open(os.path.join(rdir, 'annotations/splitann_mola_fix_equal_reorder_cleanclass_cleanimg/mix_coco_and_tao_aggressive/test.json')) as f:
    mergedjson = json.load(f)

FileNotFoundError: [Errno 2] No such file or directory: '/home/administrator/server_data_ext/Recordings/EASYRIDE/P19//annotations/splitann_mola_fix_equal_reorder_cleanclass_cleanimg/mix_coco_and_tao_aggressive/test.json'

In [43]:
def parse_path(path):
    """Return ``path`` with every backslash replaced by a forward slash.

    Args:
        path: a file path string, possibly using Windows separators.

    Returns:
        The same path using '/' as the only separator.
    """
    # A second replace of '\ ' (backslash + space) used to follow this line. It
    # could never match, because no backslash survives the first replace, and
    # its literal was an invalid escape sequence. It is dropped; results are
    # unchanged.
    return path.replace('\\', '/')

In [42]:
def fix_pahts(gt):
    """Normalise the frame paths stored in ``gt['gTruth']['DataSource']``.

    The data source may be a list of paths or a dict holding that list under
    'Source'. Each path is converted to '/' separators. Paths containing
    "MATLAB" are re-rooted onto the folder of a reference path (the first path
    containing "gt", else the first path), keeping only their file name.
    Finally only the last seven path components are kept, which drops the
    machine-specific root directory.

    Args:
        gt: ground-truth dict; modified in place.

    Returns:
        The same ``gt`` object, with 'DataSource' replaced by the cleaned list.
    """
    sources = gt['gTruth']['DataSource']
    if isinstance(sources, dict) and 'Source' in sources:
        sources = sources['Source']

    # Reference path: first entry mentioning "gt", falling back to the first entry
    reference = next((src for src in sources if "gt" in src), sources[0])
    reference_dir = parse_path(reference).split('/')[:-1]

    cleaned = []
    for src in sources:
        candidate = parse_path(src)
        if "MATLAB" in candidate:
            # MATLAB BUG: e.g. 'C:\\Tools\\MATLAB\\R2020a\\examples\\symbolic\\data\\196.png'
            candidate = '/'.join(reference_dir + [candidate.split('/')[-1]])
        cleaned.append('/'.join(candidate.split('/')[-7:]))  # remove root dir

    gt['gTruth']['DataSource'] = cleaned
    return gt
# Normalise the frame paths of the loaded ground truth (gt is modified in place and returned).
gt=fix_pahts(gt)

#### CATEGORIES IMPORT

In [21]:
#EXAMPLE
display(mergedjson["categories"][0])

NameError: name 'mergedjson' is not defined

In [22]:
gt['gTruth']['LabelDefinitions']

[{'Name': 'VIOLENT',
  'Type': 'Scene',
  'LabelColor': [0, 0.7241, 0.6552],
  'Group': 'None',
  'Description': ''}]

In [41]:
def import_categories(molajson, gt, start_id=0):
    """Append the label definitions of ``gt`` to ``molajson['categories']``.

    One category is created per entry of ``gt['gTruth']['LabelDefinitions']``,
    followed by an extra, manually added 'NONVIOLENT' category. Ids are
    consecutive and start at ``start_id + 1``.

    Args:
        molajson: MOLA dict; its first dataset id is attached to each category.
        gt: ground-truth dict holding the label definitions.
        start_id: last category id already in use (0 when there is none).

    Returns:
        Tuple ``(molajson, cat_l, cat_l_id, cat_l_dset)``: the updated dict and
        three parallel lists with the category names, ids and dataset ids.
    """
    dataset=molajson["datasets"][0]['id']
    # IMPORT categories name and id
    cat_l=[]
    cat_l_id=[]
    cat_l_dset=[]
    cat=gt['gTruth']['LabelDefinitions']
    for i,c in enumerate(tqdm(cat)):
        cat_l.append(c['Name'])
        cat_l_id.append(start_id+i+1) # id start from 1
        cat_l_dset.append(dataset) # dataset index
        molajson['categories'].append({'name':cat_l[i],'id':cat_l_id[i],'dataset':cat_l_dset[i]})
    # ADDITIONAL CATEGORIES: MANUAL
    name='NONVIOLENT'
    # The id must continue after start_id like the imported ones; ignoring
    # start_id gave the extra category a colliding id whenever start_id > 0.
    cid=start_id+len(cat_l)+1
    molajson['categories'].append({'name':name,'id':cid,'dataset':dataset})
    cat_l.append(name)
    cat_l_id.append(cid)
    cat_l_dset.append(dataset)
    print("\n>> categories:\n", molajson['categories'][-2:])
    return molajson, cat_l, cat_l_id, cat_l_dset
# Import the categories of the single case study (ids start at 1 because start_id defaults to 0).
molajson, cat_l, cat_l_id, cat_l_dset=import_categories(molajson, gt)

100%|██████████| 1/1 [00:00<00:00, 13273.11it/s]


>> categories:
 [{'name': 'VIOLENT', 'id': 1, 'dataset': 1}, {'name': 'NONVIOLENT', 'id': 2, 'dataset': 1}]





#### VIDEO IMPORT 

In [40]:
def import_videos(molajson, gt, res, start_id=0, sensor="rgb", ext=".mp4"):
    """Register the single video of a scenario in ``molajson['videos']``.

    The video name is derived from the first frame path in
    ``gt['gTruth']['DataSource']``: the path without its last three components
    is joined with '_' (spaces replaced by '_') to build the file name, which
    is placed in the 'gt' folder below that same path.

    Args:
        molajson: MOLA dict; its first dataset id is attached to the video.
        gt: ground-truth dict whose 'DataSource' lists the frame paths.
        res: dict with ``res[sensor] == [width, height]`` and ``res['fps']``.
        start_id: last video id already in use; the new video gets start_id + 1.
        sensor: sensor key used for the resolution and the file name.
        ext: video file extension, including the dot.

    Returns:
        Tuple ``(molajson, video_l, video_l_id)`` where both lists hold exactly
        one element: the video path and the video id.
    """
    dataset_id = molajson["datasets"][0]['id']
    new_video_id = start_id + 1
    frame_paths = gt['gTruth']['DataSource']
    frame_count = len(frame_paths)

    # Path components up to (and including) the scenario folder
    scenario_parts = frame_paths[0].split('/')[:-3]
    file_name = '_'.join(scenario_parts) + '_' + sensor + ext
    file_name = file_name.replace(' ', '_')  # no spaces in the file name (e.g. "Session 1")
    video_path = '/'.join(scenario_parts) + '/' + 'gt' + '/' + file_name

    video_l = [video_path]
    video_l_id = [new_video_id]
    record = {
        'name': video_path,
        'id': new_video_id,
        'width': res[sensor][0],
        'height': res[sensor][1],
        'sensor': sensor,
        'fps': res['fps'],
        'total_frames': frame_count,
        'dataset': dataset_id,
    }
    molajson['videos'].append(record)
    print("\n>> video:\n", molajson['videos'])
    return molajson, video_l, video_l_id
# Register the video of the single case study.
# NOTE: not idempotent - re-running this cell appends the same video again (see the duplicated entry in the output).
molajson, video_l, video_l_id=import_videos(molajson, gt, res)


>> video:
 [{'name': 'INCAR/20210506/Session 1/C8_P6_P5_1/gt/INCAR_20210506_Session_1_C8_P6_P5_1_rgb.mp4', 'id': 1, 'width': 2048, 'height': 1536, 'sensor': 'rgb', 'fps': 30, 'total_frames': 590, 'dataset': 1}, {'name': 'INCAR/20210506/Session 1/C8_P6_P5_1/gt/INCAR_20210506_Session_1_C8_P6_P5_1_rgb.mp4', 'id': 1, 'width': 2048, 'height': 1536, 'sensor': 'rgb', 'fps': 30, 'total_frames': 590, 'dataset': 1}]


#### IMAGE IMPORT

In [50]:
#EXAMPLE
display(mergedjson["images"][0])

{'license': 3,
 'file_name': 'COCO/2017/images/train2017/000000391895.jpg',
 'coco_url': 'http://images.cocodataset.org/train2017/000000391895.jpg',
 'height': 360,
 'width': 640,
 'date_captured': '2013-11-14 11:18:45',
 'flickr_url': 'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg',
 'id': 1,
 'caption': 'A man with a red helmet on a small moped on a dirt road. ',
 'dataset': 1}

In [25]:
gt['gTruth']['DataSource'][0]

'INCAR/20210506/Session 1/C8_P6_P5_1/gt/rgb/1.png'

In [39]:
def import_images(molajson, gt, res, start_id=0, video_id=1, sensor="rgb"):
    """Append one image record per frame path to ``molajson['images']``.

    For every path in ``gt['gTruth']['DataSource']`` the frame index is the
    integer file stem, the caption is the 4th path component from the end
    (scenario) and the capture date is the 6th from the end.

    Args:
        molajson: MOLA dict; its first dataset id is attached to each image.
        gt: ground-truth dict whose 'DataSource' lists the frame paths.
        res: dict with ``res[sensor] == [width, height]``.
        start_id: last image id already in use; ids continue from start_id + 1.
        video_id: id of the video the frames belong to.
        sensor: sensor key used to look up the resolution.

    Returns:
        Tuple ``(molajson, img_l, img_l_id)`` with the imported paths and ids.
    """
    dataset_id = molajson["datasets"][0]['id']
    img_l, img_l_id = [], []
    frame_paths = gt['gTruth']['DataSource']
    for image_id, frame_path in enumerate(tqdm(frame_paths), start=start_id + 1):  # id start from 1
        img_l.append(frame_path)
        img_l_id.append(image_id)
        parts = frame_path.split('/')
        stem = parts[-1].split('.')[0]
        record = {
            'file_name': frame_path,
            'id': image_id,
            'video_id': video_id,
            'caption': parts[-4],  # scenario
            'width': res[sensor][0],
            'height': res[sensor][1],
            "frame_index": int(stem),
            "date_captured": parts[-6],
            'dataset': dataset_id,
        }
        molajson['images'].append(record)
    print("\n>> images:\n", molajson['images'][-2:])
    return molajson, img_l, img_l_id
# Import the frames of the single case study, linking them to the video registered last.
molajson, img_l, img_l_id=import_images(molajson, gt, res, video_id=video_l_id[-1])

100%|██████████| 590/590 [00:00<00:00, 397468.58it/s]


>> images:
 [{'file_name': 'INCAR/20210506/Session 1/C8_P6_P5_1/gt/rgb/589.png', 'id': 589, 'video_id': 1, 'caption': 'C8_P6_P5_1', 'width': 2048, 'height': 1536, 'frame_index': 589, 'date_captured': '20210506', 'dataset': 1}, {'file_name': 'INCAR/20210506/Session 1/C8_P6_P5_1/gt/rgb/590.png', 'id': 590, 'video_id': 1, 'caption': 'C8_P6_P5_1', 'width': 2048, 'height': 1536, 'frame_index': 590, 'date_captured': '20210506', 'dataset': 1}]





#### CREATE ANNOTATIONS

In [53]:
#EXAMPLE
display(mergedjson["annotations"][1000])

{'segmentation': [[619, 463, 761, 463, 761, 535, 619, 535]],
 'bbox': [619, 463, 142, 72],
 'area': 10224,
 'iscrowd': 0,
 'id': 1243036,
 'image_id': 146427,
 'category_id': 1,
 'track_id': 3385,
 '_scale_uuid': 'd83e8588-2744-4f9b-a14c-b6c8129cd696',
 'scale_category': 'moving object',
 'video_id': 626,
 'dataset': 2}

In [27]:
gt['gTruth']['LabelData'][0]

{'VIOLENT': False}

In [38]:
def create_annotations(molajson, gt, res, cat_l, cat_l_id, cat_l_dset, img_l_id, start_id=0, sensor="rgb"):
    """Append one frame-level annotation per entry of ``gt['gTruth']['LabelData']``.

    The label of a frame is the first key of its label dict; a 'VIOLENT' label
    whose value is falsy is stored as 'NONVIOLENT'. The i-th label is attached
    to the i-th id in ``img_l_id``. The bounding box covers the whole frame.

    Args:
        molajson: MOLA dict; its first dataset id is attached to each record.
        gt: ground-truth dict holding the per-frame labels.
        res: dict with ``res[sensor] == [width, height]``.
        cat_l: category names, parallel to ``cat_l_id``.
        cat_l_id: category ids.
        cat_l_dset: dataset id per category (not used here).
        img_l_id: image ids, one per labelled frame and in the same order.
        start_id: last annotation id already in use.
        sensor: sensor key used to look up the resolution.

    Returns:
        Tuple ``(molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset)``.
    """
    dataset_id = molajson["datasets"][0]['id']
    ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset = [], [], [], [], []
    frame_labels = gt['gTruth']['LabelData']
    for position, frame_label in enumerate(tqdm(frame_labels)):
        label_name = list(frame_label.keys())[0]
        # specific rule of gt labelling: VIOLENT == False means NONVIOLENT
        if label_name == 'VIOLENT' and not frame_label["VIOLENT"]:
            label_name = "NONVIOLENT"
        category_id = cat_l_id[cat_l.index(label_name)]
        image_id = img_l_id[position]  # labels and images share the frame order
        width, height = res[sensor][0], res[sensor][1]
        record = {
            'id': start_id + position + 1,
            'category_id': category_id,
            'image_id': image_id,
            'bbox': [0, 0, width, height],  # [x,y,width,height] - whole frame
            'area': width * height,
            'iscrowd': 0,
            'dataset': dataset_id,
        }
        ann_id.append(record['id'])
        ann_catid.append(category_id)
        ann_imgid.append(image_id)
        ann_bbox.append(record['bbox'])
        ann_dset.append(dataset_id)
        molajson['annotations'].append(record)
    print("\n>> annotations:\n", molajson['annotations'][-2:])
    return molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset
# Create the frame-level annotations of the single case study (one per imported image).
molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset=create_annotations(molajson, gt,res, cat_l, cat_l_id, cat_l_dset, img_l_id)

100%|██████████| 590/590 [00:00<00:00, 385277.81it/s]


>> annotations:
 [{'id': 589, 'category_id': 1, 'image_id': 589, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 590, 'category_id': 1, 'image_id': 590, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}]





In [36]:
print("\n>> annotations:\n", molajson['annotations'][-5:])


>> annotations:
 [{'id': 586, 'category_id': 1, 'image_id': 586, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 587, 'category_id': 1, 'image_id': 587, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 588, 'category_id': 1, 'image_id': 588, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 589, 'category_id': 1, 'image_id': 589, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 590, 'category_id': 1, 'image_id': 590, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}]


In [37]:
def create_video_annotations(molajson, gt, res, cat_l, cat_l_id, cat_l_dset, video_l_id, start_id=0, sensor="rgb"):
    """Append one video annotation per run of consecutive equally-labelled frames.

    The label of a frame is the first key of its dict in
    ``gt['gTruth']['LabelData']``; a 'VIOLENT' label whose value is falsy is
    treated as 'NONVIOLENT'. Frames are numbered from 1. Every maximal run of
    frames sharing a label becomes one record with its first frame, last frame
    and length.

    Compared with the previous hand-written segmentation this fixes two edge
    cases: a label change on the very last frame was merged into the preceding
    segment, and a video with a single frame produced no annotation at all.

    Args:
        molajson: MOLA dict; its first dataset id is attached to each record.
        gt: ground-truth dict holding the per-frame labels.
        res: unused, kept for a signature parallel to ``create_annotations``.
        cat_l: category names, parallel to ``cat_l_id``.
        cat_l_id: category ids.
        cat_l_dset: dataset id per category (not used here).
        video_l_id: video ids; the first one is used (one video per scenario).
        start_id: last video-annotation id already in use.
        sensor: unused, kept for a signature parallel to ``create_annotations``.

    Returns:
        Tuple ``(molajson, ann_id, ann_catid, ann_videoid, ann_dset)`` with the
        ids, category ids, video ids and dataset ids of the created records.
    """
    dataset=molajson["datasets"][0]['id']
    ann_id=[]
    ann_catid=[]
    ann_videoid=[]
    ann_dset=[]
    labels=gt['gTruth']['LabelData']

    # Resolve the label name of every frame
    frame_labels=[]
    for l in labels:
        current_label=list(l.keys())[0]
        if current_label=='VIOLENT' and not l["VIOLENT"]: current_label="NONVIOLENT" #specific rule of gt labelling
        frame_labels.append(current_label)

    # One annotation per run of identical consecutive labels
    next_frame=1 # frames are 1-based
    for i,(label,run) in enumerate(groupby(frame_labels)):
        label_frames=sum(1 for _ in run)
        frame_start=next_frame
        frame_end=next_frame+label_frames-1
        next_frame=frame_end+1
        annid=start_id+i+1
        catid=cat_l_id[cat_l.index(label)]
        videoid=video_l_id[0] #only one video per scenario
        ann_id.append(annid)
        ann_catid.append(catid)
        ann_videoid.append(videoid)
        ann_dset.append(dataset)
        molajson['video_annotations'].append({'id':annid,
                                        'category_id':catid,
                                        'video_id':videoid,
                                        'frame_start': frame_start, #in frames, then it can be converted using the fps
                                        'frame_end': frame_end, #in frames
                                        "label_frames": label_frames,
                                        'dataset':dataset})
    print("\n>> video_annotations:\n", molajson['video_annotations'][-2:])
    return molajson, ann_id, ann_catid, ann_videoid, ann_dset
# Create the video-level annotations of the single case study.
# NOTE: this rebinds ann_id / ann_catid / ann_dset, which previously held the frame-level annotation lists.
molajson, ann_id, ann_catid, ann_videoid, ann_dset=create_video_annotations(molajson, gt,res, cat_l, cat_l_id, cat_l_dset, video_l_id)

100%|██████████| 2/2 [00:00<00:00, 31775.03it/s]


>> video_annotations:
 [{'id': 1, 'category_id': 2, 'video_id': 1, 'frame_start': 1, 'frame_end': 358, 'label_frames': 358, 'dataset': 1}, {'id': 2, 'category_id': 1, 'video_id': 1, 'frame_start': 359, 'frame_end': 590, 'label_frames': 232, 'dataset': 1}]





# MULTI CASE STUDY (For loop script)

In [None]:
# Build one mola.json per dataset by walking rdir/<dataset>/<day>/<session>/<scenario>/gt/gt2.json
datasets=['INCAR', 'INVICON']
# os.listdir returns entries in arbitrary order; every listing is sorted so the
# dataset, video, image and annotation ids are the same on every run.
datasetsdir = sorted(os.listdir(rdir))
# Scenarios whose label file could not be used. Both lists are shared by all
# datasets, so each dataset's report also contains the entries collected before it.
missing_gt_json=[]
missing_gt_mat=[]
label_folder="gt"
label_fname="gt2.json"
label_mat_fname="gt.m"
sensor="rgb"
ext=".mp4"
did=1 #start dataset id
for dataset in datasetsdir:
    if dataset not in datasets: continue
    daysdir = os.path.join(rdir, dataset)
    if not os.path.isdir(daysdir): continue  # test if is a folder
    days = sorted(os.listdir(daysdir))
    print(">>>\n EXTRACTING DATASET: "+dataset)
    #INIT JSON
    molafile=rdir+dataset+'/'+'mola.json'
    init_json(file=molafile)
    with open(molafile) as f:
        molajson = json.load(f)
    molajson['datasets'] = [{'name': dataset, 'id': did}] #[{'name': d, 'id': i+1} for i,d in enumerate(datasets)]
    did+=1 #new dataset added
    with open(molafile, 'w') as f:
        json.dump(molajson, f)
    #INIT VARS
    imported_cats = False # import cats only once per dataset
    cat_start_id = 0
    video_start_id = 0
    img_start_id = 0
    ann_start_id = 0
    vid_ann_start_id = 0
    cat_l, cat_l_id, cat_l_dset = [], [], []
    video_l, video_l_id = [], []
    img_l, img_l_id = [], []
    ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset = [], [], [], [], []
    vid_ann_id, vid_ann_catid, ann_videoid, vid_ann_dset = [], [], [], []
    #FOR LOOP
    for day in days:
        sessiondir = os.path.join(daysdir, day)
        if not os.path.isdir(sessiondir): continue  # test if is a folder
        for session in sorted(os.listdir(sessiondir)):
            scenariosdir = os.path.join(sessiondir, session)
            if not os.path.isdir(scenariosdir): continue  # test if is a folder
            for scenario in sorted(os.listdir(scenariosdir)):
                imgdir = os.path.join(scenariosdir, scenario)
                if not os.path.isdir(imgdir): continue  # test if is a folder
                labeldir = os.path.join(imgdir, label_folder)
                filename = os.path.join(labeldir, label_fname)
                try:
                    with open(filename) as f:
                        gt = json.load(f)
                    # fix gt paths
                    gt = fix_pahts(gt) #the 'gTruth' entry can also be missing
                except Exception as err:
                    # Best effort: record the scenario and go on. `Exception` (not a
                    # bare except) lets KeyboardInterrupt/SystemExit stop the run.
                    print(">>>>>>>MISSING OR BUG gtFILE: ", filename, "({}: {})".format(type(err).__name__, err))
                    missing_gt_json.append(filename)
                    matfile = filename.replace(label_fname, label_mat_fname)
                    if not os.path.isfile(matfile): missing_gt_mat.append(matfile)
                    continue
                # update molajson
                if not imported_cats:  # only imports one time
                    molajson, cat_l, cat_l_id, cat_l_dset = import_categories(molajson, gt, start_id=cat_start_id)
                    imported_cats = True
                molajson, video_l, video_l_id=import_videos(molajson, gt, res,
                                                            start_id=video_start_id,
                                                            sensor=sensor,
                                                            ext=ext)
                # sensor is passed explicitly so the image resolution matches the video entry
                molajson, img_l, img_l_id=import_images(molajson, gt, res,
                                                        start_id=img_start_id,
                                                        video_id=video_l_id[-1],
                                                        sensor=sensor)
                molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset = create_annotations(molajson, gt, res,
                                                                                                cat_l, cat_l_id,
                                                                                                cat_l_dset, img_l_id,
                                                                                                start_id=ann_start_id,
                                                                                                sensor=sensor)
                molajson, vid_ann_id, vid_ann_catid, ann_videoid, vid_ann_dset=create_video_annotations(molajson, gt,res,
                                                                                            cat_l, cat_l_id,
                                                                                            cat_l_dset, video_l_id,
                                                                                            start_id=vid_ann_start_id,
                                                                                            sensor=sensor)
                # update start ids to the last id; a scenario that produced no record
                # of some kind keeps the previous start id instead of raising IndexError
                if cat_l_id: cat_start_id = cat_l_id[-1]
                if video_l_id: video_start_id = video_l_id[-1]
                if img_l_id: img_start_id = img_l_id[-1]
                if ann_id: ann_start_id = ann_id[-1]
                if vid_ann_id: vid_ann_start_id = vid_ann_id[-1]

    # results
    for k in molajson:
        print(k, len(molajson[k]))

    # # Save
    print('\n >> SAVING...')
    jsonfile=molafile
    with open(jsonfile, 'w') as f:
        json.dump(molajson, f)
    with open(jsonfile.replace('.json', '_missing_gtmat.txt'),'w') as f:
        f.write(str(missing_gt_mat))
    with open(jsonfile.replace('.json', '_missing_gtjson.txt'),'w') as f:
        f.write(str(missing_gt_json))
    print("JSON SAVED : {} \n".format(jsonfile))

    #retest results
    with open(molafile) as f:
        molajson = json.load(f)
    for k in molajson:
        print(k, len(molajson[k]))

In [53]:
# Reload the saved INCAR file and print the number of entries per top-level key.
molafile=rdir+'INCAR/'+'mola.json'
print(molafile)
with open(molafile) as f:  # context manager so the handle is closed
    molajson = json.load(f)
for k in molajson:
    print(k, len(molajson[k]))

/home/administrator/server_data_ext/Recordings/EASYRIDE/P19/INCAR/mola.json
info 5
licenses 0
categories 2
videos 760
images 443433
tracks 0
segment_info 0
annotations 443433
video_annotations 1595
datasets 1


In [49]:
# Reload the saved INVICON file and print the number of entries per top-level key.
molafile=rdir+'INVICON/'+'mola.json'
with open(molafile) as f:  # context manager so the handle is closed
    molajson = json.load(f)
for k in molajson:
    print(k, len(molajson[k]))

info 5
licenses 0
categories 2
videos 763
images 388490
tracks 0
segment_info 0
annotations 388490
video_annotations 1524
datasets 1


In [54]:
molajson["datasets"][-1]

{'name': 'INCAR', 'id': 1}

In [55]:
molajson["categories"][-1]

{'name': 'NONVIOLENT', 'id': 2, 'dataset': 1}

In [56]:
molajson["videos"][13]

{'name': 'INCAR/20210422/Session1/C1_P7_P8_2/gt/INCAR_20210422_Session1_C1_P7_P8_2_rgb.mp4',
 'id': 14,
 'width': 2048,
 'height': 1536,
 'sensor': 'rgb',
 'fps': 30,
 'total_frames': 590,
 'dataset': 1}

In [57]:
molajson["images"][0]

{'file_name': 'INCAR/20210422/Session1/C10_P7_P8_1/gt/rgb/1.png',
 'id': 1,
 'video_id': 1,
 'caption': 'C10_P7_P8_1',
 'width': 2048,
 'height': 1536,
 'frame_index': 1,
 'date_captured': '20210422',
 'dataset': 1}

In [58]:
molajson["images"][-1]

{'file_name': 'INCAR/20210521/Session 2/C9_P4_P3_2/gt/rgb/566.png',
 'id': 443433,
 'video_id': 760,
 'caption': 'C9_P4_P3_2',
 'width': 2048,
 'height': 1536,
 'frame_index': 566,
 'date_captured': '20210521',
 'dataset': 1}

In [59]:
molajson["annotations"][-1]

{'id': 443433,
 'category_id': 1,
 'image_id': 443433,
 'bbox': [0, 0, 2048, 1536],
 'area': 3145728,
 'iscrowd': 0,
 'dataset': 1}

In [60]:
molajson["annotations"][0]

{'id': 1,
 'category_id': 2,
 'image_id': 1,
 'bbox': [0, 0, 2048, 1536],
 'area': 3145728,
 'iscrowd': 0,
 'dataset': 1}

In [64]:
molajson["video_annotations"][32]

{'id': 33,
 'category_id': 1,
 'video_id': 15,
 'frame_start': 310,
 'frame_end': 556,
 'label_frames': 247,
 'dataset': 1}

In [67]:
# create video ann dict {video_id_category_id: video_annotations}
videos_ann = {'%g' % x['id'] +'_'+'%g' % x['video_id'] +'_'+'%g' % x['category_id']: x for x in molajson['video_annotations']}
k_14_2=[k for k in list(videos_ann.keys()) if k.find('_14_2')>-1]
for k in k_14_2:
    display(videos_ann[k])

{'id': 29,
 'category_id': 2,
 'video_id': 14,
 'frame_start': 1,
 'frame_end': 290,
 'label_frames': 290,
 'dataset': 1}

{'id': 31,
 'category_id': 2,
 'video_id': 14,
 'frame_start': 565,
 'frame_end': 590,
 'label_frames': 26,
 'dataset': 1}