# Create INCAR and INVICON MOLA JSON
version: 1

info:
- Create standard MOLA JSON

author: nuno costa

## MOLA Annotations Data Format

If you wish to combine multiple datasets, it is often useful to convert them into a unified data format. 

Conventions:

In [1]:

 #ANNOTATIONS FORMAT (BASED ON COCO)

 #Annotations format keys:

{ "info": None, 
"licenses": [], #TODO
"categories": [], #NOT ZERO-PADDED (from 1 to n) WARNING: Labelling in most algorithms is zero padded , be careful 
"images": [],
"annotations": [],
"videos": [], 
"video_annotations": [], #TODO
"tracks": [], #TODO - only for Object Tracking
"segment_info": [], #TODO
"datasets": [{'name': 'INCAR', 'id': 1}, {'name': 'INVICON', 'id': 2}] #NOT ZERO-PADDED (from 1 to n) 
}

#IMPORTANT: CONVENTION, no zero-padding, you need to implement the zero padding afterwards (most algorithms like yolo and mmaction2 need zero-padding)

#1 object definition:

info: {
 "year": int, 
 "version": str, 
 "description": str, 
 "contributor": str, 
 "url": str, 
 "date_created": datetime,
}

license: {
 "id": int, 
 "name": str, 
 "url": str,
}

category: {
 "id": int, 
 "name": str, 
 "supercategory": str,
 "dataset": int, #dataset_id
}

image: {
 "id" : int,
 "video_id": int, 
 "file_name" : str,
 "license" : int,
 "dataset": int, #dataset_id
 # Redundant fields for COCO-compatibility
 "width": int,
 "height": int,
 "frame_index": int, #frame index from original video_id
 "date_captured": datetime,
}

annotation: { #rawframes annotation
 "category_id": int
 "image_id": int,
 #"track_id": int, # NOT FOR ACTION, ONLY FOR OBJECT TRACKING
 "bbox": [x,y,width,height],
 "area": float,
 "dataset": int, #dataset_id
 # Redundant field for compatibility with COCO scripts
 "id": int,
 "iscrowd": 0 or 1,  (iscrowd=1) are used to label large groups of objects (e.g. a crowd of people)
 "segmentation": RLE(iscrowd=1) or [polygon](iscrowd=0), 

}

video: { 
 "id": int,
 "name": str,
 "width" : int,
 "height" : int,
 "total_frames": int, # TOTAL NUMBER OF FRAMES OF THE VIDEO
 "fps": int,
 "dataset": int, #dataset_id
 #"metadata": dict,  # Metadata about the video - NOT NECESSARY ADDITIONAL DICT
}

video_annotation: { #TODO
 "id": int,
 "category_id": int, #label
 "video_id": int,
 "frame_start": int, #in frames, then it can be converted using the fps
 "frame_end":int, #in frames
 "label_frames": int, # TOTAL NUMBER OF FRAMES OF LABEL category_id
 "dataset": int, #dataset_id
}



segment: { #TODO
 "id": int, 
 "category_id": int, 
 "area": int, 
 "bbox": [x,y,width,height], 
 # Redundant field for compatibility with COCO scripts
 "iscrowd": 0 or 1,
}


track: { #DOES IT MAKE SENSE TO TRACT ACTIONS INSIDE THE VIDEO? NO- ONLY OBJECTS
 "id": int,
 "category_id": int,
 "video_id": int
}

SyntaxError: invalid syntax (1066840000.py, line 58)

## SETUP

In [107]:
from annotate_v5 import *
import platform 
import json
import os
import sys
from itertools import groupby

In [108]:
#Define root dir dependent on OS
rdir='D:/external_datasets/MOLA/' #WARNING needs to be root datasets 
print('OS: {}'.format(platform.platform()))
if str(platform.platform()).upper().find('linux'.upper())>-1: rdir="/mnt/Data-Ext/Recordings/EASYRIDE/P19/" #'/mnt/d/external_datasets/'
print('root dir: {}'.format(rdir))
print('OS separator: {}'.format(os.path.sep))

OS: Linux-5.4.0-99-generic-x86_64-with-glibc2.10
root dir: /mnt/Data-Ext/Recordings/EASYRIDE/P19/
OS separator: /


# SINGLE CASE STUDY

### INIT motionLAB JSON
- uses annotate.init_json() function

In [109]:
res={
    'rgb': [2048, 1536], #w,h
    'thermal': [640,512],
    'pointcloud': [640,576],
    'fps': 30
}

In [110]:
def init_json(file='push_mola.json'):
    output = {
        "info": None,
        "licenses": [],
        "categories": [],
        "videos": [],
        "images": [],
        "tracks": [],
        "segment_info": [],
        "annotations": [],
        "video_annotations": [],
        "datasets": [] #[{'name': 'COCO', 'id': 1}, {'name': 'TAO', 'id': 2}] #Not zero-padded
    }
    output['info'] = {
        "description": "MOLA Dataset",
        "url": "",
        "version": "1",
        "year": 2021,
        "date_created": datetime.datetime.utcnow().isoformat(' ')
    }

    with open(file, 'w') as f:
        json.dump(output, f)
    print("JSON INITIATED : {}".format(file))

In [111]:
molafile=rdir+'INCAR/'+'push_mola.json'
init_json(file=molafile)
molajson =  json.load(open(molafile))
molajson['datasets']=[{'name': 'INCAR', 'id': 1}]
with open(molafile, 'w') as f:
    json.dump(molajson, f)
for k in molajson:
    print(k, len(molajson[k]))

JSON INITIATED : /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/push_mola.json
info 5
licenses 0
categories 0
videos 0
images 0
tracks 0
segment_info 0
annotations 0
video_annotations 0
datasets 1


### IMPORT JSON LABELS

In [112]:
file=rdir+"INCAR/20210422/Session1/C1_P7_P8_1/gt_nvs_push.json"
gt=json.load(open(file))
molaformatjson = json.load(open(rdir+'JSONS/INCAR/old/violent_incar_rgb.json')) #EXAMPLES

In [113]:
def parse_path(path):
    parsed_path = path.replace('\\', '/')
    parsed_path = parsed_path.replace('\ ', '/')
    return parsed_path

In [114]:
def fix_pahts(gt, remove_gt=True, dataset_root="INCAR"):
    #fix gt datasource
    paths=gt['gTruth2']['DataSource']
    if isinstance(paths, dict) and 'Source' in paths: paths=paths['Source']
    paths=[parse_path(p) for p in paths]
    #remove MATLAB BUG: 'C:\\Tools\\MATLAB\\R2020a\\examples\\symbolic\\data\\196.png'
    originalpath=paths[0]
    for p in paths: #verify it is not a matlab path the first one
        if not p.find("MATLAB") >-1 : 
            originalpath=p
            break
    originalpath=parse_path(originalpath)
    paths = ['/'.join(originalpath.split('/')[:-1]+[p.split('/')[-1]]) if p.find("MATLAB") > -1 else p for p in paths]  
    #remove root dir 
    paths = ['/'.join(p.split('/')[p.split('/').index(dataset_root):]) for p in paths] 
    #remove gt dir
    if remove_gt: paths = [p.replace("/gt/","/") if p.find('/gt/')>-1 else p for p in paths]
    gt['gTruth2']['DataSource']=paths
    return gt
gt=fix_pahts(gt)

#### CATEGORIES IMPORT

In [115]:
#EXAMPLE
display(molaformatjson["categories"][0])

{'name': 'VIOLENT', 'id': 1, 'dataset': 1}

In [116]:
gt['gTruth2']['LabelDefinitions']

[{'Name': 'pushing',
  'Type': 'Scene',
  'LabelColor': [0, 0.7241, 0.6552],
  'Group': 'None',
  'Description': ''}]

In [117]:
def import_categories(molajson, gt, start_id=0):
    dataset=molajson["datasets"][0]['id']
    # IMPORT categories name and id
    cat_l=[]
    cat_l_id=[]
    cat_l_dset=[]
    cat=gt['gTruth2']['LabelDefinitions']
    for i,c in enumerate(tqdm(cat)):
        cat_l.append(c['Name'])
        cat_l_id.append(start_id+i+1) # id start from 1
        cat_l_dset.append(dataset) # dataset index
        molajson['categories'].append({'name':cat_l[i],'id':cat_l_id[i],'dataset':cat_l_dset[i]})
    # ADDITIONAL CATEGORIES: MANUAL
    name='nonpushing'
    cid=len(cat_l)+1
    molajson['categories'].append({'name':name,'id':cid,'dataset':dataset})
    cat_l.append(name)
    cat_l_id.append(cid)
    cat_l_dset.append(dataset)
    print("\n>> categories:\n", molajson['categories'][-2:])
    return molajson, cat_l, cat_l_id, cat_l_dset
molajson, cat_l, cat_l_id, cat_l_dset=import_categories(molajson, gt)

100%|██████████| 1/1 [00:00<00:00, 11586.48it/s]


>> categories:
 [{'name': 'pushing', 'id': 1, 'dataset': 1}, {'name': 'nonpushing', 'id': 2, 'dataset': 1}]





#### VIDEO IMPORT 

In [118]:
def import_videos(molajson, gt, res, start_id=0, sensor="rgb", ext=".mp4"):
    dataset=molajson["datasets"][0]['id']
    #single-level:
    vid=start_id+1
    video_l=[]
    video_l_id=[]
    total_frames=len(gt['gTruth2']['DataSource'])
    #INCAR/20210521/Session 2/C9_P4_P3_2/rgb
    videon='_'.join(gt['gTruth2']['DataSource'][0].split('/')[:4])+'_'+sensor+ext #f'video_{vid}_{sensor}{ext}' 
    videon=videon.replace(' ','_') # remove trailing spaces in "Session 1"
    video='/'.join(gt['gTruth2']['DataSource'][0].split('/')[:4])+'/'+videon
    video_l.append(video)
    video_l_id.append(vid)
    i=0 #no loop
    molajson['videos'].append({'name':video_l[i],
                               'id':video_l_id[i],
                               'width': res[sensor][0],
                               'height': res[sensor][1],
                               'sensor': sensor,
                               'fps': res['fps'],
                               'total_frames': total_frames,
                               'dataset':dataset})
    print("\n>> video:\n", molajson['videos'][-2:])
    return molajson, video_l, video_l_id
molajson, video_l, video_l_id=import_videos(molajson, gt, res)


>> video:
 [{'name': 'INCAR/20210422/Session1/C1_P7_P8_1/INCAR_20210422_Session1_C1_P7_P8_1_rgb.mp4', 'id': 1, 'width': 2048, 'height': 1536, 'sensor': 'rgb', 'fps': 30, 'total_frames': 575, 'dataset': 1}]


#### IMAGE IMPORT

In [119]:
#EXAMPLE
display(molaformatjson["images"][0])

{'file_name': 'INCAR/20210422/Session1/C10_P7_P8_1/rgb/1.png',
 'id': 1,
 'video_id': 1,
 'caption': 'Session1',
 'width': 2048,
 'height': 1536,
 'frame_index': 1,
 'date_captured': 'INCAR',
 'dataset': 1}

In [120]:
gt['gTruth2']['DataSource']

['INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/1.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/2.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/3.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/4.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/5.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/6.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/7.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/8.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/9.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/10.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/11.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/12.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/13.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/14.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/15.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/16.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/17.jpg',
 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/18.jpg',
 'INCAR/20210422/Se

In [121]:
def import_images(molajson, gt, res, start_id=0, video_id=1, sensor="rgb"):
    dataset=molajson["datasets"][0]['id']
    # images filepath and id
    img_l=[]
    img_l_id=[]
    img=gt['gTruth2']['DataSource']
    for i,im in enumerate(tqdm(img)):
        img_l.append(im)
        img_l_id.append(start_id+i+1) # id start from 1
        frame_index=img_l[i].split('/')[-1]
        frame_index=int(frame_index.split('.')[0])
        molajson['images'].append({'file_name':img_l[i],
                                   'id':img_l_id[i],
                                   'video_id':video_id,
                                   'caption':img_l[i].split('/')[-4], # scenario
                                   'width': res[sensor][0],
                                   'height': res[sensor][1],
                                   "frame_index": frame_index,
                                   "date_captured": img_l[i].split('/')[-6],
                                   'dataset':dataset})
    print("\n>> images:\n", molajson['images'][-2:])
    return molajson, img_l, img_l_id
molajson, img_l, img_l_id=import_images(molajson, gt, res, video_id=video_l_id[-1])

100%|██████████| 575/575 [00:00<00:00, 501085.56it/s]


>> images:
 [{'file_name': 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/574.jpg', 'id': 574, 'video_id': 1, 'caption': 'Session1', 'width': 2048, 'height': 1536, 'frame_index': 574, 'date_captured': 'INCAR', 'dataset': 1}, {'file_name': 'INCAR/20210422/Session1/C1_P7_P8_1/nvs_push/575.jpg', 'id': 575, 'video_id': 1, 'caption': 'Session1', 'width': 2048, 'height': 1536, 'frame_index': 575, 'date_captured': 'INCAR', 'dataset': 1}]





#### CREATE ANNOTATIONS

In [122]:
#EXAMPLE
display(molaformatjson["annotations"][1000])

{'id': 1001,
 'category_id': 1,
 'image_id': 1001,
 'bbox': [0, 0, 2048, 1536],
 'area': 3145728,
 'iscrowd': 0,
 'dataset': 1}

In [123]:
gt['gTruth2']['LabelData'][0]

{'pushing': False}

In [124]:
def create_annotations(molajson, gt, res, cat_l, cat_l_id, cat_l_dset, img_l_id, start_id=0, sensor="rgb"):
    dataset=molajson["datasets"][0]['id']
    # annotations category_id, image_id, bbox, and dataset
    ann_id=[]
    ann_catid=[]
    ann_imgid=[]
    ann_bbox=[]
    ann_dset=[]
    labels=gt['gTruth2']['LabelData']
    for i,l in enumerate(tqdm(labels)):
        annid=start_id+i+1
        current_label=list(l.keys())[0]
        if current_label=='pushing' and not l["pushing"]: current_label="nonpushing" #specific rule of gt labelling
        catidx=cat_l.index(current_label)
        catid=cat_l_id[catidx]
        imgidx=i #frame
        imgid=img_l_id[imgidx]
        bbox=[0, 0, res[sensor][0], res[sensor][1]] # [x,y,width,height], #default RGB
        area=res[sensor][0]*res[sensor][1] #default RGB
        ann_id.append(annid)
        ann_catid.append(catid)
        ann_imgid.append(imgid)
        ann_bbox.append(bbox)
        ann_dset.append(dataset)
        molajson['annotations'].append({'id':annid,
                                        'category_id':catid,
                                        'image_id':imgid,
                                        'bbox': bbox,
                                        'area': area,
                                        'iscrowd': 0,
                                        'dataset':dataset})
    print("\n>> annotations:\n", molajson['annotations'][-2:])
    return molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset
molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset=create_annotations(molajson, gt,res, cat_l, cat_l_id, cat_l_dset, img_l_id)

100%|██████████| 575/575 [00:00<00:00, 481478.30it/s]


>> annotations:
 [{'id': 574, 'category_id': 2, 'image_id': 574, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 575, 'category_id': 2, 'image_id': 575, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}]





In [125]:
print("\n>> annotations:\n", molajson['annotations'][-5:])


>> annotations:
 [{'id': 571, 'category_id': 2, 'image_id': 571, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 572, 'category_id': 2, 'image_id': 572, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 573, 'category_id': 2, 'image_id': 573, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 574, 'category_id': 2, 'image_id': 574, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}, {'id': 575, 'category_id': 2, 'image_id': 575, 'bbox': [0, 0, 2048, 1536], 'area': 3145728, 'iscrowd': 0, 'dataset': 1}]


In [126]:
def create_video_annotations(molajson, gt, res, cat_l, cat_l_id, cat_l_dset, video_l_id, start_id=0, sensor="rgb"):
    dataset=molajson["datasets"][0]['id']
    # annotations category_id, image_id, bbox, and dataset
    ann_id=[]
    ann_catid=[]
    ann_videoid=[]
    ann_dset=[]
    labels=gt['gTruth2']['LabelData']
    #extract frames and correspondent labels
    labels_l=[]
    frames_l=[]
    previous_label=''
    frames_per_label=[]
    for i,l in enumerate(labels): #UNSPECIFIC LABEL EXTRACTOR
        current_label=list(l.keys())[0]
        if current_label=='pushing' and not l["pushing"]: current_label="nonpushing" #specific rule of gt labelling
        if previous_label != current_label or i+1>=len(labels):
            if i+1>=len(labels): frames_per_label.append(i+1)
            if frames_per_label: frames_l.append(frames_per_label)
            if previous_label !='': labels_l.append(previous_label)
            previous_label=current_label
            frames_per_label=[]
        frames_per_label.append(i+1)
    #create video annotations
    for i,c in enumerate(labels_l):
        annid=start_id+i+1
        catidx=cat_l.index(c)
        label_frames=frames_l[i]
        if not label_frames: continue #no frames
        catid=cat_l_id[catidx] 
        videoidx=0 #only one video per scenario
        videoid=video_l_id[videoidx]
        ann_id.append(annid)
        ann_catid.append(catid)
        ann_videoid.append(videoid)
        ann_dset.append(dataset)
        molajson['video_annotations'].append({'id':annid,
                                        'category_id':catid,
                                        'video_id':videoid,
                                        'frame_start': int(label_frames[0]), #in frames, then it can be converted using the fps
                                        'frame_end': int(label_frames[-1]), #in frames
                                        "label_frames": len(label_frames),
                                        'dataset':dataset})
    print("\n>> video_annotations:\n", molajson['video_annotations'][-2:])
    """  
    frames_pushing=[i+1 for i,l in enumerate(labels) if l["pushing"]]
    frames_nonpushing=[i+1 for i,l in enumerate(labels) if not l["pushing"]]
    for i,c in enumerate(tqdm(cat_l)):
        annid=start_id+i+1
        catidx=i
        #specific - TODO unspecific
        label_frames=frames_pushing
        if c=="nonpushing": label_frames=frames_nonpushing
        if not label_frames: continue #no frames of this category, therefore video of this category
        catid=cat_l_id[catidx]
        #dataset=cat_l_dset[catidx]
        videoidx=0 #only one video per scenario
        videoid=video_l_id[videoidx]
        ann_id.append(annid)
        ann_catid.append(catid)
        ann_videoid.append(videoid)
        ann_dset.append(dataset)
        molajson['video_annotations'].append({'id':annid,
                                        'category_id':catid,
                                        'video_id':videoid,
                                        'frame_start': int(label_frames[0]), #in frames, then it can be converted using the fps
                                        'frame_end': int(label_frames[-1]), #in frames
                                        "label_frames": len(label_frames),
                                        'dataset':dataset})
    print("\n>> video_annotations:\n", molajson['video_annotations'][-2:])"""
    return molajson, ann_id, ann_catid, ann_videoid, ann_dset
molajson, ann_id, ann_catid, ann_videoid, ann_dset=create_video_annotations(molajson, gt,res, cat_l, cat_l_id, cat_l_dset, video_l_id)


>> video_annotations:
 [{'id': 2, 'category_id': 1, 'video_id': 1, 'frame_start': 364, 'frame_end': 514, 'label_frames': 151, 'dataset': 1}, {'id': 3, 'category_id': 2, 'video_id': 1, 'frame_start': 515, 'frame_end': 575, 'label_frames': 61, 'dataset': 1}]


#### Missings handler

In [127]:
def missings_handler(imgdir,sensor):
    gt = {'gTruth2': {
        'DataSource': ['INCAR/20210521/Session 2/C9_P4_P3_2/rgb/1.png'],
        'LabelDefinitions': [{'Name': 'pushing','Type': 'Scene','LabelColor': [0, 0.7241, 0.6552],'Group': 'None','Description': ''}],
        'LabelData': [{'pushing': False}]}}
    DIR = os.path.join(imgdir,sensor)
    L_DIR = os.listdir(DIR)
    L_DIR = sorted(L_DIR, key=lambda x: int(os.path.splitext(x.split('/')[-1])[0]))
    frameslist=[name for name in L_DIR if os.path.isfile(os.path.join(DIR, name))]
    gt['gTruth2']['DataSource']=[os.path.join(DIR, frame) for frame in frameslist]
    gt['gTruth2']['LabelData']=[{'pushing': False} for frame in frameslist]
        
    return gt
gt=missings_handler('/mnt/Data-Ext/Datasets/Internal Datasets/EASYRIDE/P19/INCAR/20210422/Session1/C13_P7_P8_1','rgb')

# MULTI CASE STUDY (For loop script)

In [128]:
datasets=['INCAR', 'INVICON']#['INCAR', 'INVICON']
rdir=rdir
# FOR LOOP"
datasetsdir = os.listdir(rdir)
missing_gt_json=[]
missing_gt_mat=[]
label_folder=""#"gt"
missings_handler_b=True 
label_fname="gt_nvs_push.json"#"gt.json"
label_mat_fname="gt_nvs_push.mat"#"gt.m"
sensor="nvs_push"
ext=".mp4"
did=1 #start dataset id
for dataset in datasetsdir:
    if dataset in datasets:
        daysdir = os.path.join(rdir, dataset)
        if not os.path.isdir(daysdir): continue  # test if is a folder
        days = os.listdir(daysdir)
        print(">>>\n EXTRACTING DATASET: "+dataset)
        #INIT JSON
        molafile=rdir+dataset+'/'+'push_mola.json'
        init_json(file=molafile)
        molajson =  json.load(open(molafile))
        molajson['datasets'] = [{'name': dataset, 'id': did}] #[{'name': d, 'id': i+1} for i,d in enumerate(datasets)]
        did+=1 #nem dataset added
        with open(molafile, 'w') as f:
            json.dump(molajson, f)
        #INIT VARS
        imported_cats = False # import cats from each dataset
        cat_start_id = 0
        video_start_id = 0
        img_start_id = 0
        ann_start_id = 0
        vid_ann_start_id = 0
        cat_l, cat_l_id, cat_l_dset = [], [], []
        video_l, video_l_id = [], []
        img_l, img_l_id = [], []
        ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset = [], [], [], [], []
        vid_ann_id, vid_ann_catid, ann_videoid, vid_ann_dset = [], [], [], []
        #FOR LOOP
        for day in days:
            sessiondir = os.path.join(daysdir, day)
            if not os.path.isdir(sessiondir): continue  # test if is a folder
            sessions = os.listdir(sessiondir)
            for session in sessions:
                scenariosdir = os.path.join(sessiondir, session)
                if not os.path.isdir(scenariosdir): continue  # test if is a folder
                scenarios = os.listdir(scenariosdir)
                for scenario in scenarios:
                    if scenario in [""]: continue #TODO: add list of SCENARIOS that we don't want to use
                    imgdir = os.path.join(scenariosdir, scenario)
                    if not os.path.isdir(imgdir): continue  # test if is a folder
                    labeldir = os.path.join(imgdir, label_folder)
                    # if not os.path.isdir(labeldir): continue #should exist
                    filename = os.path.join(labeldir, label_fname)
                    try:
                        gt = json.load(open(filename))
                    except:
                        if missings_handler_b:
                            print("\n\n>>>>>>>CONVERTING MISSING  gtFILE: ", filename)
                            try:
                                gt = missings_handler(imgdir,sensor)
                                gt = fix_pahts(gt, dataset_root=dataset) #gTruth2 can be also missing missing
                                missing_gt_json.append([filename,"CONVERTED"])
                                if not os.path.isfile(filename.replace(label_fname, label_mat_fname)): missing_gt_mat.append(filename.replace(label_fname, label_mat_fname))
                            except Exception as e:
                                print(">>>>>>>MISSING: ", filename, e, sys.exc_info()[0])
                                missing_gt_json.append([filename,"FAIL_CONVERT"])
                                if not os.path.isfile(filename.replace(label_fname, label_mat_fname)): missing_gt_mat.append(filename.replace(label_fname, label_mat_fname))
                                continue
                        else:
                            print(">>>>>>>MISSING : ", filename)
                            missing_gt_json.append([filename,"MISSING"])
                            if not os.path.isfile(filename.replace(label_fname, label_mat_fname)): missing_gt_mat.append(filename.replace(label_fname, label_mat_fname))
                            continue
                    #Fix paths
                    try:
                        gt = fix_pahts(gt, dataset_root=dataset) #gTruth2 can be also missing missing
                    except:
                        print(">>>>>>> BUG gtFILE: ", filename)
                        missing_gt_json.append([filename,"BUG in GT file"])
                        if not os.path.isfile(filename.replace(label_fname, label_mat_fname)): missing_gt_mat.append(filename.replace(label_fname, label_mat_fname))
                        continue
                    # update molajson
                    if not imported_cats:  # only imports one time
                        molajson, cat_l, cat_l_id, cat_l_dset = import_categories(molajson, gt, start_id=cat_start_id)
                        imported_cats = True        
                    molajson, video_l, video_l_id=import_videos(molajson, gt, res,
                                                                start_id=video_start_id,
                                                                sensor=sensor,
                                                                ext=ext)
                    molajson, img_l, img_l_id=import_images(molajson, gt, res, 
                                                            start_id=img_start_id, 
                                                            video_id=video_l_id[-1])
                    molajson, ann_id, ann_catid, ann_imgid, ann_bbox, ann_dset = create_annotations(molajson, gt, res,
                                                                                                    cat_l, cat_l_id,
                                                                                                    cat_l_dset, img_l_id,
                                                                                                    start_id=ann_start_id,
                                                                                                    sensor=sensor)
                    molajson, vid_ann_id, vid_ann_catid, ann_videoid, vid_ann_dset=create_video_annotations(molajson, gt,res, 
                                                                                                cat_l, cat_l_id, 
                                                                                                cat_l_dset, video_l_id,
                                                                                                start_id=vid_ann_start_id,
                                                                                                sensor=sensor)
                    # update start ids to the last id
                    try:                     
                        cat_start_id = cat_l_id[-1]
                        video_start_id = video_l_id[-1]
                        img_start_id = img_l_id[-1]
                        ann_start_id = ann_id[-1]
                        vid_ann_start_id = vid_ann_id[-1]
                    except Exception as e:
                        print(">>>>>>> BUG IDs: ", filename, e, sys.exc_info()[0])
                        missing_gt_json.append([filename,"BUG IDs, missing frames?", e])
                        continue


        # results
        for k in molajson:
            print(k, len(molajson[k]))

        # # Save
        print('\n >> SAVING...')
        jsonfile=molafile
        with open(jsonfile, 'w') as f:
            json.dump(molajson, f)
        with open(jsonfile.replace('.json', '_missing_gtmat.txt'),'w') as f:
            f.write(str(missing_gt_mat))
        with open(jsonfile.replace('.json', '_missing_gtjson.txt'),'w') as f:
            f.write(str(missing_gt_json))
        print("JSON SAVED : {} \n".format(jsonfile))

        #retest results
        molajson =  json.load(open(molafile))
        for k in molajson:
            print(k, len(molajson[k]))

100%|██████████| 1/1 [00:00<00:00, 18236.10it/s]

>>>
 EXTRACTING DATASET: INCAR
JSON INITIATED : /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/push_mola.json


>>>>>>>CONVERTING MISSING  gtFILE:  /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_1/gt_nvs_push.json
>>>>>>>MISSING:  /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_1/gt_nvs_push.json [Errno 2] No such file or directory: '/mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_1/nvs_push' <class 'FileNotFoundError'>


>>>>>>>CONVERTING MISSING  gtFILE:  /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_2/gt_nvs_push.json
>>>>>>>MISSING:  /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_2/gt_nvs_push.json [Errno 2] No such file or directory: '/mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P7_P8_2/nvs_push' <class 'FileNotFoundError'>


>>>>>>>CONVERTING MISSING  gtFILE:  /mnt/Data-Ext/Recordings/EASYRIDE/P19/INCAR/20210422/Session1/C10_P8_P7_1/gt_nvs_




KeyError: 'nvs_push'

### TESTING 

In [44]:
molafile=rdir+'INCAR/'+'push_mola.json'
print(molafile)
molajson =  json.load(open(molafile))
for k in molajson:
    print(k, len(molajson[k]))

/mnt/Data-Ext/Datasets/Internal Datasets/EASYRIDE/P19/INCAR/mola.json
info 5
licenses 0
categories 2
videos 1275
images 668992
tracks 0
segment_info 0
annotations 668992
video_annotations 2111
datasets 1


In [34]:
molafile=rdir+'INVICON/'+'push_mola.json'
molajson =  json.load(open(molafile))
for k in molajson:
    print(k, len(molajson[k]))

info 5
licenses 0
categories 2
videos 1278
images 592201
tracks 0
segment_info 0
annotations 592201
video_annotations 2041
datasets 1


In [35]:
molajson["datasets"][-1]

{'name': 'INVICON', 'id': 1}

In [36]:
molajson["categories"][-1]

{'name': 'NONVIOLENT', 'id': 2, 'dataset': 1}

In [37]:
molajson["videos"][13]

{'name': 'INVICON/20210413/Session1/C18_P2_P7_2/INVICON_20210413_Session1_C18_P2_P7_2_rgb.mp4',
 'id': 14,
 'width': 2048,
 'height': 1536,
 'sensor': 'rgb',
 'fps': 30,
 'total_frames': 394,
 'dataset': 1}

In [38]:
molajson["images"][0]

{'file_name': 'INVICON/20210413/Session1/C13_P2_P7_1/rgb/1.png',
 'id': 1,
 'video_id': 1,
 'caption': 'Session1',
 'width': 2048,
 'height': 1536,
 'frame_index': 1,
 'date_captured': 'INVICON',
 'dataset': 1}

In [39]:
molajson["images"][-1]

{'file_name': 'INVICON/20210511/Session1/C6_P16_P15_2/rgb/505.png',
 'id': 592201,
 'video_id': 1278,
 'caption': 'Session1',
 'width': 2048,
 'height': 1536,
 'frame_index': 505,
 'date_captured': 'INVICON',
 'dataset': 1}

In [40]:
molajson["annotations"][-1]

{'id': 592201,
 'category_id': 1,
 'image_id': 592201,
 'bbox': [0, 0, 2048, 1536],
 'area': 3145728,
 'iscrowd': 0,
 'dataset': 1}

In [41]:
molajson["annotations"][0]

{'id': 1,
 'category_id': 2,
 'image_id': 1,
 'bbox': [0, 0, 2048, 1536],
 'area': 3145728,
 'iscrowd': 0,
 'dataset': 1}

In [42]:
molajson["video_annotations"][32]

{'id': 33,
 'category_id': 2,
 'video_id': 25,
 'frame_start': 1,
 'frame_end': 511,
 'label_frames': 511,
 'dataset': 1}

In [43]:
# create video ann dict {video_id_category_id: video_annotations}
videos_ann = {'%g' % x['id'] +'_'+'%g' % x['video_id'] +'_'+'%g' % x['category_id']: x for x in molajson['video_annotations']}
k_14_2=[k for k in list(videos_ann.keys()) if k.find('_14_2')>-1]
for k in k_14_2:
    display(videos_ann[k])

{'id': 14,
 'category_id': 2,
 'video_id': 14,
 'frame_start': 1,
 'frame_end': 394,
 'label_frames': 394,
 'dataset': 1}