# json2mmaction2
version: 1

info:
- Convert standard MOLA JSON annotations into the MMAction2 rawframes layout and file lists

author: nuno costa

In [11]:
import json
import os
import argparse
import time

import glob
import shutil
from pathlib import Path
import numpy as np
from tqdm import tqdm

In [12]:
def make_folders(path='../out/'):
    """Reset ``path`` to an empty directory.

    If something already exists at ``path`` the whole tree is removed
    first; afterwards the directory (and any missing parents) is created.

    Args:
        path: Output directory to (re)create.

    Returns:
        None.
    """
    already_present = os.path.exists(path)
    if already_present:
        # Drop the previous output so no stale files survive into this run.
        shutil.rmtree(path)
    os.makedirs(path)

In [13]:
def extract_file(src, dst, copy=True):
    """Copy ``src`` to ``dst`` or, when ``copy`` is false, only check ``src`` exists.

    Failures are reported on stdout and signalled through the return value
    instead of an exception, so callers can skip missing files.

    Args:
        src: Path of the source file.
        dst: Destination file path; its parent directory is created when
            needed. A bare file name (no directory part) is accepted.
        copy: When true the file is copied; when false nothing is written
            and only the existence of ``src`` is checked.

    Returns:
        True when the copy (or existence check) succeeded, False otherwise.
    """
    try:
        if copy:
            dst_dir = os.path.dirname(dst)
            # An empty dirname means "current directory": nothing to create.
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copyfile(src, dst)  # raises OSError if the source is missing
        elif not os.path.exists(src):
            raise FileNotFoundError(src)
    except (OSError, ValueError):
        # OSError covers missing files, permission problems and shutil errors;
        # ValueError covers malformed paths (e.g. embedded NUL characters).
        print("\n>> missing : {}".format(src))
        return False
    return True

# SINGLE CASE STUDY

In [14]:
# Root folder onto which the image paths stored in the JSON are joined.
datasets_root_dir='D:/external_datasets/MOLA/'
# Folder holding the MOLA JSON annotation file.
json_dir='D:/external_datasets/MOLA/INCAR/'
outdir='D:/external_datasets/MOLA/INCAR_MMA/' # keep the trailing slash: later cells build paths as outdir + filename
make_folders(path=outdir)  # output directory (any existing content is deleted first)
dataset="mola"
json_file = json_dir+'mola.json'
img_number=None #STOP CONDITION : None copies all
copy_images=True #if false only filelist is extracted from json
copy_videos=False
level=2 #level: 1- one level; 2-two-level

### PREPARE DATASET

In [15]:
def write_from_annotation(json_file, data, images, videos, categories, copy_images, copy_videos, outdir_img, outdir_video, img_number=None, level=2):
    """Copy annotated frames and build the MMAction2 rawframe file lists.

    Walks ``data['annotations']`` once. Only the first annotation seen for
    each image id is used; later annotations of the same image are skipped.
    Annotations whose bounding box has non-positive width or height, or
    whose image cannot be extracted, contribute nothing to the lists.

    Note:
        The source image path is built from the module-level
        ``datasets_root_dir`` variable, which must be defined before this
        function is called.

    Args:
        json_file: Path of the JSON file; only used in the progress-bar label.
        data: Parsed JSON dict with an ``'annotations'`` list. Each item must
            provide ``image_id``, ``category_id``, ``label_frames`` and
            ``bbox`` (``[x, y, w, h]``, top-left corner).
        images: Dict keyed by ``'%g' % id`` whose values provide ``height``,
            ``width``, ``file_name`` and ``video_id``.
        videos: Dict keyed by ``'%g' % id``; only used to check that the
            referenced video exists.
        categories: Dict keyed by ``'%g' % id`` whose values provide ``name``.
        copy_images: Forwarded to ``extract_file`` as ``copy``; when false the
            images are only checked for existence.
        copy_videos: Unused by this function.
        outdir_img: Root folder that receives the copied frames.
        outdir_video: Unused by this function.
        img_number: Optional stop condition; the loop ends once this many
            images have been listed. ``None`` processes everything.
        level: ``2`` nests frames as ``category/video/img``; any other value
            uses ``video/img``.

    Returns:
        Tuple ``(imglist, videolist, saved_img_l, saved_video_l)``. The first
        two are de-duplicated lists of text lines (each ending in a newline);
        the last two are always empty lists.

    Raises:
        KeyError: If an annotation references an unknown image, video or
            category, or a required field is missing.
    """
    seen_image_ids = set()  # set membership keeps the duplicate check O(1)
    imglist = []
    videolist = []
    saved_img_l = []  # never filled; kept so the return shape is unchanged
    saved_video_l = []  # never filled; kept so the return shape is unchanged
    img_counter = 0
    start = time.time()
    for x in tqdm(data['annotations'], desc='Annotations %s' % json_file):
        # NOTE(review): '%g' keeps only 6 significant digits, so ids of
        # 1,000,000 and above may collapse onto the same key. The lookup
        # dicts are keyed the same way by the caller, so it is left as is.
        image_id = '%g' % x['image_id']
        if image_id in seen_image_ids:
            continue  # only the first annotation of an image is used
        seen_image_ids.add(image_id)

        img = images[image_id]
        h, w, imgf = img['height'], img['width'], img['file_name']
        _, img_ext = os.path.splitext(imgf)
        img_new_fn = "img_" + image_id

        video_id = '%g' % img["video_id"]
        if video_id not in videos:
            # Fail fast on a dangling video reference.
            raise KeyError(video_id)
        video_new_fn = "video_" + video_id

        label = '%g' % x['category_id']
        category_entry = categories[label]
        total_frames = '%g' % x['label_frames']
        category = category_entry['name']

        # bbox format is [top left x, top left y, width, height]
        box = np.array(x['bbox'], dtype=np.float64)
        box[:2] += box[2:] / 2  # xy top-left corner to center
        box[[0, 2]] /= w  # normalize x & w
        box[[1, 3]] /= h  # normalize y & h
        if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
            # Extract the image first: if that fails nothing is listed.
            src = os.path.join(datasets_root_dir, imgf)
            if level == 2:
                dst = os.path.join(outdir_img, category, video_new_fn, img_new_fn + img_ext)
                frame_dir = f'{category}/{video_new_fn}'
            else:
                dst = os.path.join(outdir_img, video_new_fn, img_new_fn + img_ext)
                frame_dir = video_new_fn
            if not extract_file(src, dst, copy=copy_images):
                continue
            imglist.append(f'{frame_dir}/{img_new_fn}\n')
            img_counter += 1
            # rawframe annotation line: [ frame_directory total_frames label ]
            videolist.append(f'{frame_dir} {total_frames} {label}\n')
        # STOP condition
        if img_number and img_counter >= img_number:
            print("STOP CONDITION")
            break
    # Remove duplicate lines while keeping first-seen order.
    imglist = list(dict.fromkeys(imglist))
    videolist = list(dict.fromkeys(videolist))
    elapsed = time.time() - start
    print("time elapsed:", elapsed)
    return imglist, videolist, saved_img_l, saved_video_l

In [27]:
def mola2mmaction2(datasets_root_dir=None, json_file='mola.json', outdir='out/', copy_images=True, copy_videos=False, img_number=None, level=2):
    """Convert one MOLA JSON annotation file into MMAction2 rawframes data.

    Creates ``videos_<stem>/`` and/or ``rawframes_<stem>/`` under ``outdir``
    (``<stem>`` is the JSON file name without extension), parses the JSON and
    delegates the copying and list building to ``write_from_annotation``.

    Args:
        datasets_root_dir: Accepted for interface compatibility.
            NOTE(review): it is not forwarded; ``write_from_annotation``
            reads the module-level ``datasets_root_dir`` instead.
        json_file: Path of the MOLA JSON file. It must contain the
            ``images``, ``videos``, ``categories`` and ``annotations`` keys.
        outdir: Root output folder.
        copy_images: When true the rawframes folder is (re)created and
            images are copied into it.
        copy_videos: When true the videos folder is (re)created.
        img_number: Optional cap on the number of images listed.
        level: Folder nesting passed on to ``write_from_annotation``.

    Returns:
        Tuple ``(imglist, videolist, saved_img_l, saved_video_l)`` as
        returned by ``write_from_annotation``.

    Raises:
        RuntimeError: If the JSON file parses to an empty value.
        KeyError: If a required top-level key is missing.
    """
    # MAKE ROOT DIRS
    split_name = Path(json_file).stem  # folder suffix (train, val, test)
    outdir_video = os.path.join(outdir, 'videos_%s/' % split_name)
    outdir_img = os.path.join(outdir, 'rawframes_%s/' % split_name)
    if copy_videos:
        make_folders(path=outdir_video)
    if copy_images:
        make_folders(path=outdir_img)

    # PARSE JSON ANNOTATIONS
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)
    if not data:
        # Same exception type the former bare ``raise`` produced, now with context.
        raise RuntimeError('no annotation data found in %s' % json_file)

    # Lookup tables keyed by the '%g'-formatted id.
    images = {'%g' % x['id']: x for x in data['images']}
    videos = {'%g' % x['id']: x for x in data['videos']}
    categories = {'%g' % x['id']: x for x in data['categories']}

    # WRITE FILES (COPY & GENERATE FILELIST)
    return write_from_annotation(json_file, data, images, videos, categories, copy_images, copy_videos, outdir_img, outdir_video, img_number=img_number, level=level)
# Run the conversion for the single JSON file configured in the cell above.
imglist, videolist, saved_img_l, saved_video_l = mola2mmaction2(datasets_root_dir=datasets_root_dir, json_file=json_file, outdir=outdir, copy_images=copy_images, copy_videos=copy_videos, img_number=img_number, level=level)

Annotations D:/external_datasets/MOLA/INCAR/mola.json: 100%|███████████████████████| 1164/1164 [00:28<00:00, 40.28it/s]

time elapsed: 28.89821696281433





### GENERATE FILELIST

In [7]:
# GENERATE FILELISTS
# save videolist as {dataset}_{json file stem}_rawframes.txt inside outdir
dataset_type='rawframes'
filename=f'{dataset}_{Path(json_file).stem}_{dataset_type}.txt'
# outdir is concatenated directly, so it must end with a path separator
with open(outdir + filename, 'w') as f:
    f.writelines(videolist)

### CONFIG MMACTION2
1. Option 1 (RECOMMENDED): create a customconfig.py file
2. Option 2: modify an existing config and train from a script (see folder MMACTION2/TESTS/mmaction2_tutorial)

In [37]:
# 2. Option
from mmcv import Config
cfg = Config.fromfile('./configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py')

Given a config that trains a TSN model on kinetics400-full dataset, we need to modify some values to use it for training TSN on mola dataset.


In [None]:
root= outdir #default: "data/"
dataset="mola"
dataset_type = 'RawframeDataset'

# Modify dataset type and path
# NOTE(review): imgdir_path is not assigned at notebook level in the cells shown
# (it is only a local variable inside mola2mmaction2) - define it before running.
# NOTE(review): root+dataset+imgdir_path puts no separator between dataset and
# imgdir_path - confirm the intended folder layout.
cfg.dataset_type = dataset_type
cfg.data_root =  root+dataset+imgdir_path
cfg.data_root_val = root+dataset+imgdir_path
cfg.ann_file_train =  root+dataset+'/'+dataset+'_train_rawframes.txt'
cfg.ann_file_val = root+dataset+'/'+dataset+'_val_rawframes.txt'
cfg.ann_file_test = root+dataset+'/'+dataset+'_val_rawframes.txt'

# Test split: uses the validation annotation file and validation data root
cfg.data.test.type = dataset_type
cfg.data.test.ann_file = root+dataset+'/'+dataset+'_val_rawframes.txt'
cfg.data.test.data_prefix = cfg.data_root_val #'data/kinetics400_tiny/val/'

# Train split
cfg.data.train.type = dataset_type
cfg.data.train.ann_file = root+dataset+'/'+dataset+'_train_rawframes.txt'
cfg.data.train.data_prefix = cfg.data_root #'data/kinetics400_tiny/train/'

# Validation split
cfg.data.val.type = dataset_type
cfg.data.val.ann_file = root+dataset+'/'+dataset+'_val_rawframes.txt'
cfg.data.val.data_prefix = cfg.data_root_val #'data/kinetics400_tiny/val/'

# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)
# Modify num classes of the model in cls_head
cfg.model.cls_head.num_classes = 2
# We can use the pre-trained TSN model
cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'

# The original learning rate (LR) is set for 8-GPU training.
# The batch size (videos_per_gpu) is divided by 16; the LR is divided by 8
# (one GPU instead of eight) and by a further 16, presumably to follow the
# smaller batch size - TODO confirm.
cfg.data.videos_per_gpu = cfg.data.videos_per_gpu // 16
cfg.optimizer.lr = cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 30

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log print interval to reduce the number of log messages
cfg.log_config.interval = 5

# Set seed thus the results are more reproducible
# NOTE(review): set_random_seed is not imported in the cells shown - confirm
# where it comes from before running.
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

### Train a new recognizer

Finally, let's initialize the dataset and recognizer, then train a new recognizer!

In [None]:
# Build the training dataset and the model from cfg, then start training.
import os.path as osp

from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.apis import train_model

import mmcv

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# Single-process training with validation enabled
train_model(model, datasets, cfg, distributed=False, validate=True)

## Test the trained recognizer

After finetuning the recognizer, let's check the prediction results!

In [None]:
# Evaluate the trained model on the test split and print each metric.
# Relies on build_dataset, cfg and model from the previous cells.
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader
from mmcv.parallel import MMDataParallel

# Build a test dataloader
# NOTE: this rebinds `dataset`, which earlier cells use as the string "mola".
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

eval_config = cfg.evaluation
# Drop 'interval' before the remaining entries are passed as keyword arguments to dataset.evaluate
eval_config.pop('interval')
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

# MULTI CASE STUDY (For loop script for each json - train, val , test)

In [30]:
def convert_mola_json(dataset="mola", datasets_root_dir=None, json_dir='../mola/annotations/', outdir='out/', copy_images=True, copy_videos=False, img_number=None, level=2):
    """Convert every MOLA JSON file in ``json_dir`` to MMAction2 rawframes data.

    ``outdir`` is wiped and recreated, then each ``*.json`` file (processed
    in sorted order) is converted with ``mola2mmaction2`` and its video list
    is written to ``<outdir>/<dataset>_<json stem>_rawframes.txt``.

    Args:
        dataset: Dataset name used as prefix of the generated list files.
        datasets_root_dir: Forwarded to ``mola2mmaction2``.
        json_dir: Folder containing the JSON annotation files; a trailing
            path separator is optional.
        outdir: Output root folder; a trailing path separator is optional.
            Any existing content is deleted.
        copy_images: Forwarded to ``mola2mmaction2``.
        copy_videos: Forwarded to ``mola2mmaction2``.
        img_number: Forwarded to ``mola2mmaction2``.
        level: Forwarded to ``mola2mmaction2``.

    Returns:
        None.
    """
    make_folders(path=outdir)  # output directory
    # os.path.join keeps the paths valid with or without a trailing separator.
    json_paths = sorted(glob.glob(os.path.join(json_dir, '*.json')))
    for json_file in json_paths:
        _, videolist, _, _ = mola2mmaction2(
            datasets_root_dir=datasets_root_dir,
            json_file=json_file,
            outdir=outdir,
            copy_images=copy_images,
            copy_videos=copy_videos,
            img_number=img_number,
            level=level,
        )
        # GENERATE FILELISTS
        # save videolist : mola_{train,val}_rawframes.txt
        dataset_type = 'rawframes'
        filename = f'{dataset}_{Path(json_file).stem}_{dataset_type}.txt'
        with open(os.path.join(outdir, filename), 'w') as f:
            f.writelines(videolist)


In [31]:
# Settings for the multi-file conversion; outdir is deleted and recreated by convert_mola_json.
datasets_root_dir='D:/external_datasets/MOLA/'
json_dir='D:/external_datasets/MOLA/INCAR/'
outdir='D:/external_datasets/MOLA/INCAR_MMA_FORMAT/'
img_number=None  # None: no limit on the number of images
copy_images=True
copy_videos=False
level=2
convert_mola_json(datasets_root_dir=datasets_root_dir, json_dir=json_dir, outdir=outdir, 
                  img_number=img_number, copy_images=copy_images, copy_videos=copy_videos, level=level)

Annotations D:/external_datasets/MOLA/INCAR\mola.json: 100%|███████████████████████| 1164/1164 [00:29<00:00, 39.93it/s]

time elapsed: 29.156079530715942





# FAILED CODE IDEAS

In [13]:
# WARNING: too hard to implement when you also want to write the images.
# To add the image id you would need to search for it within each video.
def write_from_video_annotation(data, images, videos, categories, copy_images, copy_videos, outdir_img, outdir_video):
    """Abandoned attempt to build the file lists from ``data['video_annotations']``.

    NOTE(review): this function is not usable as written:
      * ``json_file`` and ``level`` are neither parameters nor locals, so they
        are read from module scope.
      * ``img_new_fn`` and ``img_ext`` are not defined here or at notebook
        level in the cells shown.
      * ``img_l`` is rebound to an empty list right before the inner loop, so
        that loop body never executes.
      * ``imglist``, ``saved_img_l`` and ``img_counter`` are (re)initialised on
        every video, so only the last video's values survive; with no video
        annotations they are never assigned and the code after the loop fails.

    Args:
        data: Parsed JSON dict; ``data['video_annotations']`` is iterated.
        images: Unused.
        videos: Dict keyed by ``'%g' % id`` whose values provide ``name`` and ``sensor``.
        categories: Dict keyed by ``'%g' % id`` whose values provide ``name``.
        copy_images: Forwarded to ``extract_file`` as ``copy``.
        copy_videos: Unused.
        outdir_img: Root folder for copied frames.
        outdir_video: Unused.

    Returns:
        Tuple ``(imglist, videolist, saved_img_l, saved_video_l)``.
    """
    # WRITE FILES (COPY & GENERATE FILELIST)
    # video lists
    video_l = []
    saved_video_l= []
    videolist = []
    # write files 
    method="for" #TODO: parfor method
    start=time.time()
    if method=="for":
        #WRITE IMAGES
        for x in tqdm(data['video_annotations'], desc='Video Annotations %s' % json_file):
            # extract video info from x['video_id']
            video_id = '%g' % x["video_id"]
            video = videos[video_id]
            videof= video["name"]
            video_sensor=video["sensor"]
            video_fn = Path(videof).stem
            video_new_fn = "video_"+video_id
            # extract label and category
            catid = '%g' % x['category_id']
            label = catid
            category = categories[catid]
            # extract total label frames
            total_frames = '%g' % x['label_frames']
            # extract category
            category = categories[label]['name']
            time_start = x['time_start'] #first frame
            time_end = x['time_end'] #end frame
            # list the files in <video folder>/<sensor>/
            imgdir=os.path.dirname(videof)+'/'+video_sensor+'/'
            img_l= sorted( filter( os.path.isfile, glob.glob(imgdir + '*') ) )
            # write images - 1st because if copy_images fails the rest should not be done
            # image lists
            img_l = []  # NOTE(review): discards the file list built just above
            saved_img_l= []
            imglist = []
            img_counter = 0 # image counter
            for imgd in img_l:
                src = os.path.join(imgd)
                dst = os.path.join(outdir_img, video_new_fn, img_new_fn + img_ext) #TODO: the image id must be looked up to build img_new_fn
                if level==2: dst = os.path.join(outdir_img, category, video_new_fn, img_new_fn + img_ext)
                ext=extract_file(src,dst,copy=copy_images)
                if not ext: continue #if image missing from dataset when extracting images dont write nothing more
                # rawframe annotation file list: json to txt [ frame_directory total_frames label  ]
                imgline = f'{video_new_fn} {total_frames} {label}\n' # f'{video_new_fn}/{img_new_fn} {total_frames} {label}\n'
                if level==2: imgline = f'{category}/{video_new_fn} {total_frames} {label}\n' # f'{category}/{video_new_fn}/{img_new_fn} {total_frames} {label}\n'
                imglist.append(imgline)
                img_counter += 1
        #remove duplicate paths
        imglist=list(dict.fromkeys(imglist)) 
    stop = time.time()
    elapsed=stop-start
    print("time elapsed:", elapsed)
    return imglist, videolist, saved_img_l, saved_video_l