In [29]:
# IMPORTS
# Native
import os
import pickle
from datetime import date

# Third party
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from progressbar import progressbar
import os.path as osp
import mmcv
import json

In [3]:
# General settings
# Date stamp (YYYY_MM_DD) of the day the notebook runs; it versions the output folder.
today = date.today().strftime('%Y_%m_%d')

# Directory settings
# Labeled source data: the split cell lists the entries of this folder.
origin_dir = "../data/Moorea_labeled_data"
# Converted dataset is written to a folder named after the date stamp.
destination_dir = f"../data/{today}_moorea"

8_2_morning_CE1_Spontaneous_Startle_rectified
R_8_3_afternoon_loom_vidGP010125_012916_short
8_5_CWcam_GP010019_15_32_s
8_2_morning_CW_Spontaneous_Startle_rectified
R_8_4_loom_w_cascade_vidGP020021_020542_short
GOPR2207_s1
8_2_morning_NE_Spontaneous_Startle_rectified
8_3_morning_charge_GP015223_15_00_s
8_4_lastvid
8_2_morning_charge_GP020026_s
GOPR0618_s
8_2_morning_NW_Spontaneous_Startle_rectified
R_8_4_loom_vidGP050021_031950_short
8_4_GP050021
8_2_morning_NR_cascade_group_GP040121_021930
R_8_2_morning_loom_GP040121_021725_short
8_5_NR_loom_w_singlebeat_020550
8_5_CEcam2_GP035226_01_26_s
8_5_CWcam_GP030019_00_23_s
8_5_NR_loom_012053
8_2_morning_charge_GP020026_09_29_s
8_2_morning_CE2_Spontaneous_Startle_rectified
GP062207_s
GOPR2207_s2
8_2_morning_charge02_02_34_45_s


In [6]:
# The split is made over the (sorted) entries of origin_dir, not over single frames.
vid_list = sorted(os.listdir(origin_dir))

# First hold out 10% of the entries for testing, then 20% of the remainder for
# validation. random_state is fixed so the split is reproducible.
train_val, test = train_test_split(vid_list, test_size=0.1, random_state=1)
train, val = train_test_split(train_val, test_size=0.2, random_state=1)

# Show the three subsets in the order train, val, test.
for subset in (train, val, test):
    print(subset)


['R_8_4_loom_w_cascade_vidGP020021_020542_short', 'R_8_3_afternoon_loom_vidGP010125_012916_short', 'GOPR2207_s2', '8_4_GP050021', '8_3_morning_charge_GP015223_15_00_s', '8_2_morning_CW_Spontaneous_Startle_rectified', 'GP062207_s', 'R_8_2_morning_loom_GP040121_021725_short', '8_5_NR_loom_012053', '8_2_morning_NE_Spontaneous_Startle_rectified', '8_2_morning_NW_Spontaneous_Startle_rectified', '8_4_lastvid', '8_2_morning_charge_GP020026_09_29_s', '8_2_morning_charge02_02_34_45_s', '8_5_NR_loom_w_singlebeat_020550', '8_2_morning_CE2_Spontaneous_Startle_rectified', '8_2_morning_NR_cascade_group_GP040121_021930']
['8_5_CEcam2_GP035226_01_26_s', 'R_8_4_loom_vidGP050021_031950_short', 'GOPR2207_s1', '8_2_morning_CE1_Spontaneous_Startle_rectified', '8_2_morning_charge_GP020026_s']
['8_5_CWcam_GP030019_00_23_s', '8_5_CWcam_GP010019_15_32_s', 'GOPR0618_s']


In [77]:
def convert_moorea_data_folder(split, data_dir, destination_dir, out_file):
    """Merge the per-video COCO files of ``split`` into one COCO annotation file.

    For every name ``d`` in ``split`` the file
    ``<data_dir>/<d>/annotations/<d>_coco.json`` is loaded. Each image listed
    in it is read from ``<data_dir>/<d>/annotated_images`` and written to
    ``<destination_dir>/images`` under its original file name. Images and
    annotations are renumbered from 0 so that their ids are unique across the
    merged dataset.

    Args:
        split: Iterable of video directory names; processed in sorted order.
        data_dir: Directory that contains one sub-directory per name in ``split``.
        destination_dir: Output directory; it and its ``images`` sub-directory
            are created when missing.
        out_file: Name of the merged annotation file, relative to
            ``destination_dir``.

    Notes:
        * Every annotation is written with ``category_id=1`` and ``iscrowd=0``,
          regardless of the values in the source file.
          NOTE(review): this presumes category id 1 exists in the copied
          ``categories`` list - confirm against the source files.
        * ``categories`` and ``info`` are copied from the last file processed.
          For an empty ``split`` they are written as ``[]`` and ``{}``.
        * Images keep their original file name, so two videos that contain the
          same file name overwrite each other in ``<destination_dir>/images``.
    """
    dest_img_dir = osp.join(destination_dir, "images")
    out_file = osp.join(destination_dir, out_file)
    # exist_ok=True: the image folder must also be created when destination_dir
    # already exists (e.g. on a second run or after another export).
    os.makedirs(dest_img_dir, exist_ok=True)

    annotations = []
    images = []
    last_data = None  # source of `categories` / `info`; stays None for an empty split
    for d in progressbar(sorted(split)):
        images_dir = osp.join(data_dir, d, "annotated_images")
        annotations_file = osp.join(data_dir, d, "annotations", f"{d}_coco.json")
        with open(annotations_file, 'r') as json_file:
            data = json.load(json_file)
        last_data = data

        # Group the annotations by their source image id once, instead of
        # rescanning the full annotation list for every image. File order is
        # preserved inside each group.
        annos_by_image = {}
        for a in data['annotations']:
            annos_by_image.setdefault(a['image_id'], []).append(a)

        for i in data['images']:
            new_img_id = len(images)
            # Copy the image by decoding and re-encoding it through mmcv.
            img = mmcv.imread(osp.join(images_dir, i['file_name']))
            mmcv.imwrite(img, osp.join(dest_img_dir, i['file_name']))
            images.append(dict(
                id=new_img_id,
                file_name=i['file_name'],
                height=i['height'],
                width=i['width']))
            for a in annos_by_image.get(i['id'], ()):
                annotations.append(dict(
                    image_id=new_img_id,
                    id=len(annotations),
                    category_id=1,
                    bbox=a['bbox'],
                    area=a['area'],
                    segmentation=a['segmentation'],
                    iscrowd=0))

    coco_dict = dict(
        images=images,
        annotations=annotations,
        categories=last_data['categories'] if last_data is not None else [],
        info=last_data['info'] if last_data is not None else {})
    mmcv.dump(coco_dict, out_file)

In [80]:
# Export each subset to its own COCO file, in the order train, test, val.
for split_videos, coco_name in (
    (train, "train_coco.json"),
    (test, "test_coco.json"),
    (val, "val_coco.json"),
):
    convert_moorea_data_folder(split_videos, origin_dir, destination_dir, coco_name)

100% (17 of 17) |########################| Elapsed Time: 0:00:15 Time:  0:00:15
100% (3 of 3) |##########################| Elapsed Time: 0:00:02 Time:  0:00:02
100% (5 of 5) |##########################| Elapsed Time: 0:00:03 Time:  0:00:03


In [4]:
# Convert the videos and pickled annotations of every sub-directory in a split to COCO
def convert_fish_tracking_to_coco(split, data_dir, destination_dir, out_file):
    """Extract annotated video frames and write them as a COCO detection dataset.

    For every name ``d`` in ``split`` and every ``angle`` in ``camera_angles``
    the pickled annotation file ``<d>Corr<angle>.pkl`` and the video
    ``<d>_<angle>.mp4`` are read in lockstep: one video frame per entry of
    ``annotation_dict["0"]["X"]`` / ``["Y"]``. Entries whose x or y is NaN are
    skipped (their frame is still consumed). For every remaining entry

    * the frame is written to ``<destination_dir>/images/<d>_<angle>_<idx>.jpg``,
    * a text label ``predator 0 x_min y_min x_max y_max width height`` is
      written to ``<destination_dir>/labels/<d>_<angle>_<idx>.txt``,
    * one image record and one annotation record are added to the COCO file.

    The box is a square of side ``2 * radius`` centred on ``(int(x), int(y))``;
    it is not clipped to the frame. ``<idx>`` is a zero-padded count of the
    frames written so far for that video, not the frame number inside the
    video.

    This function reads the module-level names ``video_dir``, ``label_dir``,
    ``camera_angles`` and ``radius``; they must be defined before it is called.

    Args:
        split: Iterable of sub-directory names; processed in sorted order.
        data_dir: Directory that contains one sub-directory per name in ``split``.
        destination_dir: Output directory; it and its ``images`` / ``labels``
            sub-directories are created when missing.
        out_file: Name of the COCO annotation file, relative to
            ``destination_dir``.
    """
    dest_img_path = osp.join(destination_dir, "images")
    dest_label_path = osp.join(destination_dir, "labels")
    out_file = osp.join(destination_dir, out_file)
    # exist_ok=True: both sub-folders must also be created when destination_dir
    # already exists, otherwise writing the label files fails.
    os.makedirs(dest_img_path, exist_ok=True)
    os.makedirs(dest_label_path, exist_ok=True)

    annotations = []
    images = []
    obj_count = 0  # doubles as image id: every written frame has exactly one box
    for d in progressbar(sorted(split)):
        sub_dir = osp.join(data_dir, d)
        video_path = osp.join(sub_dir, video_dir)
        annotation_path = osp.join(sub_dir, label_dir)

        for angle in camera_angles:
            annotation_file = osp.join(annotation_path, f"{d}Corr{angle}.pkl")
            # SECURITY: pickle.load can execute arbitrary code; only use it on
            # annotation files from a trusted source.
            with open(annotation_file, "rb") as pkl_file:
                annotation_dict = pickle.load(pkl_file)

            # Load video
            video_file = os.path.join(video_path, f"{d}_{angle}.mp4")
            cap = cv2.VideoCapture(video_file)
            try:
                # Frame size is a property of the video, so query it once.
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

                i = 0
                for x, y in zip(annotation_dict["0"]["X"], annotation_dict["0"]["Y"]):
                    ret, frame = cap.read()
                    if not ret:
                        # No frame left (or unreadable video): the remaining
                        # annotations cannot be paired with an image.
                        break

                    if np.isnan(x) or np.isnan(y):
                        continue

                    x_min = int(x) - radius
                    y_min = int(y) - radius
                    x_max = int(x) + radius
                    y_max = int(y) + radius

                    # Naming scheme
                    image_name = f"{d}_{angle}_{str(i).zfill(5)}"

                    # Write img
                    cv2.imwrite(f"{dest_img_path}/{image_name}.jpg", frame)

                    images.append(dict(
                        id=obj_count,
                        file_name=f"{image_name}.jpg",
                        height=height,
                        width=width))

                    # Write label: space-separated fields with a trailing space
                    label = ["predator", "0", x_min, y_min, x_max, y_max, width, height]
                    with open(f'{dest_label_path}/{image_name}.txt', 'w') as f:
                        for item in label:
                            f.write("%s " % item)

                    annotations.append(dict(
                        image_id=obj_count,
                        id=obj_count,
                        category_id=0,
                        bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                        area=(x_max - x_min) * (y_max - y_min),
                        iscrowd=0))

                    i += 1
                    obj_count += 1
            finally:
                # Always free the decoder handle, even when a frame fails to process.
                cap.release()

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id': 0, 'name': 'predator'}])
    mmcv.dump(coco_format_json, out_file)

In [79]:
# Export each subset with the fish-tracking converter, in the order train, test, val.
# NOTE(review): these are the same destination_dir and file names used by the
# convert_moorea_data_folder calls, so running both cells overwrites the
# annotation files written there - confirm this is intended.
for split_videos, coco_name in (
    (train, "train_coco.json"),
    (test, "test_coco.json"),
    (val, "val_coco.json"),
):
    convert_fish_tracking_to_coco(split_videos, origin_dir, destination_dir, coco_name)

NameError: name 'convert_fish_tracking_to_coco' is not defined