In [1]:
import os

import cv2
from datetime import datetime
import glob
import json
import jsonlines
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
# Root directory of the iMerit delivery; every other path in this notebook is derived from it.
dataset_root = Path("/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023")

# Directory holding the annotation output that is globbed in the next cell.
annotations_dir = dataset_root.joinpath("annotations")

In [3]:
# Collect one consolidated-annotation JSON per labelled video
# (layout: <annotations_dir>/consolidated-annotation/output/<video number>/<file>.json).
#
# glob.glob() returns matches in arbitrary, filesystem-dependent order. The
# conversion loop hands out image / annotation / video / track ids in iteration
# order, so the list is sorted to keep those ids reproducible from run to run.
annotation_files = sorted(glob.glob(f"{annotations_dir}/consolidated-annotation/output/*/*.json"))
print(len(annotation_files))

162


In [4]:
# Build a lookup from manifest line index (0-based) to the local directory that
# holds the frames of the clip described on that manifest line.
manifest_json = dataset_root / "28102023_manifest.json"
key_to_video_dir = {}
with jsonlines.open(manifest_json) as reader:
    for line_num, line in enumerate(reader):
        # 'source-ref' points at a file; its parent is the clip's frame directory.
        # NOTE(review): presumably 'source-ref' is an "s3://..." URI and Path()
        # collapses the double slash, which would explain the "s3:/" prefix in
        # the pattern below -- confirm against the manifest.
        remote_dir = str(Path(line['source-ref']).parent)
        local_dir = remote_dir.replace(
            "s3:/whoi-rsi-fish-detection/datasets/imerit_26102023_clips",
            str(dataset_root),
        )
        key_to_video_dir[line_num] = Path(local_dir)

In [5]:
# Convert the per-video tracking annotations into COCO-style records.
#
# Four lists are filled here and serialised by the export cell:
#   coco_images       one record per frame (annotated or not)
#   coco_annotations  one record per bounding box
#   video_sequences   one record per video, listing its image ids in frame order
#   object_tracks     one record per tracked object, listing its bbox / image ids
coco_images = []
coco_annotations = []
video_sequences = []
object_tracks = []

# Running id counters; each is unique across the whole dataset.
img_num = 0
ann_id = 0
video_id = 0
tracklet_id = 0

do_viz = False
read_frame_from_disk = False or do_viz  # visualisation needs the pixels
img_w = 1920  # frame size used when images are not read from disk
img_h = 1080
expected_last_frame = 89  # frame numbers run 0..89 inclusive
license_id = 1  # must match the "id" of the entry in the exported "licenses" list
fish_category_id = 1
max_viz_frames = 10  # in viz mode, stop after this many annotated frames


def _load_frame(frame_path, read_from_disk, default_w, default_h):
    """Return ``(img_bgr, width, height)`` for one frame.

    When ``read_from_disk`` is true the image is decoded with OpenCV and its
    actual size is returned. Otherwise only the file's existence is checked and
    the default size is returned with ``img_bgr`` set to ``None``.

    Raises:
        FileNotFoundError: ``cv2.imread`` could not decode the file.
        AssertionError: the file does not exist (existence-check mode).
    """
    if read_from_disk:
        img_bgr = cv2.imread(str(frame_path))
        if img_bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read frame image: {frame_path}")
        height, width = img_bgr.shape[:2]
        return img_bgr, width, height
    assert os.path.exists(str(frame_path)), str(frame_path)
    return None, default_w, default_h


def _image_record(image_id, frame_path, root, width, height):
    """Build the COCO ``images`` entry for one frame; the path is stored relative to ``root``."""
    return {
        "id": image_id,
        "width": width,
        "height": height,
        "file_name": str(frame_path.relative_to(root)),
        "license": license_id,
        "date_captured": "",
    }


def _append_unannotated_frames(vid_dir, first_frame, stop_frame, next_img_id, frame_ids):
    """Register frames ``first_frame .. stop_frame - 1`` as images without annotations.

    Appends to the module-level ``coco_images`` and to ``frame_ids`` and
    returns the next unused image id.
    """
    for missing_frame in range(first_frame, stop_frame):
        # NOTE(review): gap frames are named with 3-digit zero padding, while the
        # note on the 'frame' field below mentions frame_%6d.png -- confirm
        # against the on-disk names (a mismatch surfaces as a load error here).
        frame_path = vid_dir / f'frame_{missing_frame:03d}.png'
        _, width, height = _load_frame(frame_path, read_frame_from_disk, img_w, img_h)
        coco_images.append(_image_record(next_img_id, frame_path, dataset_root, width, height))
        frame_ids.append(next_img_id)
        next_img_id += 1
    return next_img_id


def _draw_fish(img_bgr, x, y, w, h, label):
    """Draw one labelled bounding box onto ``img_bgr`` in place."""
    color = (0, 0, 255)
    text_color = (255, 255, 255)
    text_size = 2.0
    text_width = 2

    # OpenCV drawing calls need integer pixel coordinates.
    x0, y0, x1, y1 = int(x), int(y), int(x + w), int(y + h)
    cv2.rectangle(img_bgr, (x0, y0), (x1, y1), color, 5)

    # Filled background strip above the box, then the label on top of it.
    (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, text_size, text_width)
    cv2.rectangle(img_bgr, (x0, y0 - int(th * 1.5)), (x0 + tw, y0), color, -1)
    cv2.putText(img_bgr, label, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, text_size, text_color, text_width)


for a_f in annotation_files:
    with jsonlines.open(a_f) as reader:
        # The annotation file's parent directory name is the manifest line index.
        vid_num = int(Path(a_f).parent.name)
        vid_dir = key_to_video_dir[vid_num]

        for check, video_seq in enumerate(reader):
            assert check == 0  # each json just has one long line

            video_frame_list = []
            tracklets = {}  # object-id -> list of annotation ids, in frame order

            last_frame_num = -1
            for ann_frame_num, per_frame in enumerate(video_seq['tracking-annotations']):
                # keys: (['annotations', 'frame-no', 'frame', 'frame-attributes'])
                # ann_frame_num may run from 0 to a number less than or equal to 89
                # frame-no here corresponds to the frame number 0 to 89 inclusive
                # frame is the frame_%6d.png file name
                annotations = per_frame['annotations']
                frame_num = int(per_frame['frame-no'])

                # Frames without annotations are absent from the list; register
                # every skipped frame as an empty image so the video stays contiguous.
                # TODO: tracks are not terminated across such a gap (the original
                # "delete all live fish" step was never implemented).
                img_num = _append_unannotated_frames(
                    vid_dir, last_frame_num + 1, frame_num, img_num, video_frame_list
                )

                # Register the current (annotated) frame.
                frame_path = vid_dir / per_frame['frame']
                img_bgr, frame_w, frame_h = _load_frame(frame_path, read_frame_from_disk, img_w, img_h)
                coco_images.append(_image_record(img_num, frame_path, dataset_root, frame_w, frame_h))

                for fish in annotations:
                    # keys: (['height', 'width', 'top', 'left', 'class-id', 'label-category-attributes', 'object-id', 'object-name'])
                    x, y, w, h = fish['left'], fish['top'], fish['width'], fish['height']
                    obj_id = fish['object-id']

                    coco_annotations.append({
                        "id": ann_id,
                        "image_id": img_num,
                        "category_id": fish_category_id,
                        "bbox": [x, y, w, h],
                        "area": h * w,
                        "iscrowd": 0
                    })
                    tracklets.setdefault(obj_id, []).append(ann_id)
                    ann_id += 1

                    if do_viz:
                        _draw_fish(img_bgr, x, y, w, h, fish['object-name'])

                if do_viz:
                    print(f"Frame number: {frame_num}")
                    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
                    plt.figure()
                    plt.imshow(img_rgb)

                # bookkeeping
                video_frame_list.append(img_num)
                img_num += 1
                last_frame_num = frame_num

                # Stop only after bookkeeping, so the frame just registered is
                # not registered a second time by the padding below.
                if do_viz and ann_frame_num >= max_viz_frames - 1:
                    break

            # sometimes, the last image(s) don't have an annotation
            img_num = _append_unannotated_frames(
                vid_dir, last_frame_num + 1, expected_last_frame + 1, img_num, video_frame_list
            )

            # per video bookkeeping
            viddir_rel_dset_root = vid_dir.relative_to(dataset_root)
            assert len(video_frame_list) == expected_last_frame + 1, len(video_frame_list)
            video_sequences.append({
                "id": video_id,
                "image_id_list": video_frame_list,
                "file_name": str(viddir_rel_dset_root),
            })

            for obj_id, bbox_id_seq in tracklets.items():
                # Annotation ids double as indices into coco_annotations; verify that.
                for bbox_id in bbox_id_seq:
                    assert bbox_id == coco_annotations[bbox_id]["id"]

                object_tracks.append({
                    "id": tracklet_id,
                    "bbox_id_list": bbox_id_seq,
                    "image_id_list": [coco_annotations[bbox_id]["image_id"] for bbox_id in bbox_id_seq],
                    "video_seq_id": video_id,
                    "category_id": fish_category_id,
                })
                tracklet_id += 1
            video_id += 1

        if do_viz:
            print(f"only visualizing first 10 annotated frames on first video...")
            break

In [6]:
# Assemble the dataset-level metadata and write everything out as one JSON file.
#
# Layout of the exported file:
#
# info: Dict
#
# licenses: List[Dict]
#
# images: List[Dict]
#     "id": int,                      // unique for every image
#     "width": int,
#     "height": int,
#     "file_name": str,               // dataset relative path to image
#     "license": int,
#     "date_captured": str,
#
# annotations: List[Dict]
#     "id": int,                      // unique for every annotation
#     "image_id": int,
#     "category_id": int,             // 1 = fish; hierarchies are undecided, COCO supercategories give depth-1 trees
#     "bbox": List[int],              // [x, y, w, h]
#     "area": int,                    // w * h
#     "iscrowd": int
#
# categories: List[Dict]
#
# video_sequences: List[Dict]
#     "id": int,                      // unique for every video sequence
#     "image_id_list": List[int],     // list of ids of images
#     "file_name": str,               // dataset relative path to image directory corresponding to video
#
# object_tracks: List[Dict]
#     "id": int,                      // unique for every object track
#     "bbox_id_list": List[int],      // list of ids of bboxes
#     "image_id_list": List[int],     // list of ids of images, maybe redundant (bboxes are each associated with an image)
#     "video_seq_id": int,            // maybe redundant (video_sequences has list of image ids)
#     "category_id": int,             // maybe redundant (each bbox already has a category)
created_at = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

coco_info = {
    "year": 2023,
    "version": "1.0",
    "description": "whoi-rsi-fish-detection",
    "contributor": "WHOI RSI",
    "url": "warp.whoi.edu",
    "date_created": created_at,
}
coco_license = {"url": "TBD", "id": 1, "name": "TBD"}
coco_categories = [
    {"supercategory": "fish", "id": 1, "name": "fish"},
]

# Key order here is the key order in the written file.
coco_fmt_dataset = {
    "info": coco_info,
    "licenses": [coco_license],
    "images": coco_images,
    "annotations": coco_annotations,
    "categories": coco_categories,
    "video_sequences": video_sequences,
    "object_tracks": object_tracks,
}

coco_json_path = dataset_root / "coco.json"
with open(coco_json_path, 'w') as out_file:
    json.dump(coco_fmt_dataset, out_file, indent=4)
print(f"saved json to {coco_json_path}")

saved json to /media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023/coco.json
