## Create motion-based datasets from a Labelbox-style or COCO-style dataset

Types of datasets:
- Grids of crops of individual tracklets (grid-crops)
- On last image of tracklets, plot the past trajectories (traj-line-plots)
- On black background, plot all crops of the trajectory (traj-crop-plots)

In [1]:
# Global settings and datasets

# Labelbox dataset:
# labelbox_json = "/media/data/warp_data/wrsi-datasets/Labelbox-Export-WHOI-RSI-USVI-Fish-detect-and-track - 6_11_2024.ndjson"
# image_root_dir = "/media/data/warp_data/wrsi-datasets/whoi-rsi-fish-detection-yolo-dataset/images"

# COCO dataset:
coco_json = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023/coco.json"
image_root_dir = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023/"

output_root_dir = "/media/data/warp_data/wrsi-datasets/whoi-rsi-fish-motion-dataset"

In [2]:
# Setup Labelbox
import labelbox as lb
import labelbox.types as lb_types
import uuid
import base64
import requests

LABELBOX_API_KEY = "labelbox_api_key.txt"
ONTOLOGY_ID = "clqo6bd8v0jc407ybc1r9ehlb"
PROJECT_ID = 'clqoh3ylw1o8s070hd6ch5z7o' # WHOI RSI USVI Fish Track and Detect
DATASET_ID = "clqh7v7qi001r07886j6aws7i"

# Setup client
with open(LABELBOX_API_KEY,"r") as f:
    API_KEY = f.read().strip()
client = lb.Client(api_key=API_KEY)

ontology = client.get_ontology(ONTOLOGY_ID)
project = client.get_project(PROJECT_ID)
dataset = client.get_dataset(DATASET_ID)

In [3]:
# Imports
import cv2
import os
from tqdm import tqdm
import glob
from pathlib import Path
import json
import numpy as np

# Local imports
import labelbox_fish_utils as lbf

In [4]:
# All annotations assumed to be in x, y, w, h format

def crop_image(image_path, annotation, square_crop=True, resize=(244,244)):
    img = cv2.imread(image_path)
    x, y, w, h = annotation

    if square_crop:
        m = max(w,h)
        cx = int(x + w/2)
        cy = int(y + h/2)
        crop = img[int(cy-h/2):int(cy+h/2), int(cx-w/2):int(cx+w/2), :]
    else:
        crop = img[y:y+h, x:x+w, :]

    if resize:
        crop = cv2.resize(crop, resize)
    return crop

def trajectory_image(images, annotations, mode="crop"):
    """
    Inputs: An ordered list of images and annotations
    Mode traj: Plots the previous bbox trajectories of the object on last image
    Mode crop: Plots the previous crops on a black image, last one on top
    """
    last_image = images[:-1]

    if mode == "crop":
        base_image = np.zeros(last_image.shape)
    elif mode == "traj":
        base_image = last_image

    for image, annotation in zip(images, annotations):
        
def generate_motion_dataset_from_labelbox(json_path, image_root_dir, output_root_dir):
    # Iterate through every tracklet
    pass

def generate_motion_dataset_from_coco(coco_json_path, image_root_dir, output_root_dir, ordered=False):
    """
    Generates a tracklet-based dataset from coco and image_root directory
    If ordered is passed, will generate a separate folder consisting of grid images based on length of trajectories (and thus is sortable)

    todo: rename ordered to traj_len_based or something
    """
    # Parse COCO json
    with open(coco_json_path, "r") as f:
        coco = json.load(f)

    # If ordered, outputs a single non-nested directory of grids that are sortable by length of the trajectory
    ordered_grids = []

    if ordered:
        ordered_dir = os.path.join(output_root_dir, "ordered_grids")
        os.makedirs(ordered_dir, exist_ok=True)
    
    # Iterate through every object
    for object_track in tqdm(coco["object_tracks"]):
        bbox_id_list = object_track["bbox_id_list"]

        bboxes = []
        image_paths = []
        crops = []
        
        # Get bounding boxes and images
        for bbox_id in bbox_id_list:
            
            # Get bbox
            annotation = coco["annotations"][bbox_id]
            bbox = annotation["bbox"]

            # Get image
            image_id = annotation["image_id"]
            image = coco["images"][image_id]
            image_path = os.path.join(image_root_dir, image["file_name"])

            # Crop image
            crop = crop_image(image_path, bbox, resize=(244,244))

            bboxes.append(bbox)
            crops.append(crop)
            image_paths.append(image_path)

        # Make an image grid
        img_grid = np.hstack(crops)

        if ordered:
            #(img_grid, object_track_id, traj_len)
            traj_len = len(crops)
            object_track_id = object_track["id"]
            ordered_grids.append((img_grid, object_track["id"], traj_len))
            ordered_output_path = os.path.join(ordered_dir, f"traj_{traj_len}_obj_id_{object_track_id}.png")
            cv2.imwrite(ordered_output_path, img_grid)
            
        # Save stacked image
        video_path = coco["video_sequences"][object_track["video_seq_id"]]["file_name"]
        output_dir_path = os.path.join(output_root_dir, video_path)
        
        #os.makedirs(output_dir_path, exist_ok=True)
        output_path = os.path.join(output_dir_path, f"{object_track['id']}.png")
        #cv2.imwrite(output_path, img_grid)


IndentationError: expected an indented block after 'for' statement on line 32 (1945567551.py, line 35)

In [21]:
# Generate the dataset
generate_motion_dataset_from_coco(coco_json, image_root_dir, output_root_dir, ordered=True)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 15802/15802 [49:10<00:00,  5.36it/s]


15801: 1
15800: 1
15799: 1
15798: 1
15797: 1
15796: 1
15795: 1
15794: 1
15793: 1
15792: 1
15791: 1
15790: 1
15789: 2
15788: 2
15787: 2
15786: 1
15785: 3
15784: 2
15783: 2
15782: 3
15781: 3
15780: 2
15779: 2
15778: 3
15777: 4
15776: 3
15775: 4
15774: 4
15773: 3
15772: 2
15771: 2
15770: 3
15769: 4
15768: 2
15767: 4
15766: 3
15765: 1
15764: 1
15763: 6
15762: 1
15761: 1
15760: 1
15759: 5
15758: 2
15757: 2
15756: 5
15755: 1
15754: 1
15753: 2
15752: 7
15751: 7
15750: 1
15749: 1
15748: 1
15747: 3
15746: 2
15745: 8
15744: 3
15743: 3
15742: 1
15741: 2
15740: 1
15739: 1
15738: 1
15737: 2
15736: 5
15735: 1
15734: 5
15733: 6
15732: 6
15731: 5
15730: 7
15729: 10
15728: 3
15727: 6
15726: 1
15725: 1
15724: 2
15723: 5
15722: 2
15721: 2
15720: 2
15719: 1
15718: 3
15717: 8
15716: 11
15715: 3
15714: 4
15713: 3
15712: 12
15711: 4
15710: 4
15709: 4
15708: 7
15707: 11
15706: 2
15705: 2
15704: 6
15703: 6
15702: 4
15701: 4
15700: 1
15699: 1
15698: 4
15697: 3
15696: 2
15695: 2
15694: 1
15693: 2
15692: 7
15691: