In [37]:
import glob
import jsonlines
from pathlib import Path
import natsort
import json
from tqdm import tqdm

In [38]:
# Setup labelbox client
import labelbox as lb
import labelbox.types as lb_types
import uuid
import base64
import requests

# Authenticate: the API key is kept out of the notebook and read from a local text file.
with open("labelbox_api_key.txt", "r") as key_file:
    API_KEY = key_file.read().strip()
client = lb.Client(api_key=API_KEY)

# Ontology: fetch it by id, then print its name followed by one line per tool.
ONTOLOGY_ID = "clqo6bd8v0jc407ybc1r9ehlb"
print("===ONTOLOGY DETAILS===")
ontology = client.get_ontology(ONTOLOGY_ID)
print("Name: ", ontology.name)
tools = ontology.tools()
for tool in tools:
    print(tool)

# Project: fetch the target project by id and print its name.
PROJECT_ID = "clqoh3ylw1o8s070hd6ch5z7o"
print("===PROJECT DETAILS===")
project = client.get_project(PROJECT_ID)
print("Name: ", project.name)


===ONTOLOGY DETAILS===
Name:  WHOI-RSI-USVI-Fish
Tool(tool=<Type.BBOX: 'rectangle'>, name='Fish', required=False, color='#1CE6FF', classifications=[], schema_id='clqo6by6j0if007ya1tvvaqs5', feature_schema_id='clqo6by6j0iez07yabwokelrd')
===PROJECT DETAILS===
Name:  WHOI-RSI-USVI-Fish-detect-and-track


In [39]:
# Local dataset layout: the manifest and the COCO file both live under the dataset root.
dataset_root = Path("/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023")
images_dir = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023"
manifest_path = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023/28102023_manifest.json"
coco_json_path = dataset_root / "coco.json"

# Walk the manifest once and derive, for every entry, the names used on the Labelbox side.
#   global key  = "<clip directory relative to the S3 root>_aws<manifest line index>.mp4"
#   external id = "<stem of the clip directory>_aws<manifest line index>.mp4"
replace_root = "s3://whoi-rsi-fish-detection/datasets/imerit_26102023_clips/"
ordered_video_dirs = []
ordered_global_keys = []
ordered_videos_with_aws_ids = []
labelbox_external_id_to_global_key = {}
video_name_to_global_key = {}
with jsonlines.open(manifest_path) as manifest_reader:
    # The manifest line index doubles as the "aws id" embedded in the names.
    for aws_id, manifest_entry in enumerate(manifest_reader):
        relative_ref = manifest_entry['source-ref'].replace(replace_root, "")
        clip_dir = Path(relative_ref).parent
        aws_suffix = f"_aws{aws_id}.mp4"

        clip_name = str(clip_dir)
        external_id = clip_dir.stem + aws_suffix
        clip_global_key = clip_name + aws_suffix

        ordered_video_dirs.append(clip_dir)
        ordered_videos_with_aws_ids.append(external_id)
        ordered_global_keys.append(clip_global_key)
        labelbox_external_id_to_global_key[external_id] = clip_global_key
        video_name_to_global_key[clip_name] = clip_global_key

# Load the COCO annotation file into memory.
with open(coco_json_path, "r") as coco_file:
    coco = json.load(coco_file)

In [40]:
# Collect and print the global keys of the data rows that are in the project.

# One entry per data row, across every batch of the project.
batches = project.batches()
project_global_keys_list = [
    data_row.global_key
    for batch in batches
    for data_row in batch.export_data_rows()
]

# Alternative (not recommended): restrict the upload to a hand-picked set of global keys.
# project_global_keys_list = ["Summer2016/JoelsShoal30mTransects061016/P6100005_0m_20s_aws150.mp4",
#                             "Oct2021_FishSurveys/Cocoloba 102421/P1010022_0m_13s_aws116.mp4",
#                             "Mar2017_FishSurveys/JS/P3270143_0m_44s_aws73.mp4"]

# De-duplicate, then show how many distinct keys there are and what they are.
project_global_keys_set = set(project_global_keys_list)
print(len(project_global_keys_set))
print(project_global_keys_set)


163
{'Fall2019_FishSurveys/USVI_Fall2019_Yawzi_FishSurvey2_0m_12s_aws48.mp4', 'Nov2017_FishSurveys/TEK171124-3_0m_8s_aws99.mp4', '072017/Joel_Shoal_170726_T4_0m_10s_aws10.mp4', 'Summer2016/Yawzi30mTransects060816/P6080010_0m_12s_aws161.mp4', '072017/TK_3_90_0m_13s_aws12.mp4', 'Summer2016/TEK10mTransects060716/P6070006_0m_10s_aws153.mp4', 'Mar2017_FishSurveys/JS/P3270143_0m_44s_aws73.mp4', '072017/Coco4_0m_5s_aws6.mp4', 'Mar2017_FishSurveys/TK/P3250058_0m_15s_aws76.mp4', 'July2022_FishSurveys/JS/P7260010_0m_5s_aws62.mp4', 'May2023_FishSurveys/YA230508/P5080341_0m_9s_aws92.mp4', 'Nov2018_FishSurveys_WS/Joel_s Shoal/P1010027_0m_11s_aws107.mp4', 'May2023_FishSurveys/CO230509/P5090353_0m_12s_aws80.mp4', 'Oct2021_FishSurveys/Booby Rock 102521/PA250046_0m_8s_aws115.mp4', 'Aug2020_FishSurveys/Coco/P1010012_0m_11s_aws29.mp4', 'July2022_FishSurveys/CO/P7240159_0m_5s_aws59.mp4', 'Mar2017_FishSurveys/BR/P3240036_0m_23s_aws69.mp4', 'Apr2018_FishSurveys/Apr2018JoelsShoal2_0m_21s_aws17.mp4', 'Nov2017

In [41]:
# Sanity check: later cells index coco["annotations"] and coco["video_sequences"]
# by id, so every record's "id" must equal its position in the list.
for section in ("annotations", "video_sequences"):
    for position, record in enumerate(coco[section]):
        assert position == record["id"]
print("Assertions hold")

Assertions hold


In [43]:
# Converts the COCO object tracks into one Labelbox NDJSON video-bbox annotation per track.
# Tracks whose video is not in the Labelbox project are skipped.
bbox_annotation_ndjson_payload = []

# Loop-invariant: every track is uploaded under the first tool of the ontology,
# so resolve its name once instead of once per track.
tool_name = ontology.tools()[0].name  # References an Ontology tool name

# video_seq_id -> {image_id: 0-based frame index}. The mapping depends only on the
# video sequence, so it is built at most once per sequence and shared by its tracks.
frame_maps = {}

# NOTE: video sequences and annotations are looked up by list position, which relies
# on position == id; this is re-asserted below for every record that is touched.
for object_track in coco['object_tracks']:
    video_seq_id = object_track["video_seq_id"]
    video_seq = coco["video_sequences"][video_seq_id]
    assert video_seq["id"] == video_seq_id

    # Raises KeyError if the sequence's file_name was not seen in the manifest.
    global_key = video_name_to_global_key[video_seq["file_name"]]

    if global_key not in project_global_keys_set:
        continue

    # Map object_track image_ids to their frame number in the sequence
    frame_map = frame_maps.get(video_seq_id)
    if frame_map is None:
        frame_map = {
            image_id: frame_id
            for frame_id, image_id in enumerate(video_seq["image_id_list"])
        }
        frame_maps[video_seq_id] = frame_map

    key_frames = []
    for bbox_id, image_id in zip(object_track["bbox_id_list"], object_track["image_id_list"]):
        annotation = coco["annotations"][bbox_id]
        assert annotation["id"] == bbox_id

        # The COCO bbox is unpacked as [left, top, width, height].
        left, top, width, height = annotation["bbox"]
        key_frames.append({
            "frame": frame_map[image_id] + 1,  # Labelbox frame numbers are 1-indexed
            "bbox": {
                "top": top,
                "left": left,
                "height": height,
                "width": width,
            },
        })

    # All keyframes of a track are placed in a single segment.
    # NOTE(review): if a track can skip frames, confirm that one segment (rather than
    # one segment per contiguous run of frames) is the intended representation.
    bbox_annotation_ndjson = {
        "name": tool_name,
        "dataRow": {"globalKey": global_key},
        "segments": [{"keyframes": key_frames}],
    }
    bbox_annotation_ndjson_payload.append(bbox_annotation_ndjson)
print("Num object tracks to upload: ", len(bbox_annotation_ndjson_payload))
# bbox_annotation_ndjson_payload

Num object tracks to upload:  15802


In [44]:
# Submit the whole payload as a single label-import job; the random UUID keeps the job name unique.
import_job_name = "label_import_job-" + str(uuid.uuid4())
upload_job_label_import = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=import_job_name,
    labels=bbox_annotation_ndjson_payload,
)

# Block until the job has finished, then print its errors and per-annotation statuses.
upload_job_label_import.wait_until_done()
print("Errors:", upload_job_label_import.errors)
print("Status of uploads: ", upload_job_label_import.statuses)
print("   ")

Errors: []
Status of uploads:  [{'uuid': '40b78da1-21aa-4fc9-80fc-55fd0766bf07', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS'}, {'uuid': 'bb891fb3-1f71-408e-81cc-112444738c46', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS'}, {'uuid': '574cd8d3-a414-4405-8f72-7460d02280c3', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS'}, {'uuid': '53b8cb10-c36f-4dba-b08b-a2a4c83edb54', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS'}, {'uuid': '649ff111-bf01-4eb9-b701-7af6546e3e24', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS'}, {'uuid': 'f25bafac-5534-4bd9-8117-b3c844c620ba', 'dataRow': {'id': 'clqhokvhi0ndm07954k26bpzj', 'globalKey': '072017/BR1_1m_22s_aws0.mp4'}, 'status': 'SUCCESS