In [2]:
import labelbox as lb
import labelbox.types as lb_types
import uuid
import base64
import requests

# --- Labelbox client ---
# The API key is read from a local text file instead of being written in the notebook.
with open("labelbox_api_key.txt","r") as key_file:
    API_KEY = key_file.read().strip()
client = lb.Client(api_key=API_KEY)

# --- Ontology: print its name and every tool it defines ---
ONTOLOGY_ID = "clqo6bd8v0jc407ybc1r9ehlb"
print("===ONTOLOGY DETAILS===")
ontology = client.get_ontology(ONTOLOGY_ID)
print("Name: ", ontology.name)
tools = ontology.tools()
for tool in tools:
    print(tool)

# --- Project ---
# Swap in the commented-out ID below to point the notebook at the test project.
PROJECT_ID = 'clqoh3ylw1o8s070hd6ch5z7o' # WHOI RSI USVI Fish
# PROJECT_ID = 'clqo7auln0mpo07wphorp0t2e' # Test WHOI RSI USVI Fish
print("\n===PROJECT DETAILS===")
project = client.get_project(PROJECT_ID)
print("Name: ", project.name)

# --- Dataset ---
DATASET_ID = "clqh7v7qi001r07886j6aws7i"
dataset = client.get_dataset(DATASET_ID)
print("\n===DATASET DETAILS===")
print("Name: ", dataset.name)

===ONTOLOGY DETAILS===
Name:  WHOI-RSI-USVI-Fish
Tool(tool=<Type.BBOX: 'rectangle'>, name='Fish', required=False, color='#ffa900', classifications=[Classification(class_type=<Type.RADIO: 'radio'>, name='species', instructions='Species', required=False, options=[Option(value='absa_sergeant_major', label='ABSA (Sergeant Major)', schema_id='clr6rzlg40jca07yvbsh21gix', feature_schema_id='clr6rzlg40jc907yvaa6c74uy', options=[]), Option(value='acan_unknown', label='ACAN_UNKNOWN (Unknown Tang (Acanthuridae))', schema_id='clr6rzlg40jcc07yvdvimbb07', feature_schema_id='clr6rzlg40jcb07yv2wiu7we2', options=[]), Option(value='acch_doctorfish', label='ACCH (Doctorfish)', schema_id='clr6rzlg40jce07yvc1bf39h4', feature_schema_id='clr6rzlg40jcd07yv93yo4448', options=[]), Option(value='acco_blue_tang', label='ACCO (Blue Tang)', schema_id='clr6rzlg40jcg07yvhpdl0y1h', feature_schema_id='clr6rzlg40jcf07yvek4v76ng', options=[]), Option(value='acpo_honeycomb_cowfish', label='ACPO (Honeycomb Cowfish)', schem

In [5]:
# Verify Datarow metadata
# Expected dataset shape: number of videos, and frames / frame rate per video.
EXPECTED_NUM_DATA_ROWS = 163
EXPECTED_FRAME_COUNT = 90
EXPECTED_FRAME_RATE = 3

# export_data_rows() yields rows lazily and can only be consumed once, so
# materialize it into a list up front. Calling list() on it just for the
# length check would exhaust it and the per-row loop below would silently
# iterate over nothing.
data_rows = list(dataset.export_data_rows())

assert len(data_rows) == EXPECTED_NUM_DATA_ROWS, len(data_rows)

# Every video must have the expected frame count and frame rate.
for data_row in data_rows:
    assert data_row.media_attributes["frameCount"] == EXPECTED_FRAME_COUNT, data_row
    assert data_row.media_attributes["frameRate"] == EXPECTED_FRAME_RATE, data_row


In [6]:
# Export JSON Labels
# Optional sections to include in the export payload.
params = {
    "data_row_details": True,
    "metadata_fields": True,
    "attachments": True,
    "project_details": True,
    "performance_details": True,
    "label_details": True,
    "interpolated_frames": True
}

# A previous run with no explicit timeout returned while the job was still
# IN_PROGRESS, and reading `.result` then raised ValueError. Wait longer;
# raise this value further if the export still does not finish in time.
EXPORT_TIMEOUT_SECONDS = 3600

project = client.get_project(PROJECT_ID)
export_task = project.export_v2(params=params)

export_task.wait_till_done(timeout_seconds=EXPORT_TIMEOUT_SECONDS)
if export_task.errors:
    print(export_task.errors)
export_json = export_task.result
# Print a summary only; the full export is large and is written to disk separately.
print("Num exported data rows: ", len(export_json))

ValueError: Job status still in `IN_PROGRESS`. The result is not available. Call task.wait_till_done() with a larger timeout or contact support.

In [None]:
# Write the exported labels to a JSON file in the working directory.
# `json` is imported in this cell so it runs on a fresh kernel in top-to-bottom order.
import json

output_data = "whoi-rsi-fish-detections-labels.json"
with open(output_data, 'w') as f:
    json.dump(export_json, f)

In [4]:
import glob
import jsonlines
from pathlib import Path
import natsort
import json

# Local dataset layout
dataset_root = Path("/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023")
images_dir = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023"
manifest_path = "/media/data/warp_data/marine_detection/imerit/whoi-rsi-fish-detection-datasets-22122023/28102023_manifest.json"
coco_json_path = dataset_root / "coco.json"

# Walk the manifest (one JSON record per line) to recover each video's
# directory and derive its Labelbox name and global key.
# Name       = <video dir stem>_aws<index>.mp4
# Global key = <video dir path relative to the S3 root>_aws<index>.mp4
# where <index> is the record's position in the manifest.
replace_root = "s3://whoi-rsi-fish-detection/datasets/imerit_26102023_clips/"
ordered_video_dirs = []
ordered_videos_with_aws_ids = []
ordered_global_keys = []
name_to_global_key = {}
with jsonlines.open(manifest_path) as manifest:
    for aws_id, video_json in enumerate(manifest):
        # Strip the S3 root so only the relative path remains.
        video_path = Path(video_json['source-ref'].replace(replace_root, ""))
        clip_dir = video_path.parent
        aws_suffix = f"_aws{aws_id}.mp4"

        labelbox_name = clip_dir.stem + aws_suffix
        labelbox_global_key = str(clip_dir) + aws_suffix

        ordered_video_dirs.append(clip_dir)
        ordered_videos_with_aws_ids.append(labelbox_name)
        ordered_global_keys.append(labelbox_global_key)
        name_to_global_key[labelbox_name] = labelbox_global_key

In [None]:
# Assign global keys to dataset (DO NOT RUN THIS UNLESS YOU KNOW WHAT YOU'RE DOING)
# With dry_run = True the payload is only printed; set it to False to send
# the assignments to Labelbox.
dry_run = True

global_key_assignments = []
data_rows = dataset.export_data_rows()
for data_row in data_rows:
    # A row that already carries a global key suggests this step has been run
    # before, so stop instead of attempting to reassign.
    assert data_row.global_key is None, data_row
    assignment = {
        "data_row_id": data_row.uid,
        "global_key": name_to_global_key[data_row.external_id]
    }
    global_key_assignments.append(assignment)

print("Num keys to assign: ", len(global_key_assignments))
if not dry_run:
    res = client.assign_global_keys_to_data_rows(global_key_assignments)
    print(res)
else:
    print("Payload", global_key_assignments)
    