In [4]:
import labelbox as lb
import labelbox.types as lb_types
import uuid
import base64
import requests

# Setup client
with open("labelbox_api_key.txt","r") as f:
    API_KEY = f.read().strip()
client = lb.Client(api_key=API_KEY)

# Get ontology
print("===ONTOLOGY DETAILS===")
ontology = client.get_ontology("clqo6bd8v0jc407ybc1r9ehlb")
print("Name: ", ontology.name)
tools = ontology.tools()

# for tool in tools:
#   print(tool)

# Get project
print("\n===PROJECT DETAILS===")
PROJECT_ID = 'clqoh3ylw1o8s070hd6ch5z7o' # WHOI RSI USVI Fish
# PROJECT_ID = 'clqo7auln0mpo07wphorp0t2e' # Test WHOI RSI USVI Fish
project = client.get_project(PROJECT_ID)
print("Name: ", project.name)

# Get dataset
DATASET_ID = "clqh7v7qi001r07886j6aws7i"
dataset = client.get_dataset(DATASET_ID)
print("\n===DATASET DETAILS===")
print("Name: ", dataset.name)

===ONTOLOGY DETAILS===
Name:  WHOI-RSI-USVI-Fish

===PROJECT DETAILS===
Name:  WHOI-RSI-USVI-Fish-detect-and-track

===DATASET DETAILS===
Name:  imerit-26102023-3fps-clips


In [5]:
## General fish detector first
# Extract species and associate ids:
for option in tools[0].classifications[0].options:
    print(option.label)

# Match global keys to local directory structure
IMAGE_ROOT_DIR = "/media/cail/Veevee 2TB/whoi-rsi-fish-detection-datasets-yolo"
LABELS_JSON = "whoi-rsi-fish-detection-labels-04042024.json"

ABSA (Sergeant Major)
ACAN_UNKNOWN (Unknown Tang (Acanthuridae))
ACCH (Doctorfish)
ACCO (Blue Tang)
ACPO (Honeycomb Cowfish)
ACQU (Scrawled Cowfish)
ACTR (Surgeonfish)
ALSC (Scrawled Filefish)
ANVI (Porkfish)
AULO_UNKNOWN (Unknown Trumpetfish (Aulostomidae))
AUSP (Generic Trumpetfish)
AZCY (Blue Chromis)
AZMU (Brown Chromis)
BACH (Smallmouth Grunt)
BORU (Spanish Hogfish)
CAMA (Whitespotted Filefish)
CAPU (Orangespotted filefish)
CARA_UNKNOWN (Unknown Jack (Carangidae))
CARB (Yellow Jack)
CARL (Horse-eye Jack)
CARO (Sharpnose Puffer)
CARU (Bar Jack)
CECR (Graysby)
CEFU (Coney)
CHAE_UNKNOWN (Unknown Butterflyfish (Chaetodontidae))
CHCA (Four-eye Butterflyfish)
CHOC (Spotfin Butterflyfish)
CHST (Banded Butterflyfish)
CLPA (Creole Wrasse)
DAAM (Southern Stingray)
DASY_UNKNOWN (Unknown Whiptail Stingrays (Dasyatidae))
DIOH (Porcupinefish)
DOID_UNKNOWN (Unknown Porcupinefish (Diodontidate))
ECHE_UNKNOWN (Unknown Remora (Echeneidae))
EPAD (Rock Hind)
EPGU (Red Hind)
EPIN_UNKNOWN (Unknown Grou

In [12]:
## Recommended to download JSON from Labelbox using the Browser Interface
# TODO: Verify whether or not this includes interpolated (non-keyframed) data
import json
import jsonlines
import os
from pathlib import Path
from bbox_utils import *

json_path = "whoi-rsi-fish-detection-labels-04042024.json"
image_root = Path("/media/cail/Veevee 2TB/whoi-rsi-fish-detection-datasets-yolo")
output_root = Path("/media/cail/Veevee 2TB/whoi-rsi-fish-detection-datasets-yolo-labels")

# with open(json_path, "r") as f:
with jsonlines.open(json_path) as f:

    # Iterate through each video
    for i, datarow in enumerate(f):
        global_key = datarow["data_row"]["global_key"]

        img_sz = (datarow["media_attributes"]["width"], datarow["media_attributes"]["height"])
        
        # Video path
        vid_path = Path(global_key)
        rel_vid_path = vid_path.parent / "_".join(vid_path.stem.split("_")[:-1])

        # Grab frame labels
        try:
            frames_json = datarow["projects"][PROJECT_ID]["labels"][0]["annotations"]["frames"]
        except:
            print("Skipping ", global_key, " has no labels")
            continue

        # Iterate through frames
        for frame_id, frame_data in frames_json.items():
            img_filename = "frame_%03d.png"%(int(frame_id)-1)

            img_path = image_root / rel_vid_path / img_filename
            output_path = output_root / rel_vid_path / (img_filename + ".txt")

            # Verify this image exists
            assert(img_path.exists(), f"Image not found {img_path}")

            os.makedirs(output_path.parent, exist_ok=True)
            
            for object_id, object_data in frame_data["objects"].items():
                lbl_bbox = object_data["bounding_box"]

                # print(object_data["name"])
                
                # if len(object_data["classifications"]) > 0:
                #     print(object_data["classifications"])

                yolo_bbox = list(labelbox2yolo_bbox(lbl_bbox, img_sz))
                yolo_bbox.insert(0,0) # Fish class for now
                with open(output_path, "a") as f:
                    f.write(" ".join(map(str, yolo_bbox)))
                    f.write("\n")

print("done")

  assert(img_path.exists(), f"Image not found {img_path}")


Skipping  Summer2016/JoelsShoal30mTransects061016/P6100005_0m_20s_aws150.mp4  has no labels
done


In [15]:
import yaml

yolo_dataset = {
    "path": ".",
    "train": str(output_root.stem),
    "val": "",
    "test": "",
    "names": {0: "fish"},
}
yaml_data = yaml.dump(yolo_dataset)
with open(Path(output_root.parent) / "yolo_dataset.yaml", "w") as f:
    f.write(yaml_data)