In [1]:
from fishsense_api_sdk.client import Client
from label_studio_sdk.client import LabelStudio
from pathlib import Path
import cv2
from tqdm.notebook import tqdm
import pandas as pd
from datetime import datetime
from copy import deepcopy
from fishsense_api_sdk.models.species_label import SpeciesLabel

In [2]:
# Base URL handed to the FishSense API `Client`.
URL = "http://localhost:8000"
# ID of the dive whose images are turned into Label Studio tasks.
DIVE_ID = 471
# Label Studio project that receives the imported tasks.
PROJECT_ID = 63

In [3]:
# Folder holding the preprocessed JPEGs (read later as "<checksum>.JPG") and the
# checksum-mapping CSV; both are resolved to absolute paths up front.
OUTPUT_FOLDER = Path("../output", "preprocess_groups_jpeg").absolute()
MAP_CSV = Path("./species_map.csv").absolute()

# Sanity check: both inputs must exist before anything else runs.
tuple(required.exists() for required in (OUTPUT_FOLDER, MAP_CSV))

(True, True)

In [4]:
# Despite its name this is the *path* of the file that stores the Label Studio
# API key; the key itself is read from it when the client is created.
label_studio_api_key = Path("..", ".label_studio_api_key")

label_studio_api_key.exists()

True

In [5]:
# FishSense API client plus the Label Studio client; the API key is read from
# the key file rather than being embedded in the notebook.
fs = Client(URL)
ls = LabelStudio(
    base_url="https://labeler.e4e.ucsd.edu",
    api_key=label_studio_api_key.read_text().strip(),
)

In [6]:
# Fetch the dive record for DIVE_ID from the FishSense API.
# Uses top-level `await`, which only works inside a notebook/IPython session.
dive = await fs.dives.get(dive_id=DIVE_ID)

dive

Dive(id=471, name='121024_Alligator2_FSL03', path='2025-03-13 Reef Data Dump/121024_Alligator_FSL03/121024_Alligator2_FSL03', dive_datetime=datetime.datetime(2024, 12, 10, 10, 18, 21, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=3, dive_slate_id=None)

In [7]:
# Fetch every image record that belongs to the dive.
# The preview below indexes images[0], so it assumes the dive has at least one image.
images = await fs.images.get(dive_id=dive.id)

len(images), images[0]

(79,
 Image(id=129397, path='2025-03-13 Reef Data Dump/121024_Alligator_FSL03/121024_Alligator2_FSL03/PC100001.ORF', taken_datetime=datetime.datetime(2024, 12, 10, 9, 15, 50, tzinfo=TzInfo(0)), checksum='f31697fae3cdd06ab982ee3a8ede16fe', is_canonical=True, dive_id=471, camera_id=3))

In [8]:
# Spreadsheet export of the human species labels, one row per labelled image.
# keep_default_na=False keeps blank cells as "" instead of converting them to NaN.
# NOTE(review): in the preview "Not Center Fish" renders as TRUE/FALSE text while
# "Measurable" renders as True/False — presumably the former is a string column;
# confirm its dtype before using it in a truth test.
species_labels = pd.read_csv("../scripts/species_labels.csv", keep_default_na=False)

species_labels

Unnamed: 0,Index,Column 8,Checksum,File Name,Species,Measurable,Not Center Fish,Column 6,Comments/Notes,Fish Angles/Curved Tail
0,0.00,2,a5dbaee04184b279e21745b917f85786,0_a5dbaee04184b279e21745b917f85786.JPG,Grey Snapper (Lutjanus griseus),True,FALSE,,,
1,1.00,2,520d35c076e36092c243f5cf6f02a7dc,1_520d35c076e36092c243f5cf6f02a7dc.JPG,Other (Identifiable but Nontarget),True,FALSE,,,
2,2.00,2,cc97ac368241f5f582541fd29f141a6b,2_cc97ac368241f5f582541fd29f141a6b.JPG,Hogfish (Lachnolaimus maximus),True,FALSE,,I don't think this one should be labeled measu...,Significant Curve
3,3.00,2,0ea63f5f073b0c6c4749c86ddd666925,3_0ea63f5f073b0c6c4749c86ddd666925.JPG,Stoplight Parrotfish (Sparisoma viride),False,FALSE,,,
4,4.00,2,f18aebcf2cfe3a390c3407d05085d0de,4_f18aebcf2cfe3a390c3407d05085d0de.JPG,Grey Snapper (Lutjanus griseus),True,FALSE,,,
...,...,...,...,...,...,...,...,...,...,...
2995,2995.00,5,53b300ae61154e8bdf9c31284f4b7eed,2995_53b300ae61154e8bdf9c31284f4b7eed.JPG,Stoplight Parrotfish (Sparisoma viride),True,FALSE,,,
2996,2996.00,5,f144e1420f1b40db9027bf79d37f8f6c,2996_f144e1420f1b40db9027bf79d37f8f6c.JPG,Stoplight Parrotfish (Sparisoma viride),False,FALSE,,,
2997,2997.00,5,e91bb07fce97f762157f6c56da6cc745,2997_e91bb07fce97f762157f6c56da6cc745.JPG,Stoplight Parrotfish (Sparisoma viride),True,TRUE,,,
2998,2998.00,5,eecf2abc577233c625e889db2ccfca63,2998_eecf2abc577233c625e889db2ccfca63.JPG,Unidentifiable (Cannot see),False,TRUE,,,


In [9]:
# Lookup table with a "gdrive_checksum" and a "nas_checksum" column — presumably the
# checksum of an image as labelled in the spreadsheet vs. the checksum of the same
# image as stored by the API; verify against whatever produced species_map.csv.
# NOTE(review): in the previewed rows each nas_checksum equals the gdrive_checksum of
# the following row, and row 310 maps the checksum of label row 3 to that of label
# row 4 — check that the mapping is not shifted by one row.
species_map = pd.read_csv(MAP_CSV, keep_default_na=False)

species_map

Unnamed: 0,gdrive_checksum,nas_checksum
0,8425323c429395cca532d409b5aa3b54,4ae8c7448e81014842009e0d0453273f
1,4ae8c7448e81014842009e0d0453273f,ee747067df21bd56a197e4d7df50063c
2,ee747067df21bd56a197e4d7df50063c,a26d7ce1b2eec2e59b70aa0dcb3231b1
3,a26d7ce1b2eec2e59b70aa0dcb3231b1,c82857290465d8837d62d456cf902baa
4,c82857290465d8837d62d456cf902baa,726e0e790528188a7f2e931b9ee4a367
...,...,...
307,95486b0e06655b35245d62e5241859b7,d110412cbcbe415518dc324502d758e9
308,d110412cbcbe415518dc324502d758e9,66913809f7599623d11e2bbfd697c001
309,66913809f7599623d11e2bbfd697c001,0a0f2151ac4f7a814d1bc62987eea562
310,0ea63f5f073b0c6c4749c86ddd666925,f18aebcf2cfe3a390c3407d05085d0de


In [10]:
def map_species_checksum(gdrive_checksum: str) -> str:
    """Translate a spreadsheet checksum through the ``species_map`` lookup table.

    Args:
        gdrive_checksum: Checksum as it appears in the label spreadsheet.

    Returns:
        The ``nas_checksum`` of the first ``species_map`` row whose
        ``gdrive_checksum`` matches. When no row matches, the input checksum
        is returned unchanged, so this function never returns ``None``.
    """
    # Select only the column we need for the matching rows.
    matches = species_map.loc[
        species_map["gdrive_checksum"] == gdrive_checksum, "nas_checksum"
    ]
    if matches.empty:
        # No mapping entry: fall back to the checksum we were given.
        return gdrive_checksum
    return matches.iloc[0]

In [11]:
# Distinct checksums of this dive's images, with a five-element preview.
image_checksums = set(record.checksum for record in images)

len(image_checksums), list(image_checksums)[:5]

(79,
 ['a97238832fc6fc723c75ebe7b2e17e60',
  'e78fc30be841ad631119fb6e86e93b92',
  '1b7cd49dbbcc64b1335788f73aae688b',
  'c2c02974f0de30acaea07321b3b5249c',
  'c8eee9e5c13ef255a8a27dee8e850480'])

In [12]:
# Build {mapped checksum: (species, measurable, not_center_fish, column_6, notes, angles/curve)}
# for every spreadsheet row that is not a 'Slate' label.
# Single pass over the rows with one checksum lookup per row; the original rebuilt the
# row eight times via .iloc and scanned species_map twice per row.
# Rows are visited in spreadsheet order, so a later row with the same mapped checksum
# overwrites an earlier one, exactly as in a dict comprehension.
_label_columns = [
    "Species",
    "Measurable",
    "Not Center Fish",
    "Column 6",
    "Comments/Notes",
    "Fish Angles/Curved Tail",
]

filtered_species_labels = {}
for _checksum, *_fields in species_labels[["Checksum", *_label_columns]].itertuples(index=False, name=None):
    if _fields[0] == 'Slate':
        continue
    _mapped_checksum = map_species_checksum(_checksum)
    # Kept as a guard; map_species_checksum as written falls back to the input checksum.
    if _mapped_checksum is None:
        continue
    filtered_species_labels[_mapped_checksum] = tuple(_fields)

len(filtered_species_labels)

2654

In [13]:
# Number of kept labels that carry a non-empty "Fish Angles/Curved Tail" entry.
sum(1 for *_, angle_or_curve in filtered_species_labels.values() if angle_or_curve)

351

In [14]:
# Timestamp (local time, second resolution) used to version the uploaded predictions.
now = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

now

'2025-11-01 21:26:42'

In [15]:
# The truthy "Measurable" flags of the kept labels; its length is therefore the
# number of labels whose measurable field is truthy.
filtered_species_labels_list = [
    measurable_flag
    for _, measurable_flag, *_ in filtered_species_labels.values()
    if measurable_flag
]

len(filtered_species_labels_list)

1767

In [16]:
# Accepted fish-angle values: comma-separated entries of the "Fish Angles/Curved Tail"
# column that match one of these are emitted under the "fishAngles" control.
fish_angle_options = {
    "x < 5°",
    "5° < x < 10°",
    "10° < x < 15°"
}

# Accepted tail-curvature values: matching entries are emitted under the "fishCurve" control.
curved_tail_options = {
    "No Curve",
    "Slight Curve",
    "Significant Curve"
}

fish_angle_options, curved_tail_options

({'10° < x < 15°', '5° < x < 10°', 'x < 5°'},
 {'No Curve', 'Significant Curve', 'Slight Curve'})

In [22]:
tasks = []
template = {
    "model_version": f"sql_labels_{now}",
    "result": []
}

for image in tqdm(images):
    laser_label = await fs.labels.get_laser_label(image_id=image.id)
    species_label = None
    if image.checksum in filtered_species_labels:
        species_label = filtered_species_labels[image.checksum]
    image_path = OUTPUT_FOLDER / f"{image.checksum}.JPG"

    img = cv2.imread(str(image_path))
    height, width, _ = img.shape

    predictions = []
    if laser_label is not None and laser_label.label is not None:
        prediction = deepcopy(template)
        predictions = [prediction]

        prediction["result"].append({
            "id": "laser_result",
            "type": "keypointlabels",
            "to_name": "image", "from_name": "laser",
            "original_width": width, "original_height": height,
            "image_rotation": 0,
            "value": {
                "x": laser_label.x / width * 100,
                "y": laser_label.y / height * 100,
                "width": 0.2,
                "keypointlabels": [laser_label.label]
            }
        })

    if species_label is not None:
        if len(predictions) == 0:
            prediction = deepcopy(template)
            predictions = [prediction]
        else:
            prediction = predictions[0]

        species, measurable, not_center_fish, _, _, fish_angles_curved_tail = species_label

        # We won't use the slate labels since we didn't give options.
        prediction["result"].append({
            "id": "species_result",
            "type": "taxonomy",
            "to_name": "image", "from_name": "species",
            "value": {
                "taxonomy": [
                    [
                        "Fish",
                        species
                    ]
                ]
            }
        })

        measurable_value = "no"
        if not_center_fish:
            measurable_value = "yes, not center of fish"
        else:
            measurable_value = "yes, center of fish"

        prediction["result"].append({
            "id": "measurable_result",
            "type": "taxonomy",
            "to_name": "image", "from_name": "measurable",
            "value": {
                "taxonomy": [
                    [
                        measurable_value
                    ]
                ]
            }
        })

        for item in fish_angles_curved_tail.split(","):
            item = item.strip()

            if item in fish_angle_options:
                prediction["result"].append({
                    "id": "fish_angle_result",
                    "type": "taxonomy",
                    "to_name": "image", "from_name": "fishAngles",
                    "value": {
                        "taxonomy": [
                            [
                                item
                            ]
                        ]
                    }
                })
            elif item in curved_tail_options:
                prediction["result"].append({
                    "id": "curved_tail_result",
                    "type": "taxonomy",
                    "to_name": "image", "from_name": "fishCurve",
                    "value": {
                        "taxonomy": [
                            [
                                item
                            ]
                        ]
                    }
                })

    tasks.append({
        "data": {
            "image": f"https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/{image.checksum}"
        },
        "predictions": predictions,
        "annotations": []
    })

  0%|          | 0/79 [00:00<?, ?it/s]

In [18]:
# Preview only the first tasks; dumping the whole list floods the notebook output.
len(tasks), tasks[:2]

[{'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/f31697fae3cdd06ab982ee3a8ede16fe'},
  'predictions': [{'model_version': 'sql_labels_2025-11-01 21:26:42',
    'result': [{'id': 'laser_result',
      'type': 'keypointlabels',
      'to_name': 'image',
      'from_name': 'laser',
      'original_width': 4014,
      'original_height': 3016,
      'image_rotation': 0,
      'value': {'x': 53.16392625809666,
       'y': 46.12068965517241,
       'width': 0.2,
       'keypointlabels': ['Green Laser']}}]}],
  'annotations': []},
 {'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/8bda788a0b6471980ff21eb345efcddf'},
  'predictions': [{'model_version': 'sql_labels_2025-11-01 21:26:42',
    'result': [{'id': 'laser_result',
      'type': 'keypointlabels',
      'to_name': 'image',
      'from_name': 'laser',
      'original_width': 4014,
      'original_height': 3016,
      'image_rotation': 0,
      'value': {'x': 51.0

In [19]:
# Upload the prepared tasks (with their predictions) to the Label Studio project and
# ask for the IDs of the created tasks, which are needed to link tasks to images below.
# NOTE(review): the import is issued every time this cell runs — presumably each run
# adds another set of tasks to the project, so avoid re-running it; confirm.
imported_tasks = ls.projects.import_tasks(PROJECT_ID, request=tasks, return_task_ids=True)

imported_tasks

ProjectsImportTasksResponse(annotation_count=0, could_be_tasks_list=False, data_columns=[], duration=0.2182912826538086, file_upload_ids=[], found_formats=[], predictions_count=None, task_count=79, prediction_count=59, task_ids=[223542, 223543, 223544, 223545, 223546, 223547, 223548, 223549, 223550, 223551, 223552, 223553, 223554, 223555, 223556, 223557, 223558, 223559, 223560, 223561, 223562, 223563, 223564, 223565, 223566, 223567, 223568, 223569, 223570, 223571, 223572, 223573, 223574, 223575, 223576, 223577, 223578, 223579, 223580, 223581, 223582, 223583, 223584, 223585, 223586, 223587, 223588, 223589, 223590, 223591, 223592, 223593, 223594, 223595, 223596, 223597, 223598, 223599, 223600, 223601, 223602, 223603, 223604, 223605, 223606, 223607, 223608, 223609, 223610, 223611, 223612, 223613, 223614, 223615, 223616, 223617, 223618, 223619, 223620])

In [20]:
# One SpeciesLabel record per image, tying the image to its new Label Studio task.
# NOTE: this rebinds `species_labels` (until now the label-spreadsheet DataFrame) to a
# list of SpeciesLabel objects; re-load the CSV before re-running earlier cells that
# read the DataFrame.
# Pairing assumes Label Studio returns task IDs in the order the tasks were submitted —
# TODO confirm. strict=True makes a count mismatch raise instead of silently dropping
# the unmatched tail.
species_labels = [
    SpeciesLabel(
        id=None,
        image_id=image.id,
        label_studio_task_id=task_id,
        image_url=f"https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/{image.checksum}",
        updated_at=None,
        completed=False,
        label_studio_json={},
        user_id=None,
    )
    for task_id, image in zip(imported_tasks.task_ids, images, strict=True)
]

len(species_labels), species_labels[0]

(79,
 SpeciesLabel(id=None, label_studio_task_id=223542, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/f31697fae3cdd06ab982ee3a8ede16fe', updated_at=None, completed=False, label_studio_json={}, image_id=129397, user_id=None))

In [21]:
# Send each SpeciesLabel record (which carries the image ID and its Label Studio task ID)
# to the FishSense API, one awaited request per image.
for species_label in tqdm(species_labels):
    await fs.labels.post_species_label(species_label.image_id, species_label)

  0%|          | 0/79 [00:00<?, ?it/s]