In [28]:
from fishsense_api_sdk.client import Client
from label_studio_sdk.client import LabelStudio
from pathlib import Path
import cv2
from tqdm.notebook import tqdm
import pandas as pd
from datetime import datetime
from copy import deepcopy
import asyncio
from fishsense_api_sdk.models.species_label import SpeciesLabel
from fishsense_data_processing_workflow_worker.config import settings

In [7]:
# Dive whose record and images are fetched from the FishSense API below.
DIVE_ID: int = 383
# Label Studio project that the generated tasks are imported into.
PROJECT_ID: int = 70

In [8]:
# Folder of JPEGs named by image checksum, and the CSV that maps spreadsheet
# checksums to the checksums used by the API. Both are made absolute up front.
OUTPUT_FOLDER = Path("..", "output", "preprocess_groups_jpeg").absolute()
MAP_CSV = Path("species_map.csv").absolute()

# Sanity check: both inputs must exist before anything else runs.
OUTPUT_FOLDER.exists(), MAP_CSV.exists()

(True, True)

In [9]:
# Despite the name, this is the *path* of the file holding the API key; the
# key text itself is read from it when the Label Studio client is created.
label_studio_api_key = Path("..", ".label_studio_api_key")

label_studio_api_key.exists()

True

In [10]:
# Label Studio client, authenticated with the key stored in the file above
# (surrounding whitespace/newline stripped).
ls = LabelStudio(
    base_url="https://labeler.e4e.ucsd.edu",
    api_key=label_studio_api_key.read_text().strip(),
)

In [12]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    dive = await fs.dives.get(dive_id=DIVE_ID)

dive

Dive(id=383, name='062624_FiveSeas_FSL03', path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03', dive_datetime=datetime.datetime(2024, 6, 26, 7, 52, 45, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=None, camera_id=3, dive_slate_id=None)

In [13]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    images = await fs.images.get(dive_id=dive.id)

len(images), images[0]

(40,
 Image(id=111909, path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03/P6260560.ORF', taken_datetime=datetime.datetime(2024, 6, 26, 6, 53, 31, tzinfo=TzInfo(0)), checksum='708d385dc06f8fd60c46683478e77b2b', is_canonical=True, dive_id=383, camera_id=3))

In [14]:
# Species-label spreadsheet export. keep_default_na=False keeps blank cells as
# empty strings instead of NaN, so later string handling (e.g. `.split(",")`)
# works on every row.
species_labels = pd.read_csv(
    "../scripts/species_labels.csv",
    keep_default_na=False,
)

species_labels

Unnamed: 0,Index,Column 8,Checksum,File Name,Species,Measurable,Not Center Fish,Column 6,Comments/Notes,Fish Angles/Curved Tail
0,0.00,2,a5dbaee04184b279e21745b917f85786,0_a5dbaee04184b279e21745b917f85786.JPG,Grey Snapper (Lutjanus griseus),True,FALSE,,,
1,1.00,2,520d35c076e36092c243f5cf6f02a7dc,1_520d35c076e36092c243f5cf6f02a7dc.JPG,Other (Identifiable but Nontarget),True,FALSE,,,
2,2.00,2,cc97ac368241f5f582541fd29f141a6b,2_cc97ac368241f5f582541fd29f141a6b.JPG,Hogfish (Lachnolaimus maximus),True,FALSE,,I don't think this one should be labeled measu...,Significant Curve
3,3.00,2,0ea63f5f073b0c6c4749c86ddd666925,3_0ea63f5f073b0c6c4749c86ddd666925.JPG,Stoplight Parrotfish (Sparisoma viride),False,FALSE,,,
4,4.00,2,f18aebcf2cfe3a390c3407d05085d0de,4_f18aebcf2cfe3a390c3407d05085d0de.JPG,Grey Snapper (Lutjanus griseus),True,FALSE,,,
...,...,...,...,...,...,...,...,...,...,...
2995,2995.00,5,53b300ae61154e8bdf9c31284f4b7eed,2995_53b300ae61154e8bdf9c31284f4b7eed.JPG,Stoplight Parrotfish (Sparisoma viride),True,FALSE,,,
2996,2996.00,5,f144e1420f1b40db9027bf79d37f8f6c,2996_f144e1420f1b40db9027bf79d37f8f6c.JPG,Stoplight Parrotfish (Sparisoma viride),False,FALSE,,,
2997,2997.00,5,e91bb07fce97f762157f6c56da6cc745,2997_e91bb07fce97f762157f6c56da6cc745.JPG,Stoplight Parrotfish (Sparisoma viride),True,TRUE,,,
2998,2998.00,5,eecf2abc577233c625e889db2ccfca63,2998_eecf2abc577233c625e889db2ccfca63.JPG,Unidentifiable (Cannot see),False,TRUE,,,


In [15]:
# Lookup table with `gdrive_checksum` -> `nas_checksum` columns; blank cells
# are kept as empty strings rather than NaN.
species_map = pd.read_csv(
    MAP_CSV,
    keep_default_na=False,
)

species_map

Unnamed: 0,gdrive_checksum,nas_checksum
0,8425323c429395cca532d409b5aa3b54,4ae8c7448e81014842009e0d0453273f
1,4ae8c7448e81014842009e0d0453273f,ee747067df21bd56a197e4d7df50063c
2,ee747067df21bd56a197e4d7df50063c,a26d7ce1b2eec2e59b70aa0dcb3231b1
3,a26d7ce1b2eec2e59b70aa0dcb3231b1,c82857290465d8837d62d456cf902baa
4,c82857290465d8837d62d456cf902baa,726e0e790528188a7f2e931b9ee4a367
...,...,...
307,95486b0e06655b35245d62e5241859b7,d110412cbcbe415518dc324502d758e9
308,d110412cbcbe415518dc324502d758e9,66913809f7599623d11e2bbfd697c001
309,66913809f7599623d11e2bbfd697c001,0a0f2151ac4f7a814d1bc62987eea562
310,0ea63f5f073b0c6c4749c86ddd666925,f18aebcf2cfe3a390c3407d05085d0de


In [16]:
def map_species_checksum(gdrive_checksum: str) -> str | None:
    row = species_map[species_map["gdrive_checksum"] == gdrive_checksum]
    if len(row) == 0:
        return gdrive_checksum
    return row["nas_checksum"].values[0]

In [17]:
# Distinct checksums of the dive's images.
image_checksums = set(image_record.checksum for image_record in images)

# Show the count and a small sample (set order is arbitrary).
len(image_checksums), list(image_checksums)[:5]

(40,
 ['cc3d46abe478dfd34731a074b77afe25',
  'dd36dcfdad4d85e0cc187e5986621d14',
  '3aa5be29b05c9024004534b3ff434578',
  '65d6ca11a1d03d89c59e505cb6ddd557',
  '67c6ae67d5af7120d246a0f2150c25c6'])

In [18]:
# Index the spreadsheet rows by their mapped checksum, skipping 'Slate' rows.
# Each value is the tuple
# (Species, Measurable, Not Center Fish, Column 6, Comments/Notes,
#  Fish Angles/Curved Tail); a later row with the same key replaces an earlier one.
filtered_species_labels = {
    nas_checksum: (
        row["Species"],
        row["Measurable"],
        row["Not Center Fish"],
        row["Column 6"],
        row["Comments/Notes"],
        row["Fish Angles/Curved Tail"],
    )
    # Fetch each row once instead of re-indexing the frame for every field.
    for row in (species_labels.iloc[idx] for idx in range(species_labels.shape[0]))
    # One-element loop: binds the mapped checksum so it is computed once per row.
    for nas_checksum in (map_species_checksum(row["Checksum"]),)
    if nas_checksum is not None and row["Species"] != 'Slate'
}

len(filtered_species_labels)

2654

In [19]:
# Number of labels whose "Fish Angles/Curved Tail" field (last tuple slot) is non-empty.
sum(1 for label in filtered_species_labels.values() if label[5])

351

In [21]:
# Local timestamp (second resolution) used to tag this run's predictions.
now = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

now

'2025-12-08 14:45:03'

In [15]:
# Collect the truthy "Measurable" entries (second tuple slot) so they can be counted.
filtered_species_labels_list = [
    label[1] for label in filtered_species_labels.values() if label[1]
]

len(filtered_species_labels_list)

1767

In [22]:
# Spreadsheet values recognised as a fish-angle choice.
fish_angle_options = {"x < 5°", "5° < x < 10°", "10° < x < 15°"}

# Spreadsheet values recognised as a tail-curvature choice.
curved_tail_options = {"No Curve", "Slight Curve", "Significant Curve"}

fish_angle_options, curved_tail_options

({'10° < x < 15°', '5° < x < 10°', 'x < 5°'},
 {'No Curve', 'Significant Curve', 'Slight Curve'})

In [23]:
tasks = []
template = {
    "model_version": f"sql_labels_{now}",
    "result": []
}

for image in tqdm(images):
    laser_label = await fs.labels.get_laser_label(image_id=image.id)
    species_label = None
    if image.checksum in filtered_species_labels:
        species_label = filtered_species_labels[image.checksum]
    image_path = OUTPUT_FOLDER / f"{image.checksum}.JPG"

    img = cv2.imread(str(image_path))
    height, width, _ = img.shape

    predictions = []
    if laser_label is not None and laser_label.label is not None:
        prediction = deepcopy(template)
        predictions = [prediction]

        prediction["result"].append({
            "id": "laser_result",
            "type": "keypointlabels",
            "to_name": "image", "from_name": "laser",
            "original_width": width, "original_height": height,
            "image_rotation": 0,
            "value": {
                "x": laser_label.x / width * 100,
                "y": laser_label.y / height * 100,
                "width": 0.2,
                "keypointlabels": [laser_label.label]
            }
        })

    if species_label is not None:
        if len(predictions) == 0:
            prediction = deepcopy(template)
            predictions = [prediction]
        else:
            prediction = predictions[0]

        species, measurable, not_center_fish, _, _, fish_angles_curved_tail = species_label

        # We won't use the slate labels since we didn't give options.
        prediction["result"].append({
            "id": "species_result",
            "type": "taxonomy",
            "to_name": "image", "from_name": "species",
            "value": {
                "taxonomy": [
                    [
                        "Fish",
                        species
                    ]
                ]
            }
        })

        measurable_value = "no"
        if not_center_fish:
            measurable_value = "yes, not center of fish"
        else:
            measurable_value = "yes, center of fish"

        prediction["result"].append({
            "id": "measurable_result",
            "type": "taxonomy",
            "to_name": "image", "from_name": "measurable",
            "value": {
                "taxonomy": [
                    [
                        measurable_value
                    ]
                ]
            }
        })

        for item in fish_angles_curved_tail.split(","):
            item = item.strip()

            if item in fish_angle_options:
                prediction["result"].append({
                    "id": "fish_angle_result",
                    "type": "taxonomy",
                    "to_name": "image", "from_name": "fishAngles",
                    "value": {
                        "taxonomy": [
                            [
                                item
                            ]
                        ]
                    }
                })
            elif item in curved_tail_options:
                prediction["result"].append({
                    "id": "curved_tail_result",
                    "type": "taxonomy",
                    "to_name": "image", "from_name": "fishCurve",
                    "value": {
                        "taxonomy": [
                            [
                                item
                            ]
                        ]
                    }
                })

    tasks.append({
        "data": {
            "image": f"https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/{image.checksum}"
        },
        "predictions": predictions,
        "annotations": []
    })

  0%|          | 0/40 [00:00<?, ?it/s]

In [24]:
# Inspect the generated task payload before it is imported into Label Studio.
tasks

[{'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/708d385dc06f8fd60c46683478e77b2b'},
  'predictions': [],
  'annotations': []},
 {'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/4711763b48368af835dc397c61411dab'},
  'predictions': [],
  'annotations': []},
 {'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/a16c10d11e3ea788f966800c77af1501'},
  'predictions': [],
  'annotations': []},
 {'data': {'image': 'https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/d8c968f9f1582fc6218064d57196f6f0'},
  'predictions': [{'model_version': 'sql_labels_2025-12-08 14:45:03',
    'result': [{'id': 'laser_result',
      'type': 'keypointlabels',
      'to_name': 'image',
      'from_name': 'laser',
      'original_width': 4014,
      'original_height': 3016,
      'image_rotation': 0,
      'value': {'x': 51.842023567094245,
       'y': 48.708103841137216,
       'width': 

In [25]:
# Import all tasks into the project in one request and ask for the created
# task ids, which are needed to link each task to its image record.
imported_tasks = ls.projects.import_tasks(
    PROJECT_ID,
    request=tasks,
    return_task_ids=True,
)

imported_tasks

ImportTasksProjectsResponse(annotation_count=0, could_be_tasks_list=False, data_columns=[], duration=0.2959258556365967, file_upload_ids=[], found_formats=[], predictions_count=None, task_count=40, prediction_count=5, task_ids=[224623, 224624, 224625, 224626, 224627, 224628, 224629, 224630, 224631, 224632, 224633, 224634, 224635, 224636, 224637, 224638, 224639, 224640, 224641, 224642, 224643, 224644, 224645, 224646, 224647, 224648, 224649, 224650, 224651, 224652, 224653, 224654, 224655, 224656, 224657, 224658, 224659, 224660, 224661, 224662])

In [27]:
# Create one empty (not yet completed) SpeciesLabel record per imported task,
# linking the Label Studio task id to the image it was generated from.
# NOTE(review): pairing relies on Label Studio returning task ids in the same
# order the tasks were submitted -- confirm. strict=True at least makes a
# count mismatch fail loudly instead of silently dropping records.
# NOTE(review): this rebinds `species_labels`, which earlier held the CSV
# DataFrame; re-running the filtering cell after this one will fail.
species_labels = [
    SpeciesLabel(
        id=None,
        image_id=image.id,
        label_studio_task_id=task_id,
        label_studio_project_id=PROJECT_ID,
        image_url=f"https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/{image.checksum}",
        updated_at=None,
        completed=False,
        label_studio_json={},
        user_id=None,
        grouping=None,
        top_three_photos_of_group=None,
        slate_upside_down=None,
        laser_x=None,
        laser_y=None,
        laser_label=None,
        content_of_image=None,
        fish_measurable_category=None,
        fish_angle_category=None,
        fish_curved_category=None,
    )
    for task_id, image in zip(imported_tasks.task_ids, images, strict=True)
]

len(species_labels), species_labels[0]

(40,
 SpeciesLabel(id=None, label_studio_task_id=224623, label_studio_project_id=70, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/708d385dc06f8fd60c46683478e77b2b', updated_at=None, completed=False, grouping=None, top_three_photos_of_group=None, slate_upside_down=None, laser_x=None, laser_y=None, laser_label=None, content_of_image=None, fish_measurable_category=None, fish_angle_category=None, fish_curved_category=None, label_studio_json={}, image_id=111909, user_id=None))

In [29]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    async with asyncio.TaskGroup() as tg:
        for species_label in tqdm(species_labels):
            tg.create_task(fs.labels.put_species_label(species_label.image_id, species_label))

  0%|          | 0/40 [00:00<?, ?it/s]