In [7]:
from fishsense_api_sdk.client import Client
from pathlib import Path
from synology_api.filestation import FileStation
from fishsense_data_processing_workflow_worker.config import settings
from tqdm.asyncio import tqdm_asyncio
from tqdm.notebook import tqdm  
from skimage.exposure import adjust_gamma, equalize_adapthist
from skimage.util import img_as_float, img_as_ubyte
import cv2
import math
import numpy as np
import rawpy

In [8]:
URL = "http://localhost:8000"

NAS_HOST = "e4e-nas.ucsd.edu"
NAS_PORT = 6021

In [None]:
DATA_FOLDER = (Path("../data") / "REEF" / "data").absolute()
OUTPUT_FOLDER = (Path("../output") / "preprocess_headtail_jpeg").absolute()

DATA_FOLDER.mkdir(parents=True, exist_ok=True)
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

DATA_FOLDER.exists(), OUTPUT_FOLDER.exists()

(True, True)

In [10]:
filestation = FileStation(NAS_HOST, NAS_PORT, settings.e4e_nas.username, settings.e4e_nas.password, secure=True, cert_verify=False)

In [11]:
async with Client(URL) as client:
    dives = await client.dives.get_canonical()

len(dives), dives

(272,
 [Dive(id=1, name='080123_FSL-01 Photos', path='2023-09-07 REEF Data Dump/080123_FSL-01 Photos', dive_datetime=datetime.datetime(2023, 8, 1, 12, 46, 27, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=True, camera_id=1, dive_slate_id=1, laser_calibration_id=None),
  Dive(id=5, name='Hogfish01_MolHITW_0926_080323', path='2023.08.03.FishSense.FSL-01D/Hogfish01_MolHITW_0926_080323', dive_datetime=datetime.datetime(2023, 8, 3, 9, 27, 23, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=None, camera_id=1, dive_slate_id=None, laser_calibration_id=None),
  Dive(id=8, name='DogSnapper01_MolPeLe_1024_080323', path='2023.08.03.FishSense.FSL-01D/DogSnapper01_MolPeLe_1024_080323', dive_datetime=datetime.datetime(2023, 8, 3, 10, 24, 59, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=None, camera_id=1, dive_slate_id=None, laser_calibration_id=None),
  Dive(id=14, name='Hogfish01_ConchLed_1419_080323', path='2023.08.03.FishSense.FSL-01D/H

In [12]:
high_priority_dives = [dive for dive in dives if dive.priority == "HIGH"]

len(high_priority_dives), high_priority_dives

(7,
 [Dive(id=279, name='111323_Alligator DeepEast Drift2_FSL05', path='drive-download-20240307T1050Z/112023_Alligator/111323_Alligator/111323_Alligator_FSL05/111323_Alligator DeepEast Drift2_FSL05', dive_datetime=datetime.datetime(2023, 11, 13, 12, 34, 8, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=5, dive_slate_id=7, laser_calibration_id=None),
  Dive(id=341, name='031424_Alligator0_FSL01', path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01', dive_datetime=datetime.datetime(2024, 3, 13, 23, 59, 45, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=1, dive_slate_id=8, laser_calibration_id=None),
  Dive(id=347, name='031424_Alligator0_FSL02', path='2025-02-10 REEF Data Dump SMILE 6/031424_Alligator_FSL02/031424_Alligator0_FSL02', dive_datetime=datetime.datetime(2024, 3, 14, 12, 15, 46, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=2, div

In [13]:
dives_by_id = {dive.id: dive for dive in high_priority_dives}

len(dives_by_id), dives_by_id

(7,
 {279: Dive(id=279, name='111323_Alligator DeepEast Drift2_FSL05', path='drive-download-20240307T1050Z/112023_Alligator/111323_Alligator/111323_Alligator_FSL05/111323_Alligator DeepEast Drift2_FSL05', dive_datetime=datetime.datetime(2023, 11, 13, 12, 34, 8, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=5, dive_slate_id=7, laser_calibration_id=None),
  341: Dive(id=341, name='031424_Alligator0_FSL01', path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01', dive_datetime=datetime.datetime(2024, 3, 13, 23, 59, 45, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=1, dive_slate_id=8, laser_calibration_id=None),
  347: Dive(id=347, name='031424_Alligator0_FSL02', path='2025-02-10 REEF Data Dump SMILE 6/031424_Alligator_FSL02/031424_Alligator0_FSL02', dive_datetime=datetime.datetime(2024, 3, 14, 12, 15, 46, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, c

In [14]:
async with Client(URL) as client:
    existing_headtail_labels = await tqdm_asyncio.gather(*[client.labels.get_headtail_labels(dive.id) for dive in high_priority_dives])
    existing_headtail_labels = [label for sublist in existing_headtail_labels for label in sublist]


len(existing_headtail_labels), existing_headtail_labels

100%|██████████| 7/7 [00:01<00:00,  6.25it/s]


(482,
 [HeadTailLabel(id=23066, label_studio_task_id=180500, label_studio_project_id=45, head_x=None, head_y=None, tail_x=None, tail_y=None, updated_at=None, superseded=False, completed=True, label_studio_json={'annotations': [{'id': 116543, 'result': [], 'created_username': ' jessarmacsot@gmail.com, 46', 'created_ago': '2\xa0months', 'completed_by': 46, 'was_cancelled': False, 'ground_truth': False, 'created_at': '2025-09-23T00:22:53.720209Z', 'updated_at': '2025-09-23T00:22:53.720220Z', 'draft_created_at': None, 'lead_time': 26.424, 'import_id': None, 'last_action': None, 'bulk_created': False, 'task': 180500, 'project': 45, 'updated_by': 46, 'parent_prediction': None, 'parent_annotation': None, 'last_created_by': None}], 'annotations_ids': '116543', 'annotations_results': '[]', 'annotators': [46], 'avg_lead_time': 26.424, 'cancelled_annotations': 0, 'comment_authors': [], 'comment_count': 0, 'completed_at': '2025-09-23T00:22:53.720209Z', 'created_at': '2025-04-14T06:01:25.651616Z', 

In [15]:
headtail_labels_by_image_id = {label.image_id: label for label in existing_headtail_labels}

len(headtail_labels_by_image_id), headtail_labels_by_image_id

(482,
 {78728: HeadTailLabel(id=23066, label_studio_task_id=180500, label_studio_project_id=45, head_x=None, head_y=None, tail_x=None, tail_y=None, updated_at=None, superseded=False, completed=True, label_studio_json={'annotations': [{'id': 116543, 'result': [], 'created_username': ' jessarmacsot@gmail.com, 46', 'created_ago': '2\xa0months', 'completed_by': 46, 'was_cancelled': False, 'ground_truth': False, 'created_at': '2025-09-23T00:22:53.720209Z', 'updated_at': '2025-09-23T00:22:53.720220Z', 'draft_created_at': None, 'lead_time': 26.424, 'import_id': None, 'last_action': None, 'bulk_created': False, 'task': 180500, 'project': 45, 'updated_by': 46, 'parent_prediction': None, 'parent_annotation': None, 'last_created_by': None}], 'annotations_ids': '116543', 'annotations_results': '[]', 'annotators': [46], 'avg_lead_time': 26.424, 'cancelled_annotations': 0, 'comment_authors': [], 'comment_count': 0, 'completed_at': '2025-09-23T00:22:53.720209Z', 'created_at': '2025-04-14T06:01:25.651

In [16]:
async with Client(URL) as client:
    species_labels = await tqdm_asyncio.gather(*[client.labels.get_species_labels(dive.id) for dive in high_priority_dives])
    species_labels = [label for sublist in species_labels for label in sublist]

len(species_labels), species_labels

100%|██████████| 7/7 [00:01<00:00,  6.70it/s]


(1178,
 [SpeciesLabel(id=1024, label_studio_task_id=222252, label_studio_project_id=57, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/7252dfb0db0b855616e45607f58c1c93', updated_at=datetime.datetime(2025, 10, 20, 20, 42, 14, 832143, tzinfo=TzInfo(0)), completed=True, grouping=None, top_three_photos_of_group=None, slate_upside_down=True, laser_x=1970.0, laser_y=1340.0, laser_label='Red Laser', content_of_image='Slate, Laser not on slate', fish_measurable_category=None, fish_angle_category=None, fish_curved_category=None, label_studio_json={'annotations': [{'id': 117942, 'result': [{'id': 'result1', 'type': 'keypointlabels', 'value': {'x': 49.08221189998697, 'y': 44.44128413294742, 'width': 0.2, 'keypointlabels': ['Red Laser']}, 'origin': 'prediction-changed', 'to_name': 'image', 'from_name': 'laser', 'image_rotation': 0, 'original_width': 4014, 'original_height': 3016}, {'id': 'm4K68yydiZ', 'type': 'taxonomy', 'value': {'taxonomy': [['Slate', 'Laser not o

In [17]:
species_label_included = [label for label in species_labels if label.top_three_photos_of_group]

len(species_label_included), species_label_included

(241,
 [SpeciesLabel(id=1059, label_studio_task_id=222287, label_studio_project_id=57, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/106483baa424c4188740a4e1d69a2a21', updated_at=datetime.datetime(2025, 10, 21, 2, 1, 48, 534195, tzinfo=TzInfo(0)), completed=True, grouping=None, top_three_photos_of_group=True, slate_upside_down=None, laser_x=1908.0, laser_y=1106.0, laser_label='Red Laser', content_of_image='Fish, Hogfish (Lachnolaimus maximus)', fish_measurable_category='yes, center of fish', fish_angle_category='x < 5°', fish_curved_category='No Curve', label_studio_json={'annotations': [{'id': 118339, 'result': [{'id': 'result1', 'type': 'keypointlabels', 'value': {'x': 47.524835773227366, 'y': 36.6763674913711, 'width': 0.2, 'keypointlabels': ['Red Laser']}, 'origin': 'prediction', 'to_name': 'image', 'from_name': 'laser', 'image_rotation': 0, 'original_width': 4014, 'original_height': 3016}, {'id': 'ciK0QA_3rc', 'type': 'taxonomy', 'value': {'taxonom

In [18]:
incomplete_species_labels = [label for label in species_label_included if headtail_labels_by_image_id.get(label.image_id) is None or not headtail_labels_by_image_id[label.image_id].completed]

len(incomplete_species_labels), incomplete_species_labels

(156,
 [SpeciesLabel(id=877, label_studio_task_id=222642, label_studio_project_id=58, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/ce87c749c095a4a4722faa38a387c7d4', updated_at=datetime.datetime(2025, 10, 29, 14, 47, 12, 500561, tzinfo=TzInfo(0)), completed=True, grouping='Part of previous group', top_three_photos_of_group=True, slate_upside_down=None, laser_x=1903.0, laser_y=1195.0, laser_label='Red Laser', content_of_image='Fish, Hogfish (Lachnolaimus maximus)', fish_measurable_category='yes, center of fish', fish_angle_category='5° < x < 10°', fish_curved_category=None, label_studio_json={'annotations': [{'id': 120435, 'result': [{'id': 'laser_result', 'type': 'keypointlabels', 'value': {'x': 47.397379563610144, 'y': 39.60875697607368, 'width': 0.2, 'keypointlabels': ['Red Laser']}, 'origin': 'prediction', 'to_name': 'image', 'from_name': 'laser', 'image_rotation': 0, 'original_width': 4014, 'original_height': 3016}, {'id': 'species_result', 'type':

In [19]:
async with Client(URL) as client:
    incomplete_images = await tqdm_asyncio.gather(*[client.images.get(image_id=label.image_id) for label in incomplete_species_labels])

len(incomplete_images), incomplete_images

100%|██████████| 156/156 [00:03<00:00, 42.54it/s]


(156,
 [Image(id=101302, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130405.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 27, 40, tzinfo=TzInfo(0)), checksum='ce87c749c095a4a4722faa38a387c7d4', is_canonical=True, dive_id=341, camera_id=1),
  Image(id=101328, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130430.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 30, 35, tzinfo=TzInfo(0)), checksum='4bc17a48a92f281557160344ea69fc1e', is_canonical=True, dive_id=341, camera_id=1),
  Image(id=101416, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130520.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 54, 35, tzinfo=TzInfo(0)), checksum='ae6057f3995b2bd0cd0e564be33ecdab', is_canonical=True, dive_id=341, camera_id=1),
  Image(id=102761, path='2025-02-10 REEF Data Dump SMILE 6/031424_Alligator_FSL02/031424_Alligator0_FSL02/P3140070.ORF', taken_

In [20]:
image_by_id = {image.id: image for image in incomplete_images}

len(image_by_id), image_by_id

(156,
 {101302: Image(id=101302, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130405.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 27, 40, tzinfo=TzInfo(0)), checksum='ce87c749c095a4a4722faa38a387c7d4', is_canonical=True, dive_id=341, camera_id=1),
  101328: Image(id=101328, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130430.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 30, 35, tzinfo=TzInfo(0)), checksum='4bc17a48a92f281557160344ea69fc1e', is_canonical=True, dive_id=341, camera_id=1),
  101416: Image(id=101416, path='2024.06.20.REEF/03_2024_Alligator/031424_Alligator_FSL01/031424_Alligator0_FSL01/P3130520.ORF', taken_datetime=datetime.datetime(2024, 3, 13, 23, 54, 35, tzinfo=TzInfo(0)), checksum='ae6057f3995b2bd0cd0e564be33ecdab', is_canonical=True, dive_id=341, camera_id=1),
  102761: Image(id=102761, path='2025-02-10 REEF Data Dump SMILE 6/031424_Alligator_FSL02/031424_Alliga

In [21]:
async with Client(URL) as client:
    cameras = await tqdm_asyncio.gather(*[client.cameras.get(dive.camera_id) for dive in high_priority_dives])

len(cameras), cameras

100%|██████████| 7/7 [00:00<00:00, 10.93it/s]


(7,
 [Camera(id=5, serial_number='BJ6C67988', name='FSL-05'),
  Camera(id=1, serial_number='BJ6C69506', name='FSL-01'),
  Camera(id=2, serial_number='BJ6C83748', name='FSL-02'),
  Camera(id=6, serial_number='BJ6C67987', name='FSL-06'),
  Camera(id=4, serial_number='BJ6C85524', name='FSL-04'),
  Camera(id=6, serial_number='BJ6C67987', name='FSL-06'),
  Camera(id=3, serial_number='BJ6C85528', name='FSL-03')])

In [22]:
cameras_by_id = {camera.id: camera for camera in cameras}

len(cameras_by_id), cameras_by_id

(6,
 {5: Camera(id=5, serial_number='BJ6C67988', name='FSL-05'),
  1: Camera(id=1, serial_number='BJ6C69506', name='FSL-01'),
  2: Camera(id=2, serial_number='BJ6C83748', name='FSL-02'),
  6: Camera(id=6, serial_number='BJ6C67987', name='FSL-06'),
  4: Camera(id=4, serial_number='BJ6C85524', name='FSL-04'),
  3: Camera(id=3, serial_number='BJ6C85528', name='FSL-03')})

In [23]:
async with Client(URL) as client:
    camera_intrinsics_list = await tqdm_asyncio.gather(*[client.cameras.get_intrinsics(camera.id) for camera in cameras])

len(camera_intrinsics_list), camera_intrinsics_list

100%|██████████| 7/7 [00:00<00:00, 12.13it/s]


(7,
 [<fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc69ae0>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bd5bc50>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc61810>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e50fbf650>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e50e25550>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bd5b390>,
  <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc616e0>])

In [24]:
camera_intrinsics_by_camera_id = {intrinsics.camera_id: intrinsics for intrinsics in camera_intrinsics_list}

len(camera_intrinsics_by_camera_id), camera_intrinsics_by_camera_id

(6,
 {5: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc69ae0>,
  1: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bd5bc50>,
  2: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc61810>,
  6: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bd5b390>,
  4: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e50e25550>,
  3: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x722e4bc616e0>})

In [25]:
def process_raw(image_path: Path):
    with image_path.open("rb") as f:
        with rawpy.imread(f) as raw:
            img = img_as_float(
                raw.postprocess(
                    gamma=(1, 1),
                    no_auto_bright=True,
                    use_camera_wb=True,
                    output_bps=16,
                    user_flip=0,
                )
            )

            hsv = cv2.cvtColor(img_as_ubyte(img), cv2.COLOR_BGR2HSV)
            _, _, val = cv2.split(hsv)

            mid = 20
            mean = np.mean(val)
            meanLog = math.log(mean)
            midLog = math.log(mid * 255)
            gamma = midLog / meanLog
            gamma = 1 / gamma

            img = adjust_gamma(img, gamma=gamma)

            img = equalize_adapthist(img)

            return img_as_ubyte(img[:, :, ::-1])

In [26]:
def rectify(img: np.ndarray, camera_matrix: np.ndarray, distortion_coefficients: np.ndarray) -> np.ndarray:
    return cv2.undistort(
        img,
        camera_matrix,
        distortion_coefficients,
    )

In [None]:
for label in tqdm(incomplete_species_labels):
    image = image_by_id[label.image_id]
    dive = dives_by_id[image.dive_id]
    camera = cameras_by_id[dive.camera_id]
    camera_intrinsics = camera_intrinsics_by_camera_id[camera.id]

    image_path = DATA_FOLDER / image.path
    target_path = OUTPUT_FOLDER / f"{image.checksum}.JPG"

    source_nas_path = f"/fishsense_data/REEF/data/{image.path}"
    filestation.get_file(source_nas_path, "download", dest_path=str(image_path.parent))

    img = process_raw(image_path)
    img = rectify(img, camera_intrinsics.camera_matrix, camera_intrinsics.distortion_coefficients)

    cv2.imwrite(target_path.as_posix(), img)

    target_nas_path = f"/fishsense_process_work/preprocess_headtail_jpeg"
    filestation.upload_file(target_nas_path, str(target_path), overwrite=True)

  0%|          | 0/156 [00:00<?, ?it/s]

Upload Progress: 100%|██████████| 6.91M/6.91M [00:03<00:00, 1.91MB/s]
Upload Progress: 100%|██████████| 6.98M/6.98M [00:03<00:00, 2.11MB/s]
Upload Progress: 100%|██████████| 7.51M/7.51M [00:04<00:00, 1.86MB/s]
Upload Progress: 100%|██████████| 7.13M/7.13M [00:03<00:00, 2.35MB/s]
Upload Progress: 100%|██████████| 7.36M/7.36M [00:05<00:00, 1.34MB/s]
Upload Progress: 100%|██████████| 7.59M/7.59M [00:04<00:00, 1.99MB/s]
Upload Progress: 100%|██████████| 7.44M/7.44M [00:03<00:00, 2.39MB/s]
Upload Progress: 100%|██████████| 6.98M/6.98M [00:03<00:00, 2.14MB/s]
Upload Progress: 100%|██████████| 7.32M/7.32M [00:03<00:00, 2.08MB/s]
Upload Progress: 100%|██████████| 7.21M/7.21M [00:03<00:00, 1.95MB/s]
Upload Progress: 100%|██████████| 7.16M/7.16M [00:03<00:00, 1.96MB/s]
Upload Progress: 100%|██████████| 7.13M/7.13M [00:03<00:00, 1.87MB/s]
Upload Progress: 100%|██████████| 7.10M/7.10M [00:03<00:00, 1.91MB/s]
Upload Progress: 100%|██████████| 6.90M/6.90M [00:04<00:00, 1.79MB/s]
Upload Progress: 100