In [7]:
from fishsense_api_sdk.client import Client
from tqdm.asyncio import tqdm_asyncio
from typing import List
from fishsense_api_sdk.models.species_label import SpeciesLabel
from synology_api.filestation import FileStation
from fishsense_data_processing_workflow_worker.config import settings
from pathlib import Path
from tqdm.notebook import tqdm
from skimage.exposure import adjust_gamma, equalize_adapthist
from skimage.util import img_as_float, img_as_ubyte
import rawpy
import cv2
import numpy as np
import pymupdf
import math
from fishsense_data_processing_workflow_worker.config import settings

In [8]:
NAS_HOST = "e4e-nas.ucsd.edu"
NAS_PORT = 6021

In [9]:
DATA_FOLDER = (Path("../data") / "REEF" / "data").absolute()
OUTPUT_FOLDER = (Path("../output") / "preprocess_slate_images_jpeg").absolute()

DATA_FOLDER.mkdir(parents=True, exist_ok=True)
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

DATA_FOLDER.exists(), OUTPUT_FOLDER.exists()

(True, True)

In [10]:
filestation = FileStation(NAS_HOST, NAS_PORT, settings.e4e_nas.username, settings.e4e_nas.password, secure=True, cert_verify=False)

In [11]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    dives = await fs.dives.get_canonical()

len(dives), dives[0]

(272,
 Dive(id=1, name='080123_FSL-01 Photos', path='2023-09-07 REEF Data Dump/080123_FSL-01 Photos', dive_datetime=datetime.datetime(2023, 8, 1, 12, 46, 27, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=True, camera_id=1, dive_slate_id=1))

In [12]:
high_priority_dives = [dive for dive in dives if dive.priority == 'HIGH' and dive.id == 383]

len(high_priority_dives), high_priority_dives[0]

(1,
 Dive(id=383, name='062624_FiveSeas_FSL03', path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03', dive_datetime=datetime.datetime(2024, 6, 26, 7, 52, 45, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=3, dive_slate_id=10))

In [13]:
high_priority_dives_by_id = {dive.id: dive for dive in high_priority_dives}

high_priority_dives_by_id

{383: Dive(id=383, name='062624_FiveSeas_FSL03', path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03', dive_datetime=datetime.datetime(2024, 6, 26, 7, 52, 45, tzinfo=TzInfo(0)), priority=<Priority.HIGH: 'HIGH'>, flip_dive_slate=None, camera_id=3, dive_slate_id=10)}

In [14]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as client:
    camera_intrinsics_list = await tqdm_asyncio.gather(*[client.cameras.get_intrinsics(dive.camera_id) for dive in high_priority_dives])

len(camera_intrinsics_list), camera_intrinsics_list[0]

100%|██████████| 1/1 [00:00<00:00,  6.22it/s]


(1,
 <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x70dc2a3830e0>)

In [15]:
camera_intrinsics_by_dive_id = {dive.id: intrinsics for dive, intrinsics in zip(high_priority_dives, camera_intrinsics_list)}

camera_intrinsics_by_dive_id

{383: <fishsense_api_sdk.models.camera_intrinsics.CameraIntrinsics at 0x70dc2a3830e0>}

In [16]:
species_labels: List[SpeciesLabel] = await tqdm_asyncio.gather(*[fs.labels.get_species_labels(dive.id) for dive in high_priority_dives])
species_labels = [label for sublist in species_labels for label in sublist]

len(species_labels), species_labels[0]

100%|██████████| 1/1 [00:00<00:00,  2.60it/s]


(40,
 SpeciesLabel(id=1179, label_studio_task_id=224623, label_studio_project_id=70, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/708d385dc06f8fd60c46683478e77b2b', updated_at=datetime.datetime(2025, 12, 9, 1, 2, 5, 406207, tzinfo=TzInfo(0)), completed=True, grouping=None, top_three_photos_of_group=None, slate_upside_down=None, laser_x=2043.1891467172798, laser_y=1355.6551899834365, laser_label='Red Laser', content_of_image='Slate, Laser on slate', fish_measurable_category=None, fish_angle_category=None, fish_curved_category=None, label_studio_json={'annotations': [{'id': 134871, 'result': [{'id': 'X-QEwfPU-K', 'type': 'taxonomy', 'value': {'taxonomy': [['Slate', 'Laser on slate']]}, 'origin': 'manual', 'to_name': 'image', 'from_name': 'species'}, {'id': 'w_p1HuzBVT', 'type': 'keypointlabels', 'value': {'x': 50.901573161865464, 'y': 44.94877950873463, 'width': 0.2762390456930156, 'keypointlabels': ['Red Laser']}, 'origin': 'manual', 'to_name': 'image',

In [17]:
slate_labels = [label for label in species_labels if label.content_of_image == 'Slate, Laser on slate']

len(slate_labels), slate_labels[0]

(9,
 SpeciesLabel(id=1179, label_studio_task_id=224623, label_studio_project_id=70, image_url='https://orchestrator.fishsense.e4e.ucsd.edu/api/v1/data/groups_jpeg/708d385dc06f8fd60c46683478e77b2b', updated_at=datetime.datetime(2025, 12, 9, 1, 2, 5, 406207, tzinfo=TzInfo(0)), completed=True, grouping=None, top_three_photos_of_group=None, slate_upside_down=None, laser_x=2043.1891467172798, laser_y=1355.6551899834365, laser_label='Red Laser', content_of_image='Slate, Laser on slate', fish_measurable_category=None, fish_angle_category=None, fish_curved_category=None, label_studio_json={'annotations': [{'id': 134871, 'result': [{'id': 'X-QEwfPU-K', 'type': 'taxonomy', 'value': {'taxonomy': [['Slate', 'Laser on slate']]}, 'origin': 'manual', 'to_name': 'image', 'from_name': 'species'}, {'id': 'w_p1HuzBVT', 'type': 'keypointlabels', 'value': {'x': 50.901573161865464, 'y': 44.94877950873463, 'width': 0.2762390456930156, 'keypointlabels': ['Red Laser']}, 'origin': 'manual', 'to_name': 'image', 

In [18]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    slate_images = await tqdm_asyncio.gather(*[fs.images.get(image_id=label.image_id) for label in slate_labels])

len(slate_images), slate_images[0]

100%|██████████| 9/9 [00:00<00:00, 38.25it/s]


(9,
 Image(id=111909, path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03/P6260560.ORF', taken_datetime=datetime.datetime(2024, 6, 26, 6, 53, 31, tzinfo=TzInfo(0)), checksum='708d385dc06f8fd60c46683478e77b2b', is_canonical=True, dive_id=383, camera_id=3))

In [19]:
def process_raw(image_path: Path):
    with image_path.open("rb") as f:
        with rawpy.imread(f) as raw:
            img = img_as_float(
                raw.postprocess(
                    gamma=(1, 1),
                    no_auto_bright=True,
                    use_camera_wb=True,
                    output_bps=16,
                    user_flip=0,
                )
            )

            hsv = cv2.cvtColor(img_as_ubyte(img), cv2.COLOR_BGR2HSV)
            _, _, val = cv2.split(hsv)

            mid = 20
            mean = np.mean(val)
            meanLog = math.log(mean)
            midLog = math.log(mid * 255)
            gamma = midLog / meanLog
            gamma = 1 / gamma

            img = adjust_gamma(img, gamma=gamma)

            img = equalize_adapthist(img)

            return img_as_ubyte(img[:, :, ::-1])

In [20]:
def rectify(img: np.ndarray, camera_matrix: np.ndarray, distortion_coefficients: np.ndarray) -> np.ndarray:
    return cv2.undistort(
        img,
        camera_matrix,
        distortion_coefficients,
    )

In [21]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as fs:
    dive_slates = await client.dive_slates.get()

len(dive_slates), dive_slates[0]

(11,
 DiveSlate(id=5, name='Tic-Tac-Toe 4', dpi=300, path='Dive Slate#4.pdf', created_at=datetime.datetime(2025, 9, 6, 3, 53, 10, 190057, tzinfo=TzInfo(0)), reference_points=[(591.9493127134886, 419.73291956851295), (1180.375218367864, 415.508647477124), (2008.7060450606716, 408.7832988960453), (2598.7223535242415, 401.20588324117523), (594.2177470825221, 1320.1595244257721), (1183.4343179108548, 1318.369903930235), (2016.1826073962275, 1316.5857288918276), (2604.907191981395, 1314.7051789793095)]))

In [22]:
dive_slates_by_id = {dive_slate.id: dive_slate for dive_slate in dive_slates}

dive_slates_by_id

{5: DiveSlate(id=5, name='Tic-Tac-Toe 4', dpi=300, path='Dive Slate#4.pdf', created_at=datetime.datetime(2025, 9, 6, 3, 53, 10, 190057, tzinfo=TzInfo(0)), reference_points=[(591.9493127134886, 419.73291956851295), (1180.375218367864, 415.508647477124), (2008.7060450606716, 408.7832988960453), (2598.7223535242415, 401.20588324117523), (594.2177470825221, 1320.1595244257721), (1183.4343179108548, 1318.369903930235), (2016.1826073962275, 1316.5857288918276), (2604.907191981395, 1314.7051789793095)]),
 6: DiveSlate(id=6, name='Tic-Tac-Toe 5', dpi=300, path='Dive Slate #5.pdf', created_at=datetime.datetime(2025, 9, 6, 3, 53, 10, 190162, tzinfo=TzInfo(0)), reference_points=[(403.8205816493262, 631.5968154288058), (993.2497902769651, 632.7766211186832), (1740.9198685182203, 635.3794945908605), (2328.4124741771443, 635.521328189681), (425.52889197228865, 1488.5791173820157), (1014.7334760788126, 1491.8682245487987), (1758.603030468457, 1496.092149318488), (2346.7834576540045, 1498.101145176125

In [23]:
dive_slates_by_dive_id = {dive.id: dive_slates_by_id[dive.dive_slate_id] for dive in high_priority_dives}

dive_slates_by_dive_id

{383: DiveSlate(id=10, name='V-Slate 3', dpi=300, path='SMILE vslate 3.pdf', created_at=datetime.datetime(2025, 9, 6, 3, 53, 10, 190525, tzinfo=TzInfo(0)), reference_points=[(1179.0084868747833, 117.81684380775803), (2752.091097298008, 1538.4702475910544), (3668.6909445893743, 460.7455616290455), (4069.025033904235, 822.238374386121), (2784.5545688670823, 2374.0171777152314), (816.5862342973786, 561.4466136108025)])}

In [24]:
for image in tqdm(slate_images):
    dive = high_priority_dives_by_id[image.dive_id]
    dive_slate = dive_slates_by_dive_id[dive.id]
    camera_intrinsics = camera_intrinsics_by_dive_id[dive.id]

    image_path = DATA_FOLDER / image.path
    pdf_path = DATA_FOLDER / dive_slate.path
    target_path = OUTPUT_FOLDER / f"{image.checksum}.JPG"

    source_nas_path = f"/fishsense/Fishsense Lite Calibration Parameters/{dive_slate.path}"
    print(f"Downloading {source_nas_path} to {pdf_path}")
    filestation.get_file(source_nas_path, "download", dest_path=str(pdf_path.parent))

    with pymupdf.open(pdf_path) as pdf_document:
        page: pymupdf.Page = pdf_document.load_page(0)
        pixmap: pymupdf.Pixmap = page.get_pixmap(dpi=dive_slate.dpi)
        bytes = np.frombuffer(pixmap.samples, dtype=np.uint8)

        pdf_image = bytes.reshape(pixmap.height, pixmap.width, pixmap.n)
        pdf_image = cv2.cvtColor(pdf_image, cv2.COLOR_RGB2GRAY)

        _, pdf_image = cv2.threshold(pdf_image, 125, 255, cv2.THRESH_BINARY)

        pdf_image = cv2.cvtColor(pdf_image, cv2.COLOR_GRAY2BGR)

    source_nas_path = f"/fishsense_data/REEF/data/{image.path}"
    filestation.get_file(source_nas_path, "download", dest_path=str(image_path.parent))

    img = process_raw(image_path)
    img = rectify(img, camera_intrinsics.camera_matrix, camera_intrinsics.distortion_coefficients)

    img_height, img_width = img.shape[:2]
    pdf_height, pdf_width = pdf_image.shape[:2]

    scale_y = float(img_height) / float(pdf_height)

    pdf_height = int(pdf_height * scale_y)
    pdf_width = int(pdf_width * scale_y)
    pdf_image = cv2.resize(pdf_image, (pdf_width, pdf_height))

    new_img = np.zeros((img_height, img_width + pdf_width, 3), dtype=np.uint8)
    new_img[:, :pdf_width, :] = pdf_image
    new_img[:, pdf_width:, :] = img

    for idx, point in enumerate(dive_slate.reference_points):
        x, y = point
         
        x*= scale_y
        y*= scale_y

        x = int(x)
        y = int(y)

        cv2.circle(new_img, (x, y), radius=25, color=(0, 0, 255), thickness=-1)

        text = f"{idx + 1}"
        org = (x + 20, y - 10)
        fontFace = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 5
        color = (0, 0, 255) # Red color (BGR)
        thickness = 10
        lineType = cv2.LINE_AA

        # Put the text on the image
        cv2.putText(new_img, text, org, fontFace, fontScale, color, thickness, lineType)

    cv2.imwrite(target_path.as_posix(), new_img)

    target_nas_path = f"/fishsense_process_work/preprocess_slate_images_jpeg"
    filestation.upload_file(target_nas_path, str(target_path), overwrite=True)

  0%|          | 0/9 [00:00<?, ?it/s]

Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3884170', 'files': ('708d385dc06f8fd60c46683478e77b2b.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/708d385dc06f8fd60c46683478e77b2b.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.70M/3.70M [00:03<00:00, 1.05MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3829907', 'files': ('4711763b48368af835dc397c61411dab.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/4711763b48368af835dc397c61411dab.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.65M/3.65M [00:03<00:00, 1.13MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3897226', 'files': ('a16c10d11e3ea788f966800c77af1501.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/a16c10d11e3ea788f966800c77af1501.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.72M/3.72M [00:02<00:00, 1.41MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3868882', 'files': ('d8c968f9f1582fc6218064d57196f6f0.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/d8c968f9f1582fc6218064d57196f6f0.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.69M/3.69M [00:03<00:00, 1.26MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3969886', 'files': ('c7d43584ed2aeee8b2eaa0aaa58a29d8.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/c7d43584ed2aeee8b2eaa0aaa58a29d8.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.79M/3.79M [00:03<00:00, 1.28MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3695777', 'files': ('97bd3683b5a23ab2eda345ddbb73883b.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/97bd3683b5a23ab2eda345ddbb73883b.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.53M/3.53M [00:02<00:00, 1.33MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '4009608', 'files': ('67c6ae67d5af7120d246a0f2150c25c6.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/67c6ae67d5af7120d246a0f2150c25c6.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.82M/3.82M [00:03<00:00, 1.31MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3844518', 'files': ('96e46bd9a9c94fce052e23370e172823.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/96e46bd9a9c94fce052e23370e172823.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.67M/3.67M [00:02<00:00, 1.41MB/s]


Downloading /fishsense/Fishsense Lite Calibration Parameters/SMILE vslate 3.pdf to /home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/scripts/../data/REEF/data/SMILE vslate 3.pdf
<MultipartEncoder: {'path': '/fishsense_process_work/preprocess_slate_images_jpeg', 'create_parents': 'true', 'overwrite': 'true', 'size': '3942369', 'files': ('c5491e4af3e002fe2e2da783f8eceb6d.JPG', <_io.BufferedReader name='/home/chris/Repos/school/e4e/fishsense/fishsense-data-processing-workflow-worker/output/preprocess_slate_images_jpeg/c5491e4af3e002fe2e2da783f8eceb6d.JPG'>, 'image/jpeg')}>


Upload Progress: 100%|██████████| 3.76M/3.76M [00:02<00:00, 1.43MB/s]
