In [11]:
from fishsense_api_sdk.client import Client
from sklearn.cluster import HDBSCAN
from fishsense_data_processing_workflow_worker.config import settings
from fishsense_api_sdk.models.dive_frame_cluster import DiveFrameCluster
from fishsense_api_sdk.models.data_source import DataSource
from datetime import datetime, timezone
from tqdm.notebook import tqdm
import asyncio

In [2]:
# Dive whose frames are clustered in this notebook.
DIVE_ID = 383

# Handed to HDBSCAN as `min_cluster_size` in the clustering cell below.
HDBSCAN_MIN_SAMPLES = 2

In [3]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as client:
    dive = await client.dives.get(dive_id=DIVE_ID)

dive

Dive(id=383, name='062624_FiveSeas_FSL03', path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03', dive_datetime=datetime.datetime(2024, 6, 26, 7, 52, 45, tzinfo=TzInfo(0)), priority=<Priority.LOW: 'LOW'>, flip_dive_slate=None, camera_id=3, dive_slate_id=None)

In [4]:
async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as client:
    images = await client.images.get(dive_id=dive.id)

len(images), images[0]

(40,
 Image(id=111909, path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03/P6260560.ORF', taken_datetime=datetime.datetime(2024, 6, 26, 6, 53, 31, tzinfo=TzInfo(0)), checksum='708d385dc06f8fd60c46683478e77b2b', is_canonical=True, dive_id=383, camera_id=3))

In [5]:
# One POSIX timestamp (seconds, float) per image, in the same order as `images`.
# NOTE(review): images without `taken_datetime` fall back to 0 (the Unix epoch),
# so they sit far from every real capture time in the clustering input;
# confirm that is the intended handling.
timestamps = []
for img in images:
    taken = img.taken_datetime
    timestamps.append(taken.timestamp() if taken else 0)

len(timestamps), timestamps[0]

(40, 1719384811.0)

In [6]:
# scikit-learn estimators take a 2-D (n_samples, n_features) input, so wrap
# each timestamp in its own single-feature row.
X = [[timestamp] for timestamp in timestamps]

In [7]:
# Cluster the capture times. HDBSCAN_MIN_SAMPLES is used as the minimum
# cluster size here; `min_samples` itself is left at the estimator default.
db = HDBSCAN(min_cluster_size=HDBSCAN_MIN_SAMPLES)
db.fit(X)

# One label per image, aligned with `images`; -1 marks points HDBSCAN treats as noise.
labels = db.labels_

len(labels), len(set(labels)), labels[0]

(40, 13, np.int64(3))

In [8]:
# Group images by HDBSCAN label. Noise points (label -1) are not dropped:
# each one is given its own fresh label so it becomes a single-image cluster.
real_cluster_count = len(set(labels)) - (1 if -1 in labels else 0)

# Labels in use are 0..real_cluster_count-1, so singleton labels start right after.
next_singleton_label = real_cluster_count

clusters_by_label = {}
for label, img in zip(labels, images):
    if label == -1:
        label = next_singleton_label
        next_singleton_label += 1
    clusters_by_label.setdefault(label, []).append(img)

# Keep only the image groups, in order of first appearance of each label.
clusters = list(clusters_by_label.values())

len(clusters), len(clusters[0]), clusters[0][0]

(13,
 3,
 Image(id=111909, path='2025-02-10 REEF Data Dump SMILE 6/CubaTrip2024_FSL03/062624_FiveSeas_FSL03/P6260560.ORF', taken_datetime=datetime.datetime(2024, 6, 26, 6, 53, 31, tzinfo=TzInfo(0)), checksum='708d385dc06f8fd60c46683478e77b2b', is_canonical=True, dive_id=383, camera_id=3))

In [None]:
now = datetime.now(timezone.utc)

async with Client(settings.fishsense_api.url, settings.fishsense_api.username, settings.fishsense_api.password) as client:
    async with asyncio.TaskGroup() as tg:
        for cluster in tqdm(clusters):
            image_ids = [img.id for img in cluster]

            dive_frame_cluster = DiveFrameCluster(
                id = None,
                image_ids=image_ids,
                data_source=DataSource.PREDICTION,
                updated_at=now,
                dive_id=DIVE_ID,
                fish_id=None,
            )

            tg.create_task(client.images.post_cluster(DIVE_ID, dive_frame_cluster))