In [1]:
from pathlib import Path
import sqlite3
import pandas as pd
from dynaconf import Dynaconf
from label_studio_sdk.client import LabelStudio

In [2]:
# Build the project settings object from the two TOML files one directory up.
# merge_enabled=True is passed so values from the listed files are merged;
# environment variables use the "DYNACONF" prefix.
settings = Dynaconf(
    settings_files=['../settings.toml', '../.secrets.toml'],
    envvar_prefix="DYNACONF",
    merge_enabled=True,
)

In [3]:
# Directory that later cells scan recursively for *.JPG files.
label_studio_path = Path("..", "label_studio")
# SQLite database file that the query cell below connects to.
db_path = Path("..", "reef", "processing.db")

In [4]:
# Load path, MD5 and laser_task_id for every image row that joins to a
# canonical dive and has a non-null image_md5.
con = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query("""
                       select i.path, i.image_md5, i.laser_task_id
                       from canonical_dives d
                       join images i on d.path = i.dive
                       where i.image_md5 is not null
""", con)
finally:
    # Always release the database handle, even when the query raises;
    # otherwise the connection stays open for the lifetime of the kernel.
    con.close()

df

Unnamed: 0,path,image_md5,laser_task_id
0,2025-02-10 REEF Data Dump SMILE 6/121123_Key L...,0e4b37c7bb78af557b70f5775c75670d,
1,2025-02-10 REEF Data Dump SMILE 6/121123_Key L...,309b843db7ec32179e489f603743dee7,
2,2025-02-10 REEF Data Dump SMILE 6/121123_Key L...,7c934559e9558cfa546c065317f6136c,
3,2025-02-10 REEF Data Dump SMILE 6/121123_Key L...,418763383f760a29197f1e3bbcd88953,
4,2025-02-10 REEF Data Dump SMILE 6/121123_Key L...,c6445365386e207d47667307e3212fe9,
...,...,...,...
65797,2025-02-10 REEF Data Dump SMILE 6/120723_Key L...,7fc27c2ce692492c68199d12e26e8bda,
65798,2025-02-10 REEF Data Dump SMILE 6/120723_Key L...,59f843d3ed39f8ca344916774d9c39bc,
65799,2025-02-10 REEF Data Dump SMILE 6/120723_Key L...,4759dba723ef29fb6831110eb2a6e782,
65800,2025-02-10 REEF Data Dump SMILE 6/120723_Key L...,8e0d4097b21f27a48cb67918d5181a97,


In [5]:
# Collect the stem (file name without its extension) of every *.JPG found
# anywhere under label_studio_path.
jpg_paths = label_studio_path.rglob("*.JPG")
files = [jpg.stem for jpg in jpg_paths]

files

['622497d8b2f18d81ee066d00040d4119',
 'b84aa6197802dc167c687d3401fdac6f',
 '9e0b549a7c317211b80e4a2cfd97a08b',
 '410da347e16b818be98fe46c32127a48',
 '90108071ca3b2ef2afb8c0699a8cdcb2',
 'b24b811881ab9d22f1b000a0f7cf450e',
 '03b42500979235a94be9658db8271d98',
 '72905574e1eee88ff931922b8444a0c6',
 'd0b31faabe8a6cd33309ed5cdb260d1c',
 'd2e6a526f9cb34dada2739fbfa7334f0',
 'b74e987d460c3216c1f74beabd2c2a75',
 '82c459a3a7d95d28a3940f8443d01c6a',
 '9a9aa68bf16aefdcf6c3c18235c2e33f',
 'af1d0ac03896f9427b247bbc43be3ecd',
 'c8fd5d1efa046974a0aeebfa1b80a003',
 'a84347f11d83533772337fc3eb12c752',
 '0e0114dd7f79abbf1f27873e8ba58194',
 '332a04d249a0a915198229deed995c78',
 '0a6c5398a55e508548514ece44351fe1',
 '214d6d45002c8f794683cad3c0b4ad8e',
 'e24a0b00cbbf53467c1a46c69e8aaa89',
 '2b04f42069d12f618546e4fa41c9a855',
 '1c0c52043d7b0de429f2f848461cdf99',
 'cd9af1ea1cf44544987e19885cf223fd',
 '1fdcfd02ffdd0d57f1b79e5bcb76c3fd',
 '8cf0b7e53a27b046311ef29b5762b33f',
 '68fd498745610a596912f05c14c3fc77',
 

In [6]:
# Keep only rows whose image is present under label_studio_path (its MD5 is
# one of the collected file stems) AND that have a laser_task_id recorded.
# Both masks are built from df and combined, so neither filter overwrites
# the other.
in_label_studio = df["image_md5"].isin(files)
has_laser_task = df["laser_task_id"].notnull()
filtered_df = df[in_label_studio & has_laser_task]

filtered_df

Unnamed: 0,path,image_md5,laser_task_id
101,2023-09-07 REEF Data Dump/090123_SMILE Dives/0...,a67fc8a96f0e0fa1d84aee666ee6b5e2,39546.0
102,2023-09-07 REEF Data Dump/090123_SMILE Dives/0...,a8d20187687a761caadba8fd98227f9b,39560.0
103,2023-09-07 REEF Data Dump/090123_SMILE Dives/0...,757b623021c18b679140ef0ea0267040,39518.0
104,2023-09-07 REEF Data Dump/090123_SMILE Dives/0...,9e5a76a34697d28db9f049be6aad0bc6,39532.0
105,2023-09-07 REEF Data Dump/090123_SMILE Dives/0...,2fd1af7bf5081bc8b54a746db965a590,39567.0
...,...,...,...
63424,2023-09-07 REEF Data Dump/083123_Fish Degree A...,631827bae374e9b99f2b8e1eaa19d6c7,38026.0
63425,2023-09-07 REEF Data Dump/083123_Fish Degree A...,66fec5fc8c9651ef0657c07157db089f,38053.0
63426,2023-09-07 REEF Data Dump/083123_Fish Degree A...,ec4d4a6a133da23a0edd5db4acee5705,38032.0
63427,2023-09-07 REEF Data Dump/083123_Fish Degree A...,de643f65f02f77a03a0d01bb5b1e51bb,37999.0


In [7]:
# Label Studio API client; the host and API key are read from the loaded
# settings, and the host is always addressed over https.
label_studio = LabelStudio(
    base_url=f"https://{settings.label_studio.host}",
    api_key=settings.label_studio.api_key,
)

In [8]:
# Fetch the Label Studio task for every row of filtered_df, producing a list
# of (image_md5, task) pairs in row order. One tasks.get() call is made per
# row. laser_task_id is cast with int() before the lookup (the column is
# displayed as floats such as 39546.0 above).
tasks = [
    (image_md5, label_studio.tasks.get(int(laser_task_id)))
    for image_md5, laser_task_id in zip(
        filtered_df["image_md5"], filtered_df["laser_task_id"]
    )
]

tasks

[('a67fc8a96f0e0fa1d84aee666ee6b5e2',
  DataManagerTaskSerializer(id=39546, predictions=[DataManagerTaskSerializerPredictionsItem(result=[{'id': 'vl4eYjOFjM', 'type': 'keypointlabels', 'value': {'x': 49.54936878187443, 'y': 44.19042882404951, 'width': 0.25, 'keypointlabels': ['Red Laser']}, 'to_name': 'img-1', 'from_name': 'kp-1', 'image_rotation': 0, 'original_width': 3987, 'original_height': 3016}], score=0.0, model_version='0.1.1.dev30+ee4b72d', model=None, model_run=None, task=39546, project=10.0, created_at=datetime.datetime(2024, 11, 10, 17, 41, 27, 749273, tzinfo=TzInfo(UTC)), updated_at=datetime.datetime(2024, 11, 10, 17, 41, 27, 749288, tzinfo=TzInfo(UTC)), id=6391, created_ago='4\xa0months', cluster=None, neighbors=None, mislabeling=0.0)], annotations=[AnnotationsDmField(id=3684, result=[{'id': 'rSydkk_1Hy', 'type': 'keypointlabels', 'value': {'x': 49.57892671500888, 'y': 44.13144664325452, 'width': 0.20145365914588592, 'keypointlabels': ['Red Laser']}, 'origin': 'manual', 't

In [9]:
# Narrow the (image_md5, task) pairs down to those whose task reports a
# total_annotations value other than 0.
completed_tasks = [
    (image_md5, task)
    for image_md5, task in tasks
    if task.total_annotations != 0
]

completed_tasks

[('a67fc8a96f0e0fa1d84aee666ee6b5e2',
  DataManagerTaskSerializer(id=39546, predictions=[DataManagerTaskSerializerPredictionsItem(result=[{'id': 'vl4eYjOFjM', 'type': 'keypointlabels', 'value': {'x': 49.54936878187443, 'y': 44.19042882404951, 'width': 0.25, 'keypointlabels': ['Red Laser']}, 'to_name': 'img-1', 'from_name': 'kp-1', 'image_rotation': 0, 'original_width': 3987, 'original_height': 3016}], score=0.0, model_version='0.1.1.dev30+ee4b72d', model=None, model_run=None, task=39546, project=10.0, created_at=datetime.datetime(2024, 11, 10, 17, 41, 27, 749273, tzinfo=TzInfo(UTC)), updated_at=datetime.datetime(2024, 11, 10, 17, 41, 27, 749288, tzinfo=TzInfo(UTC)), id=6391, created_ago='4\xa0months', cluster=None, neighbors=None, mislabeling=0.0)], annotations=[AnnotationsDmField(id=3684, result=[{'id': 'rSydkk_1Hy', 'type': 'keypointlabels', 'value': {'x': 49.57892671500888, 'y': 44.13144664325452, 'width': 0.20145365914588592, 'keypointlabels': ['Red Laser']}, 'origin': 'manual', 't

In [10]:
# Paths of the *.JPG files under label_studio_path whose stem is the MD5 of a
# completed task. The MD5s are gathered into a set once, so each file needs a
# single hash lookup instead of rebuilding and scanning a list per file.
completed_md5s = {md5 for md5, _ in completed_tasks}
completed_images = [
    p for p in label_studio_path.rglob("*.JPG") if p.stem in completed_md5s
]

completed_images

[PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/622497d8b2f18d81ee066d00040d4119.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/b84aa6197802dc167c687d3401fdac6f.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/9e0b549a7c317211b80e4a2cfd97a08b.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/410da347e16b818be98fe46c32127a48.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/b24b811881ab9d22f1b000a0f7cf450e.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/03b42500979235a94be9658db8271d98.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/72905574e1eee88ff931922b8444a0c6.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/d0b31faabe8a6cd33309ed5cdb260d1c.JPG'),
 PosixPath('../label_studio/2023-09-07 REEF Data Dump/08

In [11]:
# Delete every file listed in completed_images from disk and print its path.
# NOTE(review): unlink() is irreversible, and without missing_ok it raises
# FileNotFoundError for a path that no longer exists, so re-running this cell
# without first recomputing completed_images will fail on the first entry.
for completed_image in completed_images:
    completed_image.unlink()
    print(f"Deleted {completed_image}")

Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/622497d8b2f18d81ee066d00040d4119.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/b84aa6197802dc167c687d3401fdac6f.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/9e0b549a7c317211b80e4a2cfd97a08b.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/410da347e16b818be98fe46c32127a48.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/b24b811881ab9d22f1b000a0f7cf450e.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/03b42500979235a94be9658db8271d98.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/72905574e1eee88ff931922b8444a0c6.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/d0b31faabe8a6cd33309ed5cdb260d1c.JPG
Deleted ../label_studio/2023-09-07 REEF Data Dump/082929_FishModels_FSL01/d2e6a526f9cb34dada2739fbfa7334f0.JPG
D