In [1]:
import urllib.parse
from pathlib import Path

import psycopg
from psycopg.rows import dict_row
from tqdm.auto import tqdm

from fishsense_data_processing_spider.backend import get_project_export
from fishsense_data_processing_spider.config import PG_CONN_STR, settings

In [2]:
# Pull the export for Label Studio project 10. The cells below iterate over
# `export` and index each element as a task dict (`task['id']`, `task['data']`,
# `task['annotations']`).
label_studio = settings.label_studio
export = get_project_export(
    project_id=10,
    label_studio_api_key=label_studio.api_key,
    label_studio_host=label_studio.host,
)

In [3]:
# Build {task id -> image path} from each task's image URL.
# The path is taken from the URL's `d` query parameter, made relative to the
# labeling project's data directory, and given an `.ORF` suffix.
image_paths = {}
for task in export:
    query = urllib.parse.urlparse(task['data']['img']).query
    served_path = Path(urllib.parse.parse_qs(query)['d'][0])
    raw_path = served_path.relative_to(
        'fs_png_labeling_project_laser/REEF/data'
    ).with_suffix('.ORF')
    image_paths[task['id']] = raw_path.as_posix()

In [4]:
# Build {task id -> (x, y)} in pixels from the first result of the first
# annotation of every task. Tasks with no annotations, or whose first
# annotation has no results, are left out.
coords = {}
for task in export:
    annotations = task['annotations']
    if len(annotations) == 0:
        continue
    results = annotations[0]['result']
    if len(results) == 0:
        continue
    point = results[0]
    # value x/y are divided by 100 and scaled by the original image size
    # (i.e. treated as percentages); int() truncates the fractional pixel.
    coords[task['id']] = (
        int(point['value']['x'] / 100 * point['original_width']),
        int(point['value']['y'] / 100 * point['original_height']),
    )

In [5]:
# Store the labeled laser coordinates in `laser_labels`, keyed by image checksum.
#
# `coords` is keyed by Label Studio task id, while `laser_labels` is keyed by
# checksum, so each task id is first translated through the `images` table
# (laser_task_id -> image_md5). Task ids with no matching row cannot be
# imported; they are collected in `bad_task_id` and reported at the end
# instead of being dropped silently.
bad_task_id = []
import_values = []
with psycopg.connect(PG_CONN_STR, row_factory=dict_row) as con, con.cursor() as cur:
    cur.execute('SELECT image_md5, laser_task_id FROM images WHERE laser_task_id IS NOT NULL;')
    result = cur.fetchall()
    lookup = {row['laser_task_id']: row['image_md5'] for row in result}

    for task_id, (x, y) in tqdm(coords.items()):
        if task_id not in lookup:
            bad_task_id.append(task_id)
            continue
        import_values.append({'cksum': lookup[task_id], 'x': x, 'y': y})

    # ON CONFLICT DO NOTHING keeps rows that already exist, so re-running this
    # cell does not fail on labels that were imported earlier.
    cur.executemany(
        'INSERT INTO laser_labels (cksum, x, y) VALUES (%(cksum)s, %(x)s, %(y)s) ON CONFLICT DO NOTHING;',
        import_values
    )
    con.commit()

# Surface the tasks that could not be matched so the gap is visible in the
# notebook output; the ids themselves stay available in `bad_task_id`.
if bad_task_id:
    print(
        f'Skipped {len(bad_task_id)} of {len(coords)} labeled task(s): '
        'no images row with a matching laser_task_id (see bad_task_id).'
    )

  0%|          | 0/56912 [00:00<?, ?it/s]

In [6]:
# Pull the export for Label Studio project 39.
# NOTE: this rebinds `export`, replacing the project 10 export loaded earlier;
# the cell that builds `param_seq` reads this value.
label_studio = settings.label_studio
export = get_project_export(
    project_id=39,
    label_studio_api_key=label_studio.api_key,
    label_studio_host=label_studio.host,
)

In [19]:
# Build the INSERT parameters for every task that has at least one annotation
# whose first annotation has at least one result.
# The checksum is the file stem of the image URL's path; x/y come from the
# first result of the first annotation, converted to pixels.
param_seq = []
for task in export:
    annotations = task['annotations']
    if len(annotations) == 0:
        continue
    results = annotations[0]['result']
    if len(results) == 0:
        continue
    point = results[0]
    param_seq.append({
        'cksum': Path(urllib.parse.urlparse(task['data']['img']).path).stem,
        # value x/y are divided by 100 and scaled by the original image size;
        # int() truncates the fractional pixel.
        'x': int(point['value']['x'] / 100 * point['original_width']),
        'y': int(point['value']['y'] / 100 * point['original_height']),
    })

In [20]:
# Bulk-insert the rows prepared in `param_seq` into `laser_labels`.
# ON CONFLICT DO NOTHING leaves existing rows untouched.
insert_laser_label_sql = 'INSERT INTO laser_labels (cksum, x, y) VALUES (%(cksum)s, %(x)s, %(y)s) ON CONFLICT DO NOTHING;'
with psycopg.connect(PG_CONN_STR, row_factory=dict_row) as con:
    with con.cursor() as cur:
        cur.executemany(insert_laser_label_sql, param_seq)
    con.commit()