In [None]:
import os
from thumbnail_selector import PEN_SITE_MAPPING, get_capture_keys, get_image_urls_and_crop_metadatas, \
    get_random_image_urls_and_crop_metadatas
from construct_fish_detection_dataset_o2kr2 import establish_plali_connection, insert_into_plali

import uuid
from video_service.video_generation import generate_video

<h1> Set up stage 1 annotation </h1>

In stage 1, we only identify which 512x512 left images contain a full fish. Those images then cascade to stage 2 for full key-point annotation.

<h1> Prepare image URL data for 2020-02-13 </h1>

In [None]:
# Generate a video for 2020-02-13 (89 and the date match the pen_id / date
# queried in the cells below).
# NOTE(review): the meaning of the remaining positional args (0, 24, 1.0, True)
# is defined in video_service.video_generation and is not visible here — confirm.
generate_video(89, '2020-02-13', 0, 24, 1.0, True)

In [None]:
# Bucket name used as the host part of every URL built by get_image_urls.
INBOUND_BUCKET = 'aquabyte-images-raw'

def get_image_urls(capture_keys):
    """Build the S3 URL of the 512x512 left frame for each capture key.

    Only left-frame URLs are produced; no right-frame URLs or crop metadata
    are returned.

    Args:
        capture_keys: iterable of S3 object key strings. Every occurrence of
            'capture.json' in a key is replaced with
            'left_frame.resize_512_512.jpg'.

    Returns:
        list of 's3://<INBOUND_BUCKET>/<left image key>' strings, in the same
        order as capture_keys (empty list for empty input).
    """

    # S3 URLs always use '/', so format them directly instead of going through
    # os.path.join, which is OS-dependent and discards the scheme and bucket
    # whenever a key starts with '/'.
    return [
        's3://{}/{}'.format(
            INBOUND_BUCKET,
            capture_key.replace('capture.json', 'left_frame.resize_512_512.jpg'),
        )
        for capture_key in capture_keys
    ]


In [None]:
# Query window: a single day (start and end date are the same) for pen 89.
pen_id = 89
start_date = end_date = '2020-02-13'

# Capture keys for that pen/day, looked up in the raw-images bucket.
capture_keys = get_capture_keys(
    pen_id,
    start_date,
    end_date,
    inbound_bucket='aquabyte-images-raw',
)

In [None]:
# Hours of the day whose captures are sent to annotation (note: 11 is not listed).
hours_to_include = [7, 8, 9, 10, 12, 13, 14, 15, 16]

capture_keys_to_include = []
for capture_key in capture_keys:
    # Keys are '/'-separated; the first segment containing 'hour=' carries the
    # hour, which is read from that segment's last two characters.
    hour_segments = list(filter(lambda segment: 'hour=' in segment, capture_key.split('/')))
    hour = int(hour_segments[0][-2:])
    if hour not in hours_to_include:
        continue
    capture_keys_to_include.append(capture_key)

# Left-frame S3 URLs for the captures that passed the hour filter.
image_urls = get_image_urls(capture_keys_to_include)


<h2> Insert into PLALI </h2>

In [None]:
def process_into_plali_records(image_urls, workflow_id='00000000-0000-0000-0000-000000000047'):
    """Build one PLALI record dict per image URL.

    Args:
        image_urls: sequence of image URL strings; must support len().
        workflow_id: value stored under 'workflow_id' in every record.
            Defaults to the workflow ID that was previously hard-coded here.

    Returns:
        list of dicts, one per URL and in input order, each with the keys:
            'id': a fresh uuid4 rendered as a string,
            'workflow_id': the workflow_id argument,
            'images': a one-element set holding the URL,
            'metadata': an empty dict,
            'priority': idx / len(image_urls), i.e. 0.0 for the first URL and
                rising towards (but never reaching) 1.0.
        An empty input yields an empty list.
    """

    total = len(image_urls)  # loop-invariant denominator for the priority
    records = []
    for idx, image_url in enumerate(image_urls):
        records.append({
            'id': str(uuid.uuid4()),
            'workflow_id': workflow_id,
            # NOTE(review): this is a set literal, kept exactly as before;
            # confirm insert_into_plali expects a set here rather than a list.
            'images': {image_url},
            'metadata': {},
            'priority': float(idx) / total,
        })

    return records

In [None]:
# Turn the filtered 2020-02-13 image URLs into PLALI record dicts (one per URL).
values_to_insert = process_into_plali_records(image_urls)

In [None]:
# Set PLALI_SQL_CREDENTIALS to the credentials file path before connecting
# (presumably read by establish_plali_connection — confirm).
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials.json'
# engine and sql_metadata are the handles passed to insert_into_plali in the insert loops.
engine, sql_metadata = establish_plali_connection()

In [None]:
def chunker(seq, size):
    """Lazily split a sliceable sequence into consecutive pieces of `size` items.

    Args:
        seq: any object supporting len() and slicing (list, str, tuple, ...).
        size: step between slice starts and maximum length of each piece.

    Returns:
        A generator yielding seq[0:size], seq[size:2*size], ...; the final
        piece may be shorter. Nothing is yielded for an empty seq.
    """
    # The start offsets are computed right away, so an invalid size (e.g. 0)
    # fails at call time rather than on first iteration.
    offsets = range(0, len(seq), size)

    def _slices():
        for start in offsets:
            yield seq[start:start + size]

    return _slices()

In [None]:
n = 10     # number of records passed to each insert_into_plali call
count = 0  # chunks inserted so far; stays 0 when there is nothing to insert

# NOTE(review): the [2:] slice leaves the first two records out of the insert —
# confirm this is intentional.
for batch_number, chunk in enumerate(chunker(values_to_insert[2:], n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)

    # Only advance the counter once the chunk has actually been inserted.
    count = batch_number
    print(count)

<h1> Prepare image URL data for 2020-02-14 </h1>

In [None]:
# Generate a video for 2020-02-14 (89 and the date match the pen_id / date
# queried in the cells below).
# NOTE(review): the meaning of the remaining positional args (0, 24, 1.0, True)
# is defined in video_service.video_generation and is not visible here — confirm.
generate_video(89, '2020-02-14', 0, 24, 1.0, True)

In [None]:
# Query window: a single day (start and end date are the same) for pen 89.
pen_id = 89
start_date = end_date = '2020-02-14'

# Capture keys for that pen/day, looked up in the raw-images bucket.
capture_keys = get_capture_keys(
    pen_id,
    start_date,
    end_date,
    inbound_bucket='aquabyte-images-raw',
)

In [None]:
# Hours of the day whose captures are sent to annotation.
hours_to_include = [6, 7, 8, 9, 10, 11, 12]

# Extra minute-level rule for some hours: a capture in one of these hours is
# kept only when its minute satisfies the rule.
minute_rule_by_hour = {
    6: lambda minute: minute >= 35,   # drops 06:00-06:34
    10: lambda minute: minute <= 30,  # drops 10:31-10:59
    11: lambda minute: minute >= 28,  # drops 11:00-11:27
}

capture_keys_to_include = []
for capture_key in capture_keys:
    key_components = capture_key.split('/')

    # The first segment containing 'hour=' carries the hour in its last two characters.
    hour_component = [component for component in key_components if 'hour=' in component][0]
    hour = int(hour_component[-2:])

    minute_rule = minute_rule_by_hour.get(hour)
    if minute_rule is not None:
        # The minute is the second ':'-separated field of the first segment containing 'at='.
        ts_component = [component for component in key_components if 'at=' in component][0]
        minute = int(ts_component.split(':')[1])
        if not minute_rule(minute):
            continue

    if hour in hours_to_include:
        capture_keys_to_include.append(capture_key)

# Left-frame S3 URLs for the captures that passed the hour/minute filter.
image_urls = get_image_urls(capture_keys_to_include)


In [None]:
# Turn the filtered 2020-02-14 image URLs into PLALI record dicts (one per URL).
values_to_insert = process_into_plali_records(image_urls)

In [None]:
n = 10     # number of records passed to each insert_into_plali call
count = 0  # chunks inserted so far; stays 0 when there is nothing to insert

# NOTE(review): the [2:] slice leaves the first two records out of the insert —
# confirm this is intentional.
for batch_number, chunk in enumerate(chunker(values_to_insert[2:], n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)

    # Only advance the counter once the chunk has actually been inserted.
    count = batch_number
    print(count)