In [None]:
from collections import defaultdict
import json
import os
import random
import uuid

import cv2
from matplotlib import pyplot as plt
import numpy as np
from sqlalchemy import MetaData
from sqlalchemy import Table

from research.utils.data_access_utils import RDSAccessUtils
from research_lib.utils.data_access_utils import S3AccessUtils

from PIL import Image


In [None]:
# S3 helper; presumably '/root/data' is its local download root -- TODO confirm.
s3 = S3AccessUtils('/root/data')

# Point the PLALI credentials variable at the mounted secret file.
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials'

# Read the credentials inside a context manager so the file handle is closed
# as soon as the JSON has been parsed (the bare open() call leaked it).
with open(os.environ['PLALI_SQL_CREDENTIALS']) as credentials_f:
    rds = RDSAccessUtils(json.load(credentials_f))

# Annotations of workflow ...0112 joined with their image rows and workflow name.
query = """
    select * from plali.plali_annotations x
    inner join 
    ( select a.id as plali_image_id, a.images, a.metadata, b.id as workflow_id, b.name from plali.plali_images a
    inner join plali.plali_workflows b
    on a.workflow_id = b.id ) y
    on x.plali_image_id = y.plali_image_id
    where workflow_id = '00000000-0000-0000-0000-000000000112';
"""

df = rds.extract_from_database(query)

In [None]:
# Copy the 'data_spec_name' entry of each row's metadata into its own column
# (.get yields None when the key is absent).
df['data_spec_name'] = df.metadata.apply(lambda x: x.get('data_spec_name'))

In [None]:
# Number of rows per data spec name.
df.groupby('data_spec_name').apply(lambda x: len(x))

In [None]:
# (rows, columns) of the joined annotation frame.
df.shape

In [None]:
def get_full_count(ann):
    """Count the detections labelled 'FULL' in one annotation payload.

    Args:
        ann: annotation mapping. It may carry a 'skipReasons' key and/or an
            'annotations' list of detection dicts with a 'label' entry.

    Returns:
        int: number of detections whose label is 'FULL'. 0 when the annotation
        was skipped, or when 'annotations' is missing, null or empty.
    """
    if 'skipReasons' in ann:
        return 0
    # A missing or null 'annotations' entry means "no detections"; the later
    # ann_count computation in this notebook treats it the same way.
    detections = ann.get('annotations') or []
    return sum(1 for d in detections if d.get('label') == 'FULL')

def get_full_and_partial_count(ann):
    """Count every detection (whatever its label) in one annotation payload.

    Args:
        ann: annotation mapping. It may carry a 'skipReasons' key and/or an
            'annotations' list of detections.

    Returns:
        int: number of detections. 0 when the annotation was skipped, or when
        'annotations' is missing, null or empty.
    """
    if 'skipReasons' in ann:
        return 0
    # A missing or null 'annotations' entry means "no detections" rather than
    # raising KeyError / TypeError.
    return len(ann.get('annotations') or [])
    


In [None]:
# Per-row detection counts derived from each annotation payload.
df['full_count'] = df.annotation.apply(get_full_count)
df['full_and_partial_count'] = df.annotation.apply(get_full_and_partial_count)

In [None]:
# Print the total FULL count and the total detection count for every data spec.
data_specs = sorted(df.data_spec_name.unique().tolist())
for data_spec_name in data_specs:
    mask = df.data_spec_name == data_spec_name
    spec_rows = df.loc[mask, ['full_count', 'full_and_partial_count']]
    print(spec_rows.full_count.sum(), spec_rows.full_and_partial_count.sum())
    

In [None]:
# Write the current frame to a CSV file.
df.to_csv('/root/data/alok/biomass_estimation/playground/high_recall_fish_annotation_v1_2258.csv')

In [None]:
# Download the first image of every row to output_dir as image_<n>.jpg,
# remembering the local path of each one in row order.
output_dir = '/root/data/alok/biomass_estimation/playground/high_recall_fish_annotation_images'
image_fs = []
for count, (idx, row) in enumerate(df.iterrows()):
    # 's3://bucket/some/key.jpg' -> bucket, 'some/key.jpg'
    bucket, *key_parts = row.images[0].replace('s3://', '').split('/')
    key = os.path.join(*key_parts)
    image_f = os.path.join(output_dir, 'image_{}.jpg'.format(count))
    s3.download_from_s3(bucket, key, image_f)
    image_fs.append(image_f)

    # progress indicator
    if count % 10 == 0:
        print(count)

# leave the counter at the number of processed rows
count = len(image_fs)

In [None]:
# Local path of each row's downloaded image; image_fs was filled by iterating
# df.iterrows(), so it is in the same row order as df.
df['path_on_quad'] = image_fs

In [None]:
# values_to_insert = []
# for idx, row in df.iterrows():
#     id_str = str(uuid.uuid4())
#     images = {row.images[0].replace('left', 'right')}
#     metadata = row.metadata
#     priority = 1.0
#     values = {
#         'id': id_str,
#         'workflow_id': '00000000-0000-0000-0000-000000000112',
#         'images': images,
#         'metadata': metadata,
#         'priority': priority
#     }
#     values_to_insert.append(values)

In [None]:
# n = 10
# count = 0
# for chunk in chunker(values_to_insert, n):
#     insert_into_plali(chunk, engine, sql_metadata)
    
#     count += 1
#     print(count)

<h1> Upload sample images for stereo crop annotations </h1>

In [None]:
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials'

def establish_plali_connection():
    """Create the SQL engine for PLALI and reflect its table metadata.

    The credentials file path is read from the PLALI_SQL_CREDENTIALS
    environment variable.

    Returns:
        tuple: (engine, sql_metadata), where engine is the `sql_engine` of a
        fresh RDSAccessUtils instance and sql_metadata is a MetaData object
        reflected from that engine.
    """
    # Close the credentials file as soon as it has been parsed.
    with open(os.environ['PLALI_SQL_CREDENTIALS']) as credentials_f:
        rds = RDSAccessUtils(json.load(credentials_f))
    engine = rds.sql_engine
    sql_metadata = MetaData()
    sql_metadata.reflect(bind=engine)
    return engine, sql_metadata

def insert_into_plali(values_to_insert, engine, sql_metadata):
    """Insert rows into the reflected 'plali_images' table in one transaction.

    Args:
        values_to_insert: list of row dicts passed to the INSERT statement.
        engine: engine object exposing `connect()`.
        sql_metadata: reflected metadata whose `tables` mapping contains
            'plali_images'.

    Raises:
        Whatever the database layer raises. The transaction is rolled back
        before the exception propagates.
    """
    table = sql_metadata.tables['plali_images']
    conn = engine.connect()
    try:
        trans = conn.begin()
        try:
            conn.execute(table.insert(), values_to_insert)
            trans.commit()
        except Exception:
            # Do not leave a half-open transaction behind on failure.
            trans.rollback()
            raise
    finally:
        # Always release the connection; this function is called once per
        # chunk, so unclosed connections would pile up.
        conn.close()
    
def chunker(seq, size):
    """Return a generator over consecutive slices of `seq` with at most `size` items each."""
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)

In [None]:
# Detections per annotation; a missing or empty 'annotations' entry counts as 0.
df['ann_count'] = df.annotation.apply(lambda ann: len(ann.get('annotations') or []))
mask = df.ann_count < 6

# First image URL of the three rows with the most detections below the cutoff.
top_rows = df[mask].sort_values('ann_count', ascending=False).head(3)
left_image_s3_urls = [images[0] for images in top_rows.images.tolist()]

# Each URL is followed by its counterpart, obtained by swapping 'left' for
# 'right' everywhere in the URL.
image_urls = [
    url
    for left_url in left_image_s3_urls
    for url in (left_url, left_url.replace('left', 'right'))
]
    
    

In [None]:
engine, sql_metadata = establish_plali_connection()

# One PLALI image row per URL, all tagged 'small_test' with the same priority.
values_to_insert = [
    {
        'id': str(uuid.uuid4()),
        'workflow_id': '00000000-0000-0000-0000-000000000048',
        'images': {image_url},
        'metadata': {'name': 'small_test'},
        'priority': 1.0
    }
    for image_url in image_urls
]

In [None]:
# Insert the prepared rows in chunks of n, printing the chunk number after each insert.
n = 10
count = 0
for count, chunk in enumerate(chunker(values_to_insert, n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)
    print(count)

<h1> Extract pairs and run through Hungarian Matcher </h1>

In [None]:
import logging

from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist


def hungarian_matcher(left_ids, left_bottom_top_edge_locations, right_ids, right_bottom_top_edge_locations,
                      cost_threshold=100.0):
    """Pair left and right items with the Hungarian algorithm.

    The cost of pairing two items is the Euclidean distance between their
    location vectors. An assignment is accepted only when its cost is strictly
    below `cost_threshold`.

    Args:
        left_ids: ids of the left items.
        left_bottom_top_edge_locations: one location vector per left id, in
            the same order as `left_ids`.
        right_ids: ids of the right items.
        right_bottom_top_edge_locations: one location vector per right id, in
            the same order as `right_ids`.
        cost_threshold: maximum (exclusive) cost of an accepted match.

    Returns:
        A list of (left_id, right_id) pairs: matched pairs first, then
        (left_id, None) for unmatched left items, then (None, right_id) for
        unmatched right items. The input lists are not modified.
    """
    pairs = []
    # Matched positions are tracked separately. Blanking entries of the
    # caller's lists and filtering on truthiness would drop the valid id 0.
    matched_left, matched_right = set(), set()

    if len(left_ids) > 0 and len(right_ids) > 0:
        # pairwise euclidean distance matrix
        cost_matrix = cdist(left_bottom_top_edge_locations, right_bottom_top_edge_locations, metric='euclidean')

        # hungarian algorithm to minimize weights in bipartite graph
        row_ind, col_ind = linear_sum_assignment(cost_matrix)

        for (r, c) in zip(row_ind, col_ind):
            if cost_matrix[r, c] < cost_threshold:
                pairs.append((left_ids[r], right_ids[c]))
                matched_left.add(int(r))
                matched_right.add(int(c))

    # unmatched singles
    lefts = [(key, None) for pos, key in enumerate(left_ids) if pos not in matched_left]
    rights = [(None, key) for pos, key in enumerate(right_ids) if pos not in matched_right]

    logging.info("hungarian_matcher left={}, right={} -> matched={}, left={}, right={}".format(
        len(left_ids), len(right_ids), len(pairs), len(lefts), len(rights)))

    # merge all into pairs as final result
    pairs.extend(lefts)
    pairs.extend(rights)
    return pairs

In [None]:
# Annotations of workflow ...0112 joined with their image rows and workflow name.
query = """
    select * from plali.plali_annotations x
    inner join 
    ( select a.id as plali_image_id, a.images, a.metadata, b.id as workflow_id, b.name from plali.plali_images a
    inner join plali.plali_workflows b
    on a.workflow_id = b.id ) y
    on x.plali_image_id = y.plali_image_id
    where workflow_id = '00000000-0000-0000-0000-000000000112';
"""

df = rds.extract_from_database(query)
# Detections per annotation. A missing *or null* 'annotations' entry counts as
# 0, matching the earlier ann_count computation in this notebook; a bare
# key-presence test would raise TypeError on a null value.
df['ann_count'] = df.annotation.apply(lambda x: len(x.get('annotations') or []))


In [None]:
# get crop pairs

# Group annotated rows by the 'at=...' component of their first image URL,
# storing '<side>_url' and '<side>_ann' under each timestamp. The side is the
# text before the first '_' of the file name.
crop_pair_dict = defaultdict(dict)
mask = df.ann_count > 0
for idx, row in df[mask].iterrows():
    image_url = row.images[0]
    url_components = image_url.split('/')
    ts = [part for part in url_components if part.startswith('at=')][0]
    side = url_components[-1].split('_')[0]
    crop_pair_dict[ts].update({
        '{}_url'.format(side): image_url,
        '{}_ann'.format(side): row.annotation
    })

# Keep only timestamps with exactly four entries (url + ann for two sides).
crop_pair_dict = {
    timestamp: entry
    for timestamp, entry in crop_pair_dict.items()
    if len(entry) == 4
}

In [None]:
# run Hungarian Matcher

# For every left/right pair, match detections by their vertical extent
# [yCrop, yCrop + height]. Ids are the positions in each 'annotations' list.
crop_pair_objs = []
for key, crop_pair_obj in crop_pair_dict.items():
    left_ann = crop_pair_obj['left_ann']
    right_ann = crop_pair_obj['right_ann']

    left_bottom_top_edge_locations = [
        [crop['yCrop'], crop['yCrop'] + crop['height']]
        for crop in left_ann['annotations']
    ]
    right_bottom_top_edge_locations = [
        [crop['yCrop'], crop['yCrop'] + crop['height']]
        for crop in right_ann['annotations']
    ]
    left_ids = list(range(len(left_bottom_top_edge_locations)))
    right_ids = list(range(len(right_bottom_top_edge_locations)))

    id_pairs = hungarian_matcher(left_ids, left_bottom_top_edge_locations, right_ids, right_bottom_top_edge_locations)
    # copy of the pair object extended with the matching result
    crop_pair_objs.append(dict(crop_pair_obj, id_pairs=id_pairs))
        


<h1> Upload upscaled unrectified crops to s3 </h1>

In [None]:
def download_from_s3_url(s3_url):
    """Download the object behind an 's3://bucket/key' URL and return what `s3.download_from_s3` returns.

    The first path component after the scheme is the bucket; the remaining
    components are joined into the key.
    """
    bucket, *key_parts = s3_url.replace('s3://', '').split('/')
    return s3.download_from_s3(bucket, os.path.join(*key_parts))

def get_bbox(ann):
    """Turn an xCrop/yCrop/width/height annotation into [x_min, y_min, x_max, y_max]."""
    x_min, y_min = ann['xCrop'], ann['yCrop']
    return [x_min, y_min, x_min + ann['width'], y_min + ann['height']]


def resize_bbox(bbox, original_dims, new_dims):
    """Rescale [x_min, y_min, x_max, y_max] from `original_dims` to `new_dims`.

    Both dims arguments are (width, height). Every scaled coordinate is
    truncated with int().
    """
    x_scale = float(new_dims[0] / original_dims[0])
    y_scale = float(new_dims[1] / original_dims[1])
    # x coordinates sit at positions 0 and 2, y coordinates at 1 and 3
    scales = (x_scale, y_scale, x_scale, y_scale)
    return [int(bbox[i] * scales[i]) for i in range(4)]


def generate_crop_metadata(bbox):
    """Describe [x_min, y_min, x_max, y_max] as origin plus size (x_coord, y_coord, width, height)."""
    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
    return dict(
        x_coord=x_min,
        y_coord=y_min,
        width=x_max - x_min,
        height=y_max - y_min,
    )


def produce_crop(image, bbox):
    """Slice the [x_min, y_min, x_max, y_max] region out of `image` (rows are y, columns are x)."""
    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
    rows, cols = slice(y_min, y_max), slice(x_min, x_max)
    return image[rows, cols]


In [None]:
# Annotation boxes are expressed in thumbnail coordinates; the downloaded
# images are resized to the full-resolution frame size before cropping.
FULL_RES_WIDTH = 4096
FULL_RES_HEIGHT = 3000
THUMBNAIL_WIDTH = 512
THUMBNAIL_HEIGHT = 512

# cv2.imread expects IMREAD_* flags. The previous argument, cv2.COLOR_BGR2GRAY,
# is a cvtColor conversion code whose numeric value (6) equals
# IMREAD_ANYCOLOR | IMREAD_ANYDEPTH, so the images were never read as
# grayscale. The effective behaviour is kept, spelled with the proper flags.
# NOTE(review): switch to cv2.IMREAD_GRAYSCALE if grayscale crops were intended.
IMREAD_FLAGS = cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH

left_crop_urls, right_crop_urls, metadatas = [], [], []

destination_bucket = 'aquabyte-images-adhoc'

for idx, crop_pair_obj in enumerate(crop_pair_objs):
    print(idx)
    left_s3_url = crop_pair_obj['left_url']
    right_s3_url = crop_pair_obj['right_url']
    left_ann = crop_pair_obj['left_ann']
    right_ann = crop_pair_obj['right_ann']
    left_image_f = download_from_s3_url(left_s3_url)
    right_image_f = download_from_s3_url(right_s3_url)
    left_image = cv2.imread(left_image_f, IMREAD_FLAGS)
    right_image = cv2.imread(right_image_f, IMREAD_FLAGS)
    left_image = cv2.resize(left_image, (FULL_RES_WIDTH, FULL_RES_HEIGHT))
    right_image = cv2.resize(right_image, (FULL_RES_WIDTH, FULL_RES_HEIGHT))

    for left_id, right_id in crop_pair_obj['id_pairs']:
        # Unmatched detections carry None on one side. Compare against None
        # explicitly: ids are list positions, so 0 is a valid id and a
        # truthiness test would silently skip every pair involving it.
        if left_id is None or right_id is None:
            continue
        left_bbox = get_bbox(left_ann['annotations'][left_id])
        right_bbox = get_bbox(right_ann['annotations'][right_id])
        left_bbox = resize_bbox(left_bbox, (THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT), (FULL_RES_WIDTH, FULL_RES_HEIGHT))
        right_bbox = resize_bbox(right_bbox, (THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT), (FULL_RES_WIDTH, FULL_RES_HEIGHT))
        left_crop = produce_crop(left_image, left_bbox)
        right_crop = produce_crop(right_image, right_bbox)

        # Crops are written next to the downloaded frame, named after the bbox.
        left_crop_f_name = 'left_frame_crop_{}_{}_{}_{}.jpg'.format(*left_bbox)
        right_crop_f_name = 'right_frame_crop_{}_{}_{}_{}.jpg'.format(*right_bbox)
        left_crop_f = os.path.join(os.path.dirname(left_image_f), left_crop_f_name)
        right_crop_f = os.path.join(os.path.dirname(right_image_f), right_crop_f_name)

        cv2.imwrite(left_crop_f, left_crop)
        cv2.imwrite(right_crop_f, right_crop)

        # The S3 key is the local path from 'environment=' onwards.
        left_crop_key = left_crop_f[left_crop_f.index('environment='):]
        right_crop_key = right_crop_f[right_crop_f.index('environment='):]
        s3.s3_client.upload_file(left_crop_f, destination_bucket, left_crop_key)
        s3.s3_client.upload_file(right_crop_f, destination_bucket, right_crop_key)

        left_crop_s3_url = os.path.join('s3://', destination_bucket, left_crop_key)
        right_crop_s3_url = os.path.join('s3://', destination_bucket, right_crop_key)

        # Crop origin and size in full-resolution coordinates.
        left_crop_metadata = generate_crop_metadata(left_bbox)
        right_crop_metadata = generate_crop_metadata(right_bbox)
        metadata = {
            'name': 'keypoint_small_test_v1',
            'left_crop_metadata': left_crop_metadata,
            'right_crop_metadata': right_crop_metadata
        }

        left_crop_urls.append(left_crop_s3_url)
        right_crop_urls.append(right_crop_s3_url)
        metadatas.append(metadata)
        
         

<h1> Upload to PLALI for key-point annotation </h1>

In [None]:
# One PLALI row per matched crop pair, with a random priority.
values_to_insert = []

for left_crop_url, right_crop_url, metadata in zip(left_crop_urls, right_crop_urls, metadatas):

    uuid_str = str(uuid.uuid4())
    # Ordered list, left crop first. A Python set has no defined order, and
    # later cells read images[0] / images[1] as a left/right pair.
    images = [left_crop_url, right_crop_url]
    priority = random.random()

    values = {
        'id': uuid_str,
        'workflow_id': '00000000-0000-0000-0000-000000000117',
        'images': images,
        'metadata': metadata,
        'priority': priority
    }

    values_to_insert.append(values)

In [None]:
# Insert the key-point rows in chunks of n, printing the chunk number after each insert.
# Relies on `engine` and `sql_metadata` created by an earlier cell.
n = 10
count = 0
for count, chunk in enumerate(chunker(values_to_insert, n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)
    print(count)

<h1> Load key-point annotations and parse into standard form </h1>

In [None]:
class AnnotationFormatError(Exception):
    """Raised when a raw key-point annotation is skipped or lacks the expected structure."""


def _parse_keypoint_annotation(raw_ann, metadata):
    """Convert one raw annotation into {'leftCrop': [...], 'rightCrop': [...]}; raise AnnotationFormatError if malformed."""
    if not isinstance(raw_ann, dict) or 'skipReasons' in raw_ann:
        raise AnnotationFormatError('annotation is missing or was skipped')

    ann = {'leftCrop': [], 'rightCrop': []}
    for side in ['leftCrop', 'rightCrop']:
        try:
            raw_items = raw_ann[side]['annotation']['annotations']
        except (KeyError, TypeError) as err:
            # A missing side or nesting level is a format problem, not a crash.
            raise AnnotationFormatError('no key-points for {}'.format(side)) from err
        if not isinstance(raw_items, (list, tuple)):
            raise AnnotationFormatError('key-points for {} are not a list'.format(side))

        crop_metadata_key = '{}_crop_metadata'.format(side.replace('Crop', ''))
        for raw_item in raw_items:
            if not isinstance(raw_item, dict) or \
                    any(field not in raw_item for field in ('xCrop', 'yCrop', 'category')):
                raise AnnotationFormatError('incomplete key-point in {}'.format(side))
            # Frame coordinates are the crop coordinates shifted by the crop
            # origin stored in the row metadata. A row without that metadata
            # still raises KeyError: it is a data problem, not an annotation one.
            ann[side].append({
                'xCrop': raw_item['xCrop'],
                'yCrop': raw_item['yCrop'],
                'xFrame': raw_item['xCrop'] + metadata[crop_metadata_key]['x_coord'],
                'yFrame': raw_item['yCrop'] + metadata[crop_metadata_key]['y_coord'],
                'keypointType': raw_item['category']
            })

    return ann


def add_anns(annotated_df):
    """Add an 'ann' column holding the parsed key-point annotation of every row.

    Each parsed value is {'leftCrop': [...], 'rightCrop': [...]}, where every
    item carries xCrop, yCrop, xFrame, yFrame and keypointType. Rows whose raw
    annotation was skipped or is structurally malformed get None.

    Args:
        annotated_df: frame with 'annotation' and 'metadata' columns. It is
            modified in place.

    Returns:
        None.
    """
    anns = []
    for idx, row in annotated_df.iterrows():
        try:
            anns.append(_parse_keypoint_annotation(row['annotation'], row['metadata']))
        except AnnotationFormatError:
            anns.append(None)

    annotated_df['ann'] = anns
    


In [None]:
# Read the credentials inside a context manager so the file handle is closed
# as soon as the JSON has been parsed.
with open(os.environ['PLALI_SQL_CREDENTIALS']) as credentials_f:
    rds = RDSAccessUtils(json.load(credentials_f))

# Annotations of workflow ...0117 joined with their image rows and workflow name.
query = """
    select * from plali.plali_annotations x
    inner join 
    ( select a.id as plali_image_id, a.images, a.metadata, b.id as workflow_id, b.name from plali.plali_images a
    inner join plali.plali_workflows b
    on a.workflow_id = b.id ) y
    on x.plali_image_id = y.plali_image_id
    where workflow_id = '00000000-0000-0000-0000-000000000117';
"""

df = rds.extract_from_database(query)

In [None]:
add_anns(df)

# Pen id and date are read positionally from the first image URL of each row:
# component 5 is 'pen-id=<int>' and component 6 is 'date=<value>'.
first_url_parts = df.images.apply(lambda urls: urls[0].split('/'))
df['pen_id'] = first_url_parts.apply(lambda parts: parts[5].replace('pen-id=', '')).astype(int)
df['date'] = first_url_parts.apply(lambda parts: parts[6].replace('date=', ''))

In [None]:
# Sorted distinct values of URL components 4..6 of each row's first image.
# Components 5 and 6 are the pen-id and date parts parsed in the previous cell;
# component 4 is presumably the site -- TODO confirm.
sorted(df.images.apply(lambda x: os.path.join(*x[0].split('/')[4:7])).unique())

<h1> Rectify key-points </h1>

In [None]:
# Stereo-parameters JSON URL for each pen id. rectify_df looks a row's pen_id
# up in this mapping and downloads the file on first use.
pen_id_to_stereo = {
    56: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40052278_R40052264/2020-05-25T07:21:30.968604000Z_L40052278_R40052264_stereo-parameters.json',
    4: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40052270_R40052370/2020-06-03T10:45:56.119147000Z_L40052270_R40052370_stereo-parameters.json',
    100: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40034708_R40034561/2020-01-31T00:00:00Z_L40034708_R40034561_stereo-parameters.json',
    173: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40032706_R40032703/2020-11-10T09:50:27.538021000Z_L40032706_R40032703_stereo-parameters.json',
    86: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40034368_R40034367/2019-12-03T00:00:00Z_L40034368_R40034367_stereo-parameters.json',
    95: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40013179_R40048967/2020-04-10T00:00:00Z_L40013179_R40048967_stereo-parameters.json',
    144: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40054807_R40054861/2020-07-28T11:20:43.978607000Z_L40054807_R40054861_stereo-parameters.json',
    194: 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40049577_R40049578/2020-12-10T11:40:13.694113000Z_L40049577_R40049578_stereo-parameters.json'
}

In [None]:
import json
from typing import Dict
import cv2
import numpy as np


# Frame size in pixels; get_camera_parameters passes (IMAGE_WIDTH, IMAGE_HEIGHT)
# to cv2.stereoRectify and cv2.initUndistortRectifyMap.
IMAGE_WIDTH = 4096
IMAGE_HEIGHT = 3000


def get_camera_parameters(params: Dict) -> Dict:
    """Build rectification inputs from the contents of a stereo-parameters JSON.

    Args:
        params: parsed JSON with 'CameraParameters1', 'CameraParameters2',
            'RotationOfCamera2' and 'TranslationOfCamera2' entries.

    Returns:
        Dict with the undistort/rectify maps of both cameras ('left_maps',
        'right_maps') and, per camera, the intrinsic matrix, distortion
        coefficients, rectification rotation (R1/R2) and projection (P1/P2).
    """

    def _intrinsics(camera: Dict):
        # The stored intrinsic matrix is transposed before use. Distortion
        # coefficients are ordered [radial0, radial1, *tangential, radial2].
        matrix = np.array(camera['IntrinsicMatrix']).transpose()
        radial = camera['RadialDistortion']
        coeffs = np.array(radial[0:2] + camera['TangentialDistortion'] + [radial[2]])
        return matrix, coeffs

    camera_matrix_1, dist_coeffs_1 = _intrinsics(params['CameraParameters1'])
    camera_matrix_2, dist_coeffs_2 = _intrinsics(params['CameraParameters2'])

    R = np.array(params['RotationOfCamera2']).transpose()
    T = np.array(params['TranslationOfCamera2']).transpose()

    image_size = (IMAGE_WIDTH, IMAGE_HEIGHT)
    # Only the rotations and projections are used; the remaining outputs are discarded.
    R1, R2, P1, P2, _Q, _left_roi, _right_roi = cv2.stereoRectify(
        camera_matrix_1, dist_coeffs_1,
        camera_matrix_2, dist_coeffs_2,
        image_size, R, T,
        None, None, None, None, None,
        cv2.CALIB_ZERO_DISPARITY, 0)

    left_maps = cv2.initUndistortRectifyMap(
        camera_matrix_1, dist_coeffs_1, R1, P1, image_size, cv2.CV_16SC2)
    right_maps = cv2.initUndistortRectifyMap(
        camera_matrix_2, dist_coeffs_2, R2, P2, image_size, cv2.CV_16SC2)

    return dict(
        left_maps=left_maps,
        right_maps=right_maps,
        camera_matrix_1=camera_matrix_1,
        dist_coeffs_1=dist_coeffs_1,
        R1=R1,
        P1=P1,
        camera_matrix_2=camera_matrix_2,
        dist_coeffs_2=dist_coeffs_2,
        R2=R2,
        P2=P2,
    )


def rectify(ann: Dict, params: Dict) -> Dict:
    """Map every key-point of `ann` through cv2.undistortPoints with its camera's parameters.

    Args:
        ann: {'leftCrop': [...], 'rightCrop': [...]}; each item has
            'keypointType', 'xFrame' and 'yFrame'.
        params: dict as returned by get_camera_parameters.

    Returns:
        Dict with the same two sides; each item keeps 'keypointType' and
        carries the transformed 'xFrame' / 'yFrame'.
    """
    # Left key-points use camera 1, right key-points use camera 2.
    calibration_by_side = {
        'leftCrop': (params['camera_matrix_1'], params['dist_coeffs_1'], params['R1'], params['P1']),
        'rightCrop': (params['camera_matrix_2'], params['dist_coeffs_2'], params['R2'], params['P2']),
    }

    ann_r = {}
    for side, (camera_matrix, dist_coeffs, rotation, projection) in calibration_by_side.items():
        rectified_items = []
        for item in ann[side]:
            bp = item['keypointType']
            point = np.array([[item['xFrame'], item['yFrame']]]).astype(float)
            x_new, y_new = cv2.undistortPoints(
                point, camera_matrix, dist_coeffs, R=rotation, P=projection)[0][0]
            rectified_items.append({
                'keypointType': bp,
                'xFrame': x_new,
                'yFrame': y_new,
            })
        ann_r[side] = rectified_items

    return ann_r

def get_camera_metadata(stereo_parameters):
    """Assemble the camera metadata dict from stereo parameters.

    The focal length in pixels is the first 'FocalLength' entry of camera 1.
    The baseline is the absolute first translation component divided by 1e3,
    and the focal length is the pixel focal length times 3.45e-6. The sensor
    and pixel-count values are fixed constants.
    """
    focal_length_pixel = stereo_parameters['CameraParameters1']['FocalLength'][0]
    baseline = abs(stereo_parameters['TranslationOfCamera2'][0] / 1e3)

    return {
        'focalLengthPixel': focal_length_pixel,
        'baseline': baseline,
        'focalLength': focal_length_pixel * 3.45e-6,
        'pixelCountWidth': 4096,
        'pixelCountHeight': 3000,
        'imageSensorWidth': 0.01412,
        'imageSensorHeight': 0.01035
    }


# Parsed stereo-parameters JSON per pen id, shared across calls.
stereo_params_cache = {}
def rectify_df(df, pen_id_to_stereo):
    """Add rectified annotations ('ann_r') and camera metadata ('cm') to `df` in place.

    Args:
        df: frame with 'ann' and 'pen_id' columns.
        pen_id_to_stereo: mapping from pen id to the URL of its
            stereo-parameters JSON.

    Every row gets a camera metadata dict. Rows whose 'ann' is None or lacks
    either side get None in 'ann_r'. Progress is printed every 10 rows.
    """
    ann_rs, cms = [], []
    # get_camera_parameters runs stereoRectify and builds full-frame
    # undistortion maps, so its result is computed once per pen, not per row.
    rectify_params_by_pen = {}

    for count, (idx, row) in enumerate(df.iterrows()):
        # Count every row, including the ones that are skipped below.
        if count % 10 == 0:
            print(count)

        ann = row.ann
        pen_id = row.pen_id
        if pen_id not in stereo_params_cache:
            stereo_params_f, _, _ = s3.download_from_url(pen_id_to_stereo[pen_id])
            # close the downloaded file once it has been parsed
            with open(stereo_params_f) as stereo_params_file:
                stereo_params_cache[pen_id] = json.load(stereo_params_file)
        stereo_params = stereo_params_cache[pen_id]

        cms.append(get_camera_metadata(stereo_params))

        if ann is None or ann.get('leftCrop') is None or ann.get('rightCrop') is None:
            ann_rs.append(None)
            continue

        if pen_id not in rectify_params_by_pen:
            rectify_params_by_pen[pen_id] = get_camera_parameters(stereo_params)

        # rectify with the parameters of this row's pen
        ann_rs.append(rectify(ann, rectify_params_by_pen[pen_id]))

    df['ann_r'] = ann_rs
    df['cm'] = cms
    
    


In [None]:
# Adds the 'ann_r' and 'cm' columns to df in place.
rectify_df(df, pen_id_to_stereo)

<h1> Get depth values </h1>

In [None]:
from research.weight_estimation.keypoint_utils.optics import pixel2world

In [None]:
def ann_is_valid(ann):
    """Return True only when both sides of `ann` are present and hold at least 11 key-points."""
    if ann is None:
        return False

    left, right = ann.get('leftCrop'), ann.get('rightCrop')
    if not left or not right:
        return False

    return len(left) >= 11 and len(right) >= 11

# Per row: median depth over the usable body parts, plus the list of those body parts.
depths = []
valid_body_parts_list = []
throw_out_count = 0
for idx, row in df.iterrows():
    ann, ann_r = row.ann, row.ann_r
    cm = row.cm
    if not ann_is_valid(ann_r):
        depths.append(None)
        valid_body_parts_list.append([])
        continue

    world_keypoints = pixel2world(ann_r['leftCrop'], ann_r['rightCrop'], cm)

    # A body part is unusable when, on either side, its crop coordinates fall
    # inside the top-left region (xCrop < 50 and yCrop < 100).
    invalid_body_parts = {
        item['keypointType']
        for item in ann['leftCrop'] + ann['rightCrop']
        if item['xCrop'] < 50 and item['yCrop'] < 100
    }

    valid_items = [
        (body_part, coordinates)
        for body_part, coordinates in world_keypoints.items()
        if body_part not in invalid_body_parts
    ]
    valid_body_parts = [body_part for body_part, _ in valid_items]
    # component 1 of each world coordinate is used as the depth
    point_depths = [coordinates[1] for _, coordinates in valid_items]

    left_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['leftCrop']}
    right_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['rightCrop']}

    # Largest vertical disagreement between the rectified left and right key-points.
    max_y_dev = max([0] + [
        abs(left_keypoints[bp][1] - right_keypoints[bp][1])
        for bp in valid_body_parts
    ])

    if max_y_dev > 50:
        depths.append(None)
        valid_body_parts_list.append([])
        throw_out_count += 1
        continue

    depths.append(np.median(point_depths))
    valid_body_parts_list.append(valid_body_parts)

df['depth'] = depths
df['valid_body_parts'] = valid_body_parts_list
    

In [None]:
# Rows discarded by the depth loop because their largest left/right y deviation exceeded 50.
throw_out_count

In [None]:
# Number of rows per pen id.
df.groupby('pen_id').apply(lambda x: x.shape[0])

In [None]:
# Depth histogram for 0 < depth < 2: all such rows in red, and on top in blue
# the subset with exactly 11 valid body parts.
fig, ax = plt.subplots(figsize=(10, 5))
mask_2 = (df.depth > 0) & (df.depth < 2)
mask = mask_2 & (df.valid_body_parts.apply(len) == 11)
ax.hist(df[mask_2].depth, bins=100, color='red', alpha=0.5, label='full and partial')
ax.hist(df[mask].depth, bins=100, color='blue', label='full')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Row count per depth bucket, divided by the square of the bucket's lower bound.
depth_bucket_cutoffs = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
normalized_sample_sizes = []
for low_depth, high_depth in zip(depth_bucket_cutoffs[:-1], depth_bucket_cutoffs[1:]):
    # `mask` stays a plain loop variable: a later cell reads the value left by
    # the last bucket.
    mask = (df.depth > low_depth) & (df.depth < high_depth)
    normalized_sample_sizes.append(len(df[mask]) / low_depth ** 2)
    

In [None]:
# Normalized sample size per bucket, plotted at each bucket's lower bound.
plt.plot(depth_bucket_cutoffs[:-1], normalized_sample_sizes)
plt.xlabel('Distance from camera')
# axis label typo fixed: 'noramlized' -> 'normalized'
plt.ylabel('Sample size normalized by field area')
plt.grid()
plt.show()

In [None]:
# Ratio of masked rows to masked rows with exactly 11 valid body parts.
# NOTE(review): `mask` is whatever was assigned last. When the cells run in
# order that is the final depth bucket (1.1 < depth < 1.2) left over from the
# bucket loop, not the histogram mask -- confirm this is intended. The
# denominator is 0 when no masked row has 11 valid body parts.
df[mask].shape[0] / (df[mask].valid_body_parts.apply(lambda x: len(x)) == 11).sum() 

In [None]:
# rds = RDSAccessUtils(json.load(open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS'])))
# query = """
#     select * from prod.biomass_computations
#     where pen_id=194
#     and captured_at between '2021-01-01' and '2021-01-06'
#     and akpd_score > 0.9;
# """
# tdf = rds.extract_from_database(query)

In [None]:
# Median depth (component 1 of each world key-point) for every row of `tdf`.
# NOTE(review): the only cell that defines `tdf` is commented out, so this cell
# raises NameError on a fresh kernel unless `tdf` is loaded some other way.
# It also rebinds the global `depths` list used by the depth loop further up.
depths = []
for idx, row in tdf.iterrows():
    ann = row.annotation
    cm = row.camera_metadata
    world_keypoints = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    depths.append(np.median([x[1] for x in world_keypoints.values()]))
    
tdf['depth'] = depths

In [None]:
# Histogram of the per-row median depths computed for `tdf`.
plt.hist(tdf.depth, bins=100)
plt.show()

In [None]:
# Show the left and right crops of rows with depth < 0.5, stopping once the
# counter exceeds 100.
count = 0
mask = (df.depth < 0.5)
for idx, row in df[mask].iterrows():
    ann_r = row.ann_r

    # Just skip unusable rows. The previous version also appended to `depths`
    # and `valid_body_parts_list`, copy-paste leftovers that corrupted lists
    # belonging to other cells.
    if not ann_is_valid(ann_r):
        continue

    left_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['leftCrop']}
    right_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['rightCrop']}

    # Largest vertical disagreement between the rectified left and right key-points.
    valid_body_parts = row.valid_body_parts
    max_y_dev = 0
    for bp in valid_body_parts:
        y_deviation = abs(left_keypoints[bp][1] - right_keypoints[bp][1])
        if y_deviation > max_y_dev:
            max_y_dev = y_deviation

    if max_y_dev > 50:
        continue

    print(row.depth)
    left_crop_f = download_from_s3_url(row.images[0])
    right_crop_f = download_from_s3_url(row.images[1])

    # cv2.imread returns BGR while matplotlib expects RGB; convert so the
    # colours are not swapped on screen.
    plt.imshow(cv2.cvtColor(cv2.imread(left_crop_f), cv2.COLOR_BGR2RGB))
    plt.show()
    plt.imshow(cv2.cvtColor(cv2.imread(right_crop_f), cv2.COLOR_BGR2RGB))
    plt.show()

    count += 1
    if count > 100:
        break
    


In [None]:
# Positional index of the row to inspect; the expression shows that row's image URLs.
idx = 4
df.images.iloc[idx]

In [None]:
# Download both crops of the selected row.
image_pair = df.images.iloc[idx]
left_image_f = download_from_s3_url(image_pair[0])
right_image_f = download_from_s3_url(image_pair[1])

# The stored order is not guaranteed to be (left, right): swap when the first
# file is the one with 'right' in its path.
if 'right' in left_image_f:
    left_image_f, right_image_f = right_image_f, left_image_f

# Load as RGB for matplotlib.
left_image, right_image = (
    cv2.cvtColor(cv2.imread(image_f), cv2.COLOR_BGR2RGB)
    for image_f in (left_image_f, right_image_f)
)


In [None]:
def get_thumbnail_url(crop_url):
    """Derive the 512x512 thumbnail URL that sits beside a crop URL.

    The bucket name 'aquabyte-images-adhoc' is replaced by
    'aquabyte-frames-resized-inbound' and the file name by
    '<side>_frame.resize_512_512.jpg'. The side is 'left' when the crop URL
    contains 'left', otherwise 'right'.
    """
    if 'left' in crop_url:
        side = 'left'
    else:
        side = 'right'

    thumbnail_dir = os.path.dirname(
        crop_url.replace('aquabyte-images-adhoc', 'aquabyte-frames-resized-inbound'))
    return os.path.join(thumbnail_dir, '{}_frame.resize_512_512.jpg'.format(side))

In [None]:
def _load_upscaled_thumbnail(crop_url):
    """Download the thumbnail that belongs to `crop_url` and return it as RGB, resized to 4096x3000."""
    thumbnail_f = download_from_s3_url(get_thumbnail_url(crop_url))
    rgb = cv2.cvtColor(cv2.imread(thumbnail_f), cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, (4096, 3000))


# Thumbnails of the first row's two crops.
first_row_images = df.images.iloc[0]
left_thumbnail = _load_upscaled_thumbnail(first_row_images[0])
right_thumbnail = _load_upscaled_thumbnail(first_row_images[1])


In [None]:
# Overlay the valid left key-points (frame coordinates) of the first five rows
# on the upscaled left thumbnail.
fig, ax = plt.subplots()
ax.imshow(left_thumbnail)

for idx, row in df.head(5).iterrows():
    ann = row.ann
    # add_anns stores None for skipped or malformed annotations
    if ann is None:
        continue
    valid_body_parts = row.valid_body_parts
    left_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann['leftCrop']}
    for bp, kp in left_keypoints.items():
        if bp in valid_body_parts:
            ax.scatter([kp[0]], [kp[1]], color='red', s=1)

plt.show()

In [None]:
# Overlay the valid right key-points (frame coordinates) of the first five rows
# on the upscaled right thumbnail.
fig, ax = plt.subplots()
ax.imshow(right_thumbnail)

for idx, row in df.head(5).iterrows():
    ann = row.ann
    # add_anns stores None for skipped or malformed annotations
    if ann is None:
        continue
    valid_body_parts = row.valid_body_parts
    right_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann['rightCrop']}
    for bp, kp in right_keypoints.items():
        if bp in valid_body_parts:
            ax.scatter([kp[0]], [kp[1]], color='red', s=1)

plt.show()

In [None]:
# Overlay the valid left key-points (crop coordinates) of the row with index
# label 4 on its left crop image.
fig, ax = plt.subplots()
ax.imshow(left_image)

for idx, row in df.head(5).iterrows():
    if idx != 4:
        continue
    ann = row.ann
    # add_anns stores None for skipped or malformed annotations
    if ann is None:
        continue
    valid_body_parts = row.valid_body_parts
    left_keypoints = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann['leftCrop']}
    for bp, kp in left_keypoints.items():
        if bp in valid_body_parts:
            ax.scatter([kp[0]], [kp[1]], color='red', s=1)

plt.show()

In [None]:
# Overlay the valid right key-points (crop coordinates) of the row with index
# label 4 on its right crop image.
fig, ax = plt.subplots()
ax.imshow(right_image)

for idx, row in df.head(5).iterrows():
    if idx != 4:
        continue
    ann = row.ann
    # add_anns stores None for skipped or malformed annotations
    if ann is None:
        continue
    valid_body_parts = row.valid_body_parts
    right_keypoints = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann['rightCrop']}
    for bp, kp in right_keypoints.items():
        if bp in valid_body_parts:
            ax.scatter([kp[0]], [kp[1]], color='red', s=1)

plt.show()

In [None]:
# Parsed annotation (as produced by add_anns) of the row at position 4.
df.ann.iloc[4]

In [None]:
# Raw annotation payload of the same row, for comparison with the parsed form.
df.annotation.iloc[4]

<h1> Examine y-coordinate deviation </h1>

In [None]:
# Signed left-minus-right y deviation of every valid body part, over all rows.
y_coordinate_deviations = []
for idx, row in df.iterrows():
    ann_r = row.ann_r
    valid_body_parts = row.valid_body_parts
    # rectify_df stores None for rows it could not rectify, and such rows have
    # no valid body parts; skip them instead of failing on None['leftCrop'].
    if ann_r is None or not valid_body_parts:
        continue
    left_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['leftCrop']}
    right_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann_r['rightCrop']}
    for body_part in valid_body_parts:
        y1 = left_keypoints[body_part][1]
        y2 = right_keypoints[body_part][1]
        y_coordinate_deviations.append(y1 - y2)
        print(body_part, y1-y2)
    
    