In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../../q1_o2kr2_dataset_annotations/')

import json
import os
import cv2
from matplotlib import pyplot as plt
import matplotlib.patches as patches
import numpy as np
import pandas as pd
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
import uuid
from construct_fish_detection_dataset_o2kr2 import establish_plali_connection, insert_into_plali
from rectification import rectify

from weight_estimation.weight_estimator import WeightEstimator, CameraMetadata

# Widen pandas' column display so long cell values (e.g. S3 URLs) are not truncated at the default width.
pd.set_option('display.max_colwidth', 500)

<h1> Load the dataset and arrange into pairs </h1>

In [None]:
# Point PLALI_SQL_CREDENTIALS at the credentials file; later cells read this variable to open the database connection.
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials.json'

In [None]:
# Build the S3 helper from the credentials file named by AWS_CREDENTIALS.
# The context manager closes the file handle instead of leaking it.
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_f:
    s3 = S3AccessUtils('/root/data', json.load(aws_credentials_f))

In [None]:
# Open the database helper; the context manager closes the credentials file
# instead of leaving the handle open for the lifetime of the kernel.
with open(os.environ['PLALI_SQL_CREDENTIALS']) as plali_credentials_f:
    rds = RDSAccessUtils(json.load(plali_credentials_f))

# Annotations of workflow ...047 joined with their image and workflow rows.
query = """
    select * from plali.plali_annotations x
    inner join 
    ( select a.id as plali_image_id, a.images, a.metadata, b.id as workflow_id, b.name from plali.plali_images a
    inner join plali.plali_workflows b
    on a.workflow_id = b.id ) y
    on x.plali_image_id = y.plali_image_id
    where workflow_id = '00000000-0000-0000-0000-000000000047';
"""

annotated_df = rds.extract_from_database(query)

# Each row's `images` value is indexable; keep its first entry as the image URL.
annotated_df['image'] = annotated_df['images'].apply(lambda x: x[0])

# Number of annotation entries; a missing or empty 'annotations' value counts as 0.
annotated_df['annotation_count'] = annotated_df.annotation.apply(lambda x: len(x.get('annotations') or []))



In [None]:
left_frame_urls, right_frame_urls, left_anns, right_anns = [], [], [], []

# NOTE(review): this mask is computed but never applied in this cell, so pairs
# without any annotation are kept as well — confirm whether that is intended.
mask = annotated_df.annotation_count > 0

# Index the frame once (occurrences per image URL and the annotation stored for
# each URL) so that every right-frame lookup is a dictionary access instead of a
# full column scan per row.
image_occurrences = annotated_df.image.value_counts()
annotation_by_image = dict(zip(annotated_df.image, annotated_df.annotation))

count = 0
for idx, row in annotated_df.iterrows():

    # right frames are only reached through their left counterpart
    if 'right_frame' in row.image:
        continue

    # get left and right image URLs and annotations
    left_frame_s3_url = row.image
    left_ann = row.annotation
    right_frame_s3_url = left_frame_s3_url.replace('left', 'right')

    # pair only when the right frame URL occurs exactly once in the data
    if image_occurrences.get(right_frame_s3_url, 0) == 1:
        right_ann = annotation_by_image[right_frame_s3_url]

        left_frame_urls.append(left_frame_s3_url)
        right_frame_urls.append(right_frame_s3_url)
        left_anns.append(left_ann)
        right_anns.append(right_ann)

    # progress indicator, counted over the left-frame rows only
    if count % 1000 == 0:
        print(count)
    count += 1


paired_df = pd.DataFrame({
    'left_frame_url': left_frame_urls,
    'right_frame_url': right_frame_urls,
    'left_ann': left_anns,
    'right_ann': right_anns
})

In [None]:
# Display the paired left/right frame table built in the previous cell.
paired_df

<h1> Check some of the bounding boxes </h1>

In [None]:
def download_image(image_url):
    """Download the object behind an ``s3://bucket/key`` URL via the ``s3`` helper.

    The first path component of the URL is used as the bucket, the remaining
    components are joined into the key. Returns whatever
    ``s3.download_from_s3(bucket, key)`` returns.
    """
    bucket, *key_parts = image_url.replace('s3://', '').split('/')
    return s3.download_from_s3(bucket, os.path.join(*key_parts))


def plot_stereo_image(left_image_f, right_image_f, left_ann, right_ann):
    """Show the left and right image side by side, each with its annotation box.

    ``left_image_f`` / ``right_image_f`` are image file paths read with OpenCV.
    ``left_ann`` / ``right_ann`` are mappings providing ``xCrop``, ``yCrop``,
    ``width`` and ``height`` for the rectangle drawn on the respective image.
    """
    fig, axes = plt.subplots(1, 2)

    panels = (
        (axes[0], left_image_f, left_ann),
        (axes[1], right_image_f, right_ann),
    )
    for axis, image_f, ann in panels:
        # OpenCV loads BGR; convert to RGB before handing the image to matplotlib
        image_rgb = cv2.cvtColor(cv2.imread(image_f), cv2.COLOR_BGR2RGB)
        axis.imshow(image_rgb)

        # red outline of the annotated box
        box = patches.Rectangle(
            (ann['xCrop'], ann['yCrop']),
            ann['width'],
            ann['height'],
            linewidth=1,
            edgecolor='r',
            facecolor='none',
        )
        axis.add_patch(box)

    plt.show()
    

# Keep only the pairs in which both frames carry at least one annotation; the
# preview below indexes ['annotations'][0] and would fail on the others.
has_annotations = (
    paired_df.left_ann.apply(lambda ann: bool(ann.get('annotations')))
    & paired_df.right_ann.apply(lambda ann: bool(ann.get('annotations')))
).astype(bool)

# Preview the first ten annotated pairs.
for idx, row in paired_df[has_annotations].head(10).iterrows():

    # download both frames of the pair
    left_image_f = download_image(row.left_frame_url)
    right_image_f = download_image(row.right_frame_url)

    # only the first annotation of each frame is drawn
    left_ann = row.left_ann['annotations'][0]
    right_ann = row.right_ann['annotations'][0]

    # plot image
    plot_stereo_image(left_image_f, right_image_f, left_ann, right_ann)
    
    
    

    
    

In [None]:
# Quick look at the first rows of the paired table.
paired_df.head()

<h1> Rectify frames </h1>

In [None]:
def download_from_s3_url(s3_url):
    """Download an ``s3://bucket/key`` URL and also report where it came from.

    Returns a tuple ``(f, bucket, key)``: the result of
    ``s3.download_from_s3(bucket, key)`` followed by the bucket and key parsed
    out of ``s3_url``.
    """
    bucket, *key_parts = s3_url.replace('s3://', '').split('/')
    key = os.path.join(*key_parts)
    return s3.download_from_s3(bucket, key), bucket, key

In [None]:
left_image_rectified_s3_urls, right_image_rectified_s3_urls = [], []

stereo_parameters_url = 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40013181_R40012414/2021-02-10T10_35_05.569443000Z_L40013181_R40012414_stereo-parameters.json'

# The stereo parameters are the same for every pair, so fetch them once
# instead of once per iteration.
stereo_parameters_f, _, _ = s3.download_from_url(stereo_parameters_url)

count = 0
for idx, row in paired_df.iterrows():

    # get unrectified full resolution frames (the annotated URLs point at 512x512 resizes)
    left_frame_s3_url, right_frame_s3_url = row.left_frame_url, row.right_frame_url
    left_full_res_frame_s3_url = left_frame_s3_url.replace('.resize_512_512.jpg', '.jpg')
    right_full_res_frame_s3_url = right_frame_s3_url.replace('.resize_512_512.jpg', '.jpg')
    left_full_res_frame_f, _, left_full_res_frame_key = download_from_s3_url(left_full_res_frame_s3_url)
    right_full_res_frame_f, _, right_full_res_frame_key = download_from_s3_url(right_full_res_frame_s3_url)

    # rectify into full resolution stereo frame pair and save to disk next to the downloads
    left_image_rectified, right_image_rectified = rectify(left_full_res_frame_f, right_full_res_frame_f, stereo_parameters_f)
    left_image_rectified_f = os.path.join(os.path.dirname(left_full_res_frame_f), 'left_frame.rectified.jpg')
    right_image_rectified_f = os.path.join(os.path.dirname(right_full_res_frame_f), 'right_frame.rectified.jpg')

    # cv2.imwrite reports failure through its return value rather than raising;
    # stop here so a missing or stale file is never uploaded.
    if not cv2.imwrite(left_image_rectified_f, left_image_rectified):
        raise IOError('could not write ' + left_image_rectified_f)
    if not cv2.imwrite(right_image_rectified_f, right_image_rectified):
        raise IOError('could not write ' + right_image_rectified_f)

    # upload rectified stereo frame pairs to s3, next to the original keys
    left_rectified_full_res_frame_key = left_full_res_frame_key.replace('.jpg', '.rectified.jpg')
    right_rectified_full_res_frame_key = right_full_res_frame_key.replace('.jpg', '.rectified.jpg')
    s3.s3_client.upload_file(left_image_rectified_f, 'aquabyte-images-raw', left_rectified_full_res_frame_key)
    s3.s3_client.upload_file(right_image_rectified_f, 'aquabyte-images-raw', right_rectified_full_res_frame_key)

    # append to url lists
    left_image_rectified_s3_url = os.path.join('s3://', 'aquabyte-images-raw', left_rectified_full_res_frame_key)
    right_image_rectified_s3_url = os.path.join('s3://', 'aquabyte-images-raw', right_rectified_full_res_frame_key)
    left_image_rectified_s3_urls.append(left_image_rectified_s3_url)
    right_image_rectified_s3_urls.append(right_image_rectified_s3_url)

    print(count)
    count += 1




In [None]:
# Write the rectified pair left over from the last loop iteration to the playground
# directory, presumably for manual inspection.
cv2.imwrite('/root/data/alok/biomass_estimation/playground/im1.jpg', left_image_rectified)
cv2.imwrite('/root/data/alok/biomass_estimation/playground/im2.jpg', right_image_rectified)


<h1> Upload the paired data for key-point annotation </h1>

In [None]:
def process_into_plali_records(image_url_pairs, workflow_id):
    """Turn image URL pairs into one record dict per pair.

    Every record gets a fresh UUID4 string as ``id``, the given ``workflow_id``,
    the pair's URLs as a ``set`` under ``images``, an empty ``metadata`` dict and
    a ``priority`` equal to the pair's position divided by the number of pairs.
    Returns the records as a list in input order.
    """
    return [
        {
            'id': str(uuid.uuid4()),
            'workflow_id': workflow_id,
            'images': set(url_pair),
            'metadata': {},
            'priority': float(position) / len(image_url_pairs),
        }
        for position, url_pair in enumerate(image_url_pairs)
    ]


def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq`` of length ``size``.

    The last slice is shorter when ``len(seq)`` is not a multiple of ``size``.
    """
    offsets = range(0, len(seq), size)
    return (seq[start:start + size] for start in offsets)



In [None]:
# Pair up the rectified left/right URLs position by position.
image_url_pairs = list(zip(left_image_rectified_s3_urls, right_image_rectified_s3_urls))

In [None]:
# Workflow that the rectified pairs are registered under (the same id is queried
# later when the key-point annotations are loaded).
WORKFLOW_ID = '00000000-0000-0000-0000-000000000053'    
# One record per image pair, ready for insertion.
values_to_insert = process_into_plali_records(image_url_pairs, WORKFLOW_ID)



In [None]:
# Connection settings are read from the credentials file referenced here.
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials.json'
engine, sql_metadata = establish_plali_connection()

# Insert the records in batches of n, printing the running batch number.
n = 10
count = 0
for count, chunk in enumerate(chunker(values_to_insert, n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)
    print(count)

<h1> Analyze annotations </h1>

<h2> Load key-point annotations </h2>

In [None]:
def parse_ann_into_url(ann):
    """Return the left-crop image URL of an annotation without its query string.

    Reads ``ann['leftCrop']['imageUrl']``, decodes the ``%3D`` and ``%3A``
    escapes to ``=`` and ``:``, and drops everything from the first ``?`` on.
    """
    encoded_url = ann['leftCrop']['imageUrl']
    decoded_url = encoded_url.replace('%3D', '=').replace('%3A', ':')
    base_url, _, _ = decoded_url.partition('?')
    return base_url

def parse_url_into_ts(url):
    url_components = url.split('/')
    ts = [component for component in url_components if 'at=' in component][0]
    return ts
    

# Same credentials file as the other database cells in this notebook (the
# '.json' suffix was missing here, which made this the only cell pointing at a
# different path).
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials.json'
with open(os.environ['PLALI_SQL_CREDENTIALS']) as plali_credentials_f:
    rds = RDSAccessUtils(json.load(plali_credentials_f))

# Annotations of workflow ...053 joined with their image and workflow rows.
query = """
    select * from plali.plali_annotations x
    inner join 
    ( select a.id as plali_image_id, a.images, a.metadata, b.id as workflow_id, b.name from plali.plali_images a
    inner join plali.plali_workflows b
    on a.workflow_id = b.id ) y
    on x.plali_image_id = y.plali_image_id
    where workflow_id = '00000000-0000-0000-0000-000000000053';
"""

annotated_df = rds.extract_from_database(query)

# Keep only annotations that contain a 'leftCrop' entry. The explicit copy makes
# the column assignments below operate on an independent frame rather than on a
# slice of the query result.
annotated_df = annotated_df[annotated_df.annotation.apply(lambda x: 'leftCrop' in x)].copy()
annotated_df['left_crop_url'] = annotated_df.annotation.apply(parse_ann_into_url)
annotated_df['timestamp'] = annotated_df.left_crop_url.apply(parse_url_into_ts)




<h2> Load PIT tag scans </h2>

In [None]:
pit_tag_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/imr_pit_tag_data.csv')

# Decode the '%3D' / '%3A' escapes in the links so they can be parsed like the annotation URLs.
pit_tag_df['Link'] = pit_tag_df.Link.apply(lambda x: x.replace('%3D', '=').replace('%3A', ':'))

# Keep only rows whose PITtag is a string starting with '04'; non-string values are dropped.
mask = pit_tag_df.PITtag.apply(lambda x: isinstance(x, str) and x.startswith('04'))

# Copy the filtered rows so that adding a column does not write into a slice of the original frame.
pit_tag_df = pit_tag_df[mask].copy()
pit_tag_df['timestamp'] = pit_tag_df.Link.apply(parse_url_into_ts)

In [None]:
# Match key-point annotations to PIT tag scans through the timestamp parsed from their URLs.
merged_df = pd.merge(annotated_df, pit_tag_df, on='timestamp')

In [None]:
# Read a header-less CSV from the playground directory.
# NOTE(review): apart from the next two cells, `df` is not referenced anywhere else in the visible notebook.
df = pd.read_csv('/root/data/alok/biomass_estimation/playground/langoy_cage_8.csv', header=None)

In [None]:
# Assign column names to the header-less frame; only 'weight' is descriptive, the rest are placeholders.
df.columns = ['a', 'b', 'c', 'd', 'e', 'weight', 'g']

In [None]:
# Peek at the first values of the last column.
df.g.head()

In [None]:
# This cell sits before the "Load weight measurements" section, so on a fresh
# kernel `measurement_df` would not exist yet; load it here so the notebook
# survives Restart & Run All.
measurement_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/fish_measurement_data.csv')

# Print every non-null S_w value as an integer, skipping values that contain the letter 'e'.
for val in measurement_df[~measurement_df.S_w.isnull()].S_w.values:
    if 'e' in val:
        continue
    print(int(val))

<h2> Load weight measurements </h2>

In [None]:
# Load the fish measurements and attach them to the annotation / PIT-scan rows via the shared PITtag column.
measurement_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/fish_measurement_data.csv')
full_df = pd.merge(measurement_df, merged_df, on='PITtag')

<h1> Predict weights </h1>

In [None]:
class AnnotationFormatError(Exception):
    """Raised when a raw annotation cannot be converted into the key-point format."""


def _normalize_keypoint_annotation(raw_ann, image_height=3000, expected_keypoints=11):
    """Convert one raw annotation into the key-point format used for prediction.

    Parameters:
        raw_ann: raw annotation mapping with 'leftCrop' and 'rightCrop' entries,
            each holding its key-points under ['annotation']['annotations'].
        image_height: value the y coordinates are mirrored against when the
            annotation has to be flipped (default 3000, the constant used so far).
        expected_keypoints: number of key-points required on each side (default 11).

    Returns:
        A dict {'leftCrop': [...], 'rightCrop': [...]} whose items carry
        'xFrame', 'yFrame' and 'keypointType'.

    Raises:
        AnnotationFormatError: if the annotation was skipped ('skipReasons'), a
            key-point lacks 'xCrop'/'yCrop', a side does not have exactly
            `expected_keypoints` key-points, or the left side lacks the
            DORSAL_FIN / PELVIC_FIN key-points needed to decide on the flip.
    """
    if 'skipReasons' in raw_ann:
        raise AnnotationFormatError

    sides = ('leftCrop', 'rightCrop')
    ann = {side: [] for side in sides}
    for side in sides:
        for raw_item in raw_ann[side]['annotation']['annotations']:
            if 'xCrop' not in raw_item or 'yCrop' not in raw_item:
                raise AnnotationFormatError

            ann[side].append({
                'xFrame': raw_item['xCrop'],
                'yFrame': raw_item['yCrop'],
                'keypointType': raw_item['category']
            })

    if any(len(ann[side]) != expected_keypoints for side in sides):
        raise AnnotationFormatError

    left_keypoints = {item['keypointType']: [item['xFrame'], item['yFrame']] for item in ann['leftCrop']}
    if 'DORSAL_FIN' not in left_keypoints or 'PELVIC_FIN' not in left_keypoints:
        # without both fins the orientation cannot be decided; treat as malformed
        raise AnnotationFormatError

    # Mirror all y coordinates when the dorsal fin has the larger y value than the pelvic fin.
    flip = left_keypoints['DORSAL_FIN'][1] > left_keypoints['PELVIC_FIN'][1]
    return {
        side: [
            {
                'xFrame': item['xFrame'],
                'yFrame': image_height - item['yFrame'] if flip else item['yFrame'],
                'keypointType': item['keypointType']
            }
            for item in ann[side]
        ]
        for side in sides
    }


# One entry per row of full_df: the normalized annotation, or None when the raw
# annotation is unusable.
anns = []
for idx, row in full_df.iterrows():
    try:
        anns.append(_normalize_keypoint_annotation(row.annotation))
    except AnnotationFormatError:
        anns.append(None)


full_df['ann'] = anns

In [None]:
# For every usable annotation, print the difference between the mean x coordinate
# of the left key-points and that of the right key-points.
for ann in full_df['ann']:
    if ann is None:
        continue
    left_mean_x = np.mean([item['xFrame'] for item in ann['leftCrop']])
    right_mean_x = np.mean([item['xFrame'] for item in ann['rightCrop']])
    print(left_mean_x - right_mean_x)


In [None]:
stereo_parameters_url = 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40013181_R40012414/2021-02-10T10_35_05.569443000Z_L40013181_R40012414_stereo-parameters.json'
stereo_parameters_f, _, _ = s3.download_from_url(stereo_parameters_url)

# Read the parameters through a context manager so the file handle is closed again.
with open(stereo_parameters_f) as stereo_params_file:
    stereo_params = json.load(stereo_params_file)

# First entry of camera 1's focal length, in pixels.
focal_length_pixel = stereo_params['CameraParameters1']['FocalLength'][0]

camera_metadata = {
    'focalLengthPixel': focal_length_pixel,
    # NOTE(review): the division by 1e3 presumably converts millimetres to metres — confirm
    'baseline': abs(stereo_params['TranslationOfCamera2'][0] / 1e3),
    # NOTE(review): 3.45e-6 is presumably the pixel pitch in metres
    # (imageSensorWidth / pixelCountWidth = 0.01412 / 4096 is about 3.45e-6) — confirm
    'focalLength': focal_length_pixel * 3.45e-6,
    'pixelCountWidth': 4096,
    'pixelCountHeight': 3000,
    'imageSensorWidth': 0.01412,
    'imageSensorHeight': 0.01035
}

In [None]:
weight_model_f, _, _ = s3.download_from_url('https://aquabyte-models.s3-us-west-1.amazonaws.com/biomass/trained_models/2020-11-27T00-00-00/weight_model_synthetic_data.pb')
kf_model_f, _, _ = s3.download_from_url('https://aquabyte-models.s3-us-west-1.amazonaws.com/k-factor/trained_models/2020-08-08T000000/kf_predictor_v2.pb')
weight_estimator = WeightEstimator(weight_model_f, kf_model_f)

# The camera metadata is identical for every row, so build the object once
# instead of once per prediction.
cm = CameraMetadata(
    focal_length=camera_metadata['focalLength'],
    focal_length_pixel=camera_metadata['focalLengthPixel'],
    baseline_m=camera_metadata['baseline'],
    pixel_count_width=camera_metadata['pixelCountWidth'],
    pixel_count_height=camera_metadata['pixelCountHeight'],
    image_sensor_width=camera_metadata['imageSensorWidth'],
    image_sensor_height=camera_metadata['imageSensorHeight']
)

# One entry per row of full_df; rows without a usable annotation get None so the
# lists stay aligned with the frame.
pred_weights = []
pred_lengths = []
pred_kfs = []

count = 0
for idx, row in full_df.iterrows():
    ann = row.ann
    if ann is not None:
        weight, length, kf = weight_estimator.predict(ann, cm)
    else:
        weight, length, kf = None, None, None

    pred_weights.append(weight)
    pred_lengths.append(length)
    pred_kfs.append(kf)

    # progress indicator
    if count % 1000 == 0:
        print(count)
    count += 1
    

In [None]:
# Attach the predictions to the frame; the lists hold one entry per row (None where no prediction was made).
full_df['pred_weight'] = pred_weights
full_df['pred_length'] = pred_lengths
full_df['pred_kf'] = pred_kfs

In [None]:
# Rows used for evaluation: S_w present, a prediction present, and S_w not containing the letter 'e'.
# NOTE(review): the 'e' test presumably excludes malformed / scientific-notation entries — confirm.
mask = ~full_df.S_w.isnull() & ~full_df.pred_weight.isnull() & (full_df.S_w.str.contains('e') == False)

In [None]:
# Relative difference between the mean predicted weight and the mean of S_w over the evaluation rows.
ground_truth_weights = full_df[mask].S_w.astype(float).values
predicted_weights = full_df[mask].pred_weight.values
(predicted_weights.mean() - ground_truth_weights.mean()) / ground_truth_weights.mean()



In [None]:
# Predicted weight against S_w for the evaluation rows; the red diagonal marks perfect agreement.
evaluation_df = full_df[mask]
plt.figure(figsize=(20, 10))
plt.scatter(
    evaluation_df.S_w.astype(float).values,
    evaluation_df.pred_weight.values,
)
plt.plot([0, 10000], [0, 10000], color='red')
plt.xlabel('Ground truth weight (g)')
plt.ylabel('Predicted weight (g)')
plt.grid()
plt.show()

<h1> Upload frames that are still missing annotations </h1>

In [None]:
# Measurements joined with PIT tag scans on their shared columns; computed once
# and reused for both the mask and the link selection.
measured_scans_df = pd.merge(measurement_df, pit_tag_df)

# Scans whose timestamp does not occur in full_df.
missed_mask = ~(measured_scans_df.timestamp.isin(full_df.timestamp))
left_links = measured_scans_df[missed_mask].Link.tolist()

In [None]:
# Collect, for every annotated image pair, the first of its two URLs that contains 'left'.
# NOTE(review): annotated_left_links is not read by any later cell in the visible notebook.
annotated_left_links = []
for url_pair in annotated_df.images.tolist():
    first_url, second_url = url_pair
    left_url = next((url for url in (first_url, second_url) if 'left' in url), None)
    if left_url is None:
        print('error')
    else:
        annotated_left_links.append(left_url)


def _link_to_s3_pair(link):
    """Map a left-frame link to its (left, right) s3:// URL pair in 'aquabyte-images-raw'."""
    # the key is everything from the first occurrence of 'env' onwards
    left_s3_url = os.path.join('s3://', 'aquabyte-images-raw', link[link.index('env'):])
    return (left_s3_url, left_s3_url.replace('left', 'right'))


image_pairs_to_upload = [_link_to_s3_pair(link) for link in left_links]

In [None]:
left_image_rectified_s3_urls, right_image_rectified_s3_urls = [], []

stereo_parameters_url = 'https://aquabyte-stereo-parameters.s3-eu-west-1.amazonaws.com/L40013181_R40012414/2021-02-10T10_35_05.569443000Z_L40013181_R40012414_stereo-parameters.json'

# The stereo parameters are shared by all pairs: fetch them once, outside the
# per-pair error handling, since no pair can be rectified without them.
stereo_parameters_f, _, _ = s3.download_from_url(stereo_parameters_url)

# NOTE(review): the first 40 pairs are skipped, presumably because an earlier
# (interrupted) run already processed them — set to 0 for a full run.
RESUME_FROM_INDEX = 40

count = 0
for left_frame_s3_url, right_frame_s3_url in image_pairs_to_upload[RESUME_FROM_INDEX:]:
    try:

        # get unrectified full resolution frames
        left_full_res_frame_s3_url = left_frame_s3_url.replace('.resize_512_512.jpg', '.jpg')
        right_full_res_frame_s3_url = right_frame_s3_url.replace('.resize_512_512.jpg', '.jpg')
        left_full_res_frame_f, _, left_full_res_frame_key = download_from_s3_url(left_full_res_frame_s3_url)
        right_full_res_frame_f, _, right_full_res_frame_key = download_from_s3_url(right_full_res_frame_s3_url)

        # rectify into full resolution stereo frame pair and save to disk
        left_image_rectified, right_image_rectified = rectify(left_full_res_frame_f, right_full_res_frame_f, stereo_parameters_f)
        left_image_rectified_f = os.path.join(os.path.dirname(left_full_res_frame_f), 'left_frame.rectified.jpg')
        right_image_rectified_f = os.path.join(os.path.dirname(right_full_res_frame_f), 'right_frame.rectified.jpg')
        cv2.imwrite(left_image_rectified_f, left_image_rectified)
        cv2.imwrite(right_image_rectified_f, right_image_rectified)

        # upload rectified stereo frame pairs to s3
        left_rectified_full_res_frame_key = left_full_res_frame_key.replace('.jpg', '.rectified.jpg')
        right_rectified_full_res_frame_key = right_full_res_frame_key.replace('.jpg', '.rectified.jpg')
        s3.s3_client.upload_file(left_image_rectified_f, 'aquabyte-images-raw', left_rectified_full_res_frame_key)
        s3.s3_client.upload_file(right_image_rectified_f, 'aquabyte-images-raw', right_rectified_full_res_frame_key)

        # append to url lists
        left_image_rectified_s3_url = os.path.join('s3://', 'aquabyte-images-raw', left_rectified_full_res_frame_key)
        right_image_rectified_s3_url = os.path.join('s3://', 'aquabyte-images-raw', right_rectified_full_res_frame_key)
        left_image_rectified_s3_urls.append(left_image_rectified_s3_url)
        right_image_rectified_s3_urls.append(right_image_rectified_s3_url)
    except Exception as e:
        # best effort: a pair that cannot be processed is reported and skipped,
        # and the message names the pair so the failure can be followed up
        print('failed to rectify {}: {}'.format(left_frame_s3_url, e))

    print(count)
    count += 1




In [None]:
# Pair up the newly rectified URLs and turn them into records for the same workflow as before.
image_url_pairs = list(zip(left_image_rectified_s3_urls, right_image_rectified_s3_urls))
WORKFLOW_ID = '00000000-0000-0000-0000-000000000053'    
values_to_insert = process_into_plali_records(image_url_pairs, WORKFLOW_ID)

In [None]:
# Connection settings are read from the credentials file referenced here.
os.environ['PLALI_SQL_CREDENTIALS'] = '/run/secrets/plali_sql_credentials.json'
engine, sql_metadata = establish_plali_connection()

# Insert the records in batches of n, printing the running batch number.
n = 10
count = 0
for count, chunk in enumerate(chunker(values_to_insert, n), start=1):
    insert_into_plali(chunk, engine, sql_metadata)
    print(count)

In [None]:
# Export the evaluation rows (measurements, annotations and predictions) to CSV.
full_df[mask].to_csv('/root/data/alok/biomass_estimation/playground/imr_data_with_predictions.csv')

In [None]:
# Same export as the previous cell, written to a second ("v2") file.
# NOTE(review): presumably produced after re-running the notebook with additional annotations — confirm.
full_df[mask].to_csv('/root/data/alok/biomass_estimation/playground/imr_data_with_predictions_v2.csv')