In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker, relationship, join
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Table, Column, Integer, ForeignKey
from sqlalchemy.orm import relationship
from aquabyte.optics import convert_to_world_point, depth_from_disp, pixel2world, euclidean_distance
from aquabyte.data_access_utils import DataAccessUtils

import pickle
from PIL import Image, ImageDraw

# Use fully qualified option names: the bare pattern 'max_columns' is ambiguous in
# newer pandas (it also matches 'styler.render.max_columns') and raises OptionError.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 50)

<h1> Establish connection to database and perform query for base dataset </h1>

In [None]:
# AWS credentials: the path of the JSON credentials file comes from the
# AWS_CREDENTIALS environment variable. `with` closes the file handle promptly.
with open(os.environ["AWS_CREDENTIALS"]) as credentials_file:
    aws_credentials = json.load(credentials_file)
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")

data_access_utils = DataAccessUtils()

# prod SQL credentials (path taken from the PROD_SQL_CREDENTIALS environment variable)
with open(os.environ["PROD_SQL_CREDENTIALS"]) as credentials_file:
    sql_credentials = json.load(credentials_file)

# base dataset: every keypoint annotation recorded for pen 7
sql_query = '''
select * from keypoint_annotations
where pen_id = 7;
'''

original_df = data_access_utils.extract_from_database(sql_query)
# original_df = original_df.loc[:, ~original_df.columns.duplicated()]

<h1> Iterate over query results and generate 3D coordinates + biomass estimates for each stereo fish detection </h1>

In [None]:
def coord2biomass_linear(world_keypoints, model):
    """Predict biomass from 3D keypoints with the PCA + linear-regression model.

    The feature vector is built in three consecutive groups:
      1. pairwise distances between keypoints, ordered by ``model['body_parts']``;
      2. products of every unordered pair of those distances (repeats allowed);
      3. products of every unordered triple of those distances (repeats allowed).
    It is standardised with ``model['mean']`` / ``model['std']``, projected with
    ``model['PCA_components']`` and fed to the linear regression described by
    ``model['reg_coef']`` and ``model['reg_intercept']``.

    Args:
        world_keypoints: mapping from body-part name to its world coordinate.
        model: mapping holding the keys listed above.

    Returns:
        The regression output for this set of keypoints.
    """
    feature_mean = model['mean']
    feature_std = model['std']
    pca_components = model['PCA_components']
    reg_coef = model['reg_coef']
    reg_intercept = model['reg_intercept']
    body_parts = model['body_parts']

    # The feature order must follow the body_parts order stored with the model.
    n_parts = len(body_parts)
    distances = [
        euclidean_distance(world_keypoints[body_parts[a]], world_keypoints[body_parts[b]])
        for a in range(n_parts - 1)
        for b in range(a + 1, n_parts)
    ]

    n_distances = len(distances)
    quadratic_terms = [
        distances[a] * distances[b]
        for a in range(n_distances)
        for b in range(a, n_distances)
    ]
    cubic_terms = [
        distances[a] * distances[b] * distances[c]
        for a in range(n_distances)
        for b in range(a, n_distances)
        for c in range(b, n_distances)
    ]

    features = np.array(distances + quadratic_terms + cubic_terms)
    standardized = (features - feature_mean) / feature_std
    projected = np.dot(standardized, pca_components.T)
    return np.dot(projected, reg_coef) + reg_intercept



In [None]:
def generate_rotation_matrix(u_base, v):
    """Return the 3x3 rotation matrix that rotates the direction of ``v`` onto ``u_base``.

    The matrix is built with Rodrigues' rotation formula about the axis
    ``u_base x v``, using the negated angle between the two directions.

    Args:
        u_base: length-3 numpy array giving the target direction. It is used
            as-is (not normalised), so it is expected to be a unit vector.
        v: length-3 numpy array to align with ``u_base``; normalised internally.

    Returns:
        3x3 numpy array ``R`` such that ``R @ (v / |v|)`` equals ``u_base`` up to
        rounding. When ``v`` is already parallel to ``u_base`` the identity is
        returned; when it is anti-parallel a half-turn about an axis
        perpendicular to ``u_base`` is returned. A zero-length ``v`` still
        produces NaNs because its direction is undefined.
    """
    u = v / np.linalg.norm(v)
    n = np.cross(u_base, u)
    n_norm = np.linalg.norm(n)
    # Rounding can push the dot product marginally outside [-1, 1], where arccos is NaN.
    cos_angle = np.clip(np.dot(u, u_base), -1.0, 1.0)

    if n_norm < 1e-12:
        # (Anti)parallel vectors: the cross product vanishes, so the rotation axis
        # is undefined and normalising it would fill the matrix with NaN.
        if cos_angle > 0:
            return np.eye(3)
        # Anti-parallel: rotate by pi about any axis perpendicular to u_base.
        seed = np.zeros(3)
        seed[np.argmin(np.abs(u_base))] = 1.0
        n = np.cross(u_base, seed)
        n = n / np.linalg.norm(n)
        return 2.0 * np.outer(n, n) - np.eye(3)

    n = n / n_norm
    # Negative angle: the rotation takes v's direction back onto u_base.
    theta = -np.arccos(cos_angle)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    one_minus_cos = 1 - cos_t

    R = np.array([[
        cos_t + n[0]**2 * one_minus_cos,
        n[0]*n[1]*one_minus_cos - n[2]*sin_t,
        n[0]*n[2]*one_minus_cos + n[1]*sin_t
    ], [
        n[1]*n[0]*one_minus_cos + n[2]*sin_t,
        cos_t + n[1]**2 * one_minus_cos,
        n[1]*n[2]*one_minus_cos - n[0]*sin_t,
    ], [
        n[2]*n[0]*one_minus_cos - n[1]*sin_t,
        n[2]*n[1]*one_minus_cos + n[0]*sin_t,
        cos_t + n[2]**2 * one_minus_cos
    ]])

    return R

def normalize_world_keypoints(world_keypoint_coordinates):
    """Express the world keypoints in a fish-aligned coordinate frame.

    Steps:
      1. translate all keypoints so that TAIL_NOTCH sits at the origin;
      2. rotate so that UPPER_LIP points along [1, 0, 0];
      3. rotate so that the part of DORSAL_FIN that is off the first axis
         points along [0, 0, 1].

    Args:
        world_keypoint_coordinates: mapping from body-part name to a 3D
            coordinate; must contain TAIL_NOTCH, UPPER_LIP and DORSAL_FIN.

    Returns:
        Dict mapping each body part (in sorted name order) to its normalised
        coordinate as a numpy array.
    """
    names = sorted(world_keypoint_coordinates.keys())
    points = {name: np.array(world_keypoint_coordinates[name]) for name in names}

    # step 1: move the tail notch to the origin
    origin = points['TAIL_NOTCH']
    centered = {name: points[name] - origin for name in names}

    # step 2: align the upper lip with the first axis
    first_axis = np.array([1, 0, 0])
    first_rotation = generate_rotation_matrix(first_axis, centered['UPPER_LIP'])
    aligned = {name: np.dot(first_rotation, centered[name]) for name in names}

    # step 3: align the dorsal fin (with its first component removed) with the third axis
    third_axis = np.array([0, 0, 1])
    dorsal = aligned['DORSAL_FIN']
    dorsal_off_axis = dorsal - np.array([dorsal[0], 0, 0])
    second_rotation = generate_rotation_matrix(third_axis, dorsal_off_axis)

    return {name: np.dot(second_rotation, aligned[name]) for name in names}
    


In [None]:
# load model parameters for Blender and linear models
# NOTE: pickle.load can execute arbitrary code -- only load model files from a trusted location.
with open('/root/data/alok/biomass_estimation/models/model_v2.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# blender = json.load(open('/root/data/alok/biomass_estimation/models/volumes.json'))

# Rows whose annotator e-mail contains 'aquabyte'. na=False makes a missing e-mail count
# as "not aquabyte"; without it `~qa_mask` raises as soon as one e-mail is null.
qa_mask = original_df.annotated_by_email.str.contains('aquabyte', na=False)
# establish new columns
mask = (~original_df.keypoints.isnull())
# camera_metadata is deliberately NOT reset here: it is read from each row below
# (row.camera_metadata), so blanking it to NaN would break the item assignment on it.
for col in ['left_keypoints', 'right_keypoints', 'world_keypoint_coordinates']:
    original_df[col] = np.nan
    original_df[col] = original_df[col].astype(object)
for col in ['predicted_biomass_linear', 'predicted_biomass_blender', 
            'max_y_coordinate_deviation', 'max_y_world_coordinate_deviation']:
    original_df[col] = np.nan
    

# Loop-invariant: candidate annotations whose keypoints replace each row's own keypoints.
non_qa_annotations = original_df.loc[~qa_mask, ['left_image_url', 'keypoints']]

# modify the dataframe row-by-row
for idx, row in original_df[mask].iterrows():
    left_image_url = row.left_image_url
    # use the keypoints of the first non-aquabyte annotation of the same left image;
    # rows without such an annotation are skipped
    matching_annotations = non_qa_annotations[non_qa_annotations.left_image_url == left_image_url]
    if matching_annotations.empty:
        continue
    keypoints = matching_annotations.iloc[0].keypoints
    original_df.at[idx, 'keypoints'] = keypoints

    try:
        left_keypoints = keypoints['leftCrop']
        right_keypoints = keypoints['rightCrop']
    except (KeyError, TypeError):
        # keypoints missing (None) or lacking one of the two crops: nothing to triangulate
        continue
            
    # compute world coordinates (work on a copy so the dict held by the source row is not mutated)
    camera_metadata = dict(row.camera_metadata)
    camera_metadata['pixelCountHeight'] = 3000
    camera_metadata['pixelCountWidth'] = 4096
    world_keypoint_coordinates = pixel2world(left_keypoints, right_keypoints, camera_metadata)
    original_df.at[idx, 'camera_metadata'] = camera_metadata
    
    # update dataframe with world keypoint coordinates
    original_df.at[idx, 'left_keypoints'] = left_keypoints
    original_df.at[idx, 'right_keypoints'] = right_keypoints
    original_df.at[idx, 'world_keypoint_coordinates'] = world_keypoint_coordinates
    
    # one column per unordered pair of body parts holding their 3D distance
    body_parts = sorted(list(world_keypoint_coordinates.keys()))
    for i in range(len(body_parts)-1):
        for j in range(i+1, len(body_parts)):
            bp1, bp2 = body_parts[i], body_parts[j]
            col = '{}<->{}'.format(bp1, bp2)
            if col not in original_df.columns:
                original_df[col] = np.nan
            original_df.at[idx, col] = \
                euclidean_distance(world_keypoint_coordinates[bp1], world_keypoint_coordinates[bp2])
    
    # update dataframe with the biomass prediction of the linear model
    predicted_biomass_linear = coord2biomass_linear(world_keypoint_coordinates, model)
    original_df.at[idx, 'predicted_biomass_linear'] = predicted_biomass_linear
    
    # update dataframe with keypoint deviation: largest left/right difference in yFrame
    threshold = 10  # currently unused; the filter cell applies its own cut-off
    left_keypoint_y_coords = {bp['keypointType']: bp['yFrame'] for bp in left_keypoints}
    right_keypoint_y_coords = {bp['keypointType']: bp['yFrame'] for bp in right_keypoints}
    max_y_coordinate_deviation = \
        max(abs(left_keypoint_y_coords[bp] - right_keypoint_y_coords[bp]) for bp in body_parts)
    
    original_df.at[idx, 'max_y_coordinate_deviation'] = max_y_coordinate_deviation
    
    # add 3D range for world coordinate y-values (after normalising the keypoints)
    norm_wkps = normalize_world_keypoints(world_keypoint_coordinates)
    norm_wkp_y_values = [norm_wkps[bp][1] for bp in norm_wkps.keys()]
    max_y_world_coordinate_deviation = max(norm_wkp_y_values) - min(norm_wkp_y_values)
    original_df.at[idx, 'max_y_world_coordinate_deviation'] = max_y_world_coordinate_deviation



<h1> Apply filters </h1>

In [None]:
# ids of fish detections listed as invalid in the JSON file; `with` closes the handle promptly
with open('/root/data/alok/biomass_estimation/invalid_fish_detection_ids.json') as invalid_ids_file:
    invalid_fish_detection_ids = json.load(invalid_ids_file)
df = original_df.copy(deep=True)

# define filters
valid_linear_prediction_mask = ~df.predicted_biomass_linear.isnull()
rectification_valid_mask = (~df.fish_detection_id.isin(invalid_fish_detection_ids))
keypoints_valid_mask = (df.max_y_coordinate_deviation < 15)
qa_mask = df.is_qa == True

# keep predictions strictly between the 1st and 99th percentile of all non-null predictions
# (both percentiles are computed in a single pass)
lower_biomass_bound, upper_biomass_bound = np.percentile(
    original_df.predicted_biomass_linear.dropna(), [1.0, 99.0])
inlier_mask = (df.predicted_biomass_linear > lower_biomass_bound) & \
              (df.predicted_biomass_linear < upper_biomass_bound)

mask_valid = valid_linear_prediction_mask & rectification_valid_mask & keypoints_valid_mask & qa_mask

mask = mask_valid & inlier_mask

# filtered dataset, indexed by capture time
df = df[mask].copy(deep=True)
df.index = pd.to_datetime(df.captured_at)



In [None]:
# preview the first rows of the filtered dataset
df.head()

<h1> Display left and right crops with annotations overlaid </h1>

In [None]:
def plot_coordinates(image_url, side, keypoints):
    """Download a crop from S3 and plot it with its keypoint annotations.

    Args:
        image_url: URL of the crop; it must contain the bucket name
            'aquabyte-crops', and everything after the character that follows
            the bucket name is used as the S3 key.
        side: label of the crop (callers pass 'left' or 'right'); shown in the
            figure title.
        keypoints: iterable of dicts with the keys 'keypointType', 'xCrop'
            and 'yCrop'.

    Returns:
        The image array that was plotted, so callers can keep it
        (previously the function returned None).
    """
    bucket = 'aquabyte-crops'
    key = image_url[image_url.index(bucket) + len(bucket) + 1:]
    image_f = data_access_utils.download_from_s3(bucket, key)
    plt.figure(figsize=(30, 10))
    im = plt.imread(image_f)

    for keypoint in keypoints:
        keypoint_type = keypoint['keypointType']
        x, y = keypoint['xCrop'], keypoint['yCrop']
        plt.scatter([x], [y])
        plt.annotate(keypoint_type, (x, y), color='red')

    plt.imshow(im)
    plt.title('{} crop'.format(side))
    return im
    

In [None]:
# annotation to inspect: take the first filtered row carrying this id
keypoint_annotation_id = 173849
keypoint_annotation_mask = (df.id == keypoint_annotation_id)
selected_annotation = df[keypoint_annotation_mask].iloc[0]

left_image_url = selected_annotation['left_image_url']
left_keypoints = selected_annotation['left_keypoints']
right_image_url = selected_annotation['right_image_url']
right_keypoints = selected_annotation['right_keypoints']
world_keypoint_coordinates = selected_annotation['world_keypoint_coordinates']

# draw each crop with its keypoints overlaid
im_left = plot_coordinates(left_image_url, 'left', left_keypoints)
im_right = plot_coordinates(right_image_url, 'right', right_keypoints)

<h1> Generate lateral keypoint </h1>

In [None]:
def generate_lateral_keypoints(left_image, right_image, left_keypoints, right_keypoints, world_keypoints, 
                               bp_1='UPPER_LIP', bp_2='TAIL_NOTCH', left_window_size=100, 
                               min_breadth=0.04, max_breadth=0.2):
    """Find matching ORB features around the midpoint of two keypoints and triangulate them.

    A square window is cut from the left image around the midpoint of ``bp_1``
    and ``bp_2``. The matching search window in the right image is shifted by
    the disparity range derived from the two keypoints' depth and the
    ``min_breadth`` / ``max_breadth`` bounds. The (up to) five best ORB matches
    are converted to world points.

    Args:
        left_image, right_image: image arrays indexed as [row, column].
        left_keypoints: mapping from body part to a numpy array [x, y] in the left image.
        right_keypoints: accepted for symmetry; not used.
        world_keypoints: mapping from body part to a world coordinate whose
            element at index 1 is used as the depth.
        bp_1, bp_2: body parts whose midpoint anchors the search window.
        left_window_size: side length, in pixels, of the left search window.
        min_breadth, max_breadth: hard-coded bounds subtracted (halved) from the
            mean keypoint depth to bracket the depth of the lateral surface.

    Returns:
        numpy array with one world point per retained match; an empty array
        when a window falls outside the image or ORB finds nothing to match.
    """
    half_window = left_window_size // 2
    left_extrap_kp = (0.5 * left_keypoints[bp_1] + 0.5 * left_keypoints[bp_2]).astype('int64')
    bp_1_depth = world_keypoints[bp_1][1]
    bp_2_depth = world_keypoints[bp_2][1]

    # need to determine lower and upper bounds here in a data driven fashion from GTSF data
    # hardcoded values used here
    mid_depth = (bp_1_depth + bp_2_depth) / 2.0
    extrap_kp_max_depth = mid_depth - min_breadth / 2.0
    extrap_kp_min_depth = mid_depth - max_breadth / 2.0

    # The larger depth gives the smaller disparity and vice versa.
    # NOTE(review): disp_from_depth is not among the names imported from aquabyte.optics
    # at the top of this notebook (only depth_from_disp is) -- make sure it is defined or
    # imported before calling this function.
    extrap_kp_min_disp = disp_from_depth(extrap_kp_max_depth)
    extrap_kp_max_disp = disp_from_depth(extrap_kp_min_depth)

    # Clamp every bound at 0: a negative index would wrap around to the other
    # side of the image and silently select the wrong (or an empty) region.
    row_start = max(0, left_extrap_kp[1] - half_window)
    row_stop = max(0, left_extrap_kp[1] + half_window)
    left_col_start = max(0, left_extrap_kp[0] - half_window)
    left_col_stop = max(0, left_extrap_kp[0] + half_window)
    right_col_start = max(0, left_extrap_kp[0] - int(extrap_kp_max_disp) - half_window)
    right_col_stop = max(0, left_extrap_kp[0] - int(extrap_kp_min_disp) + half_window)

    left_box = left_image[row_start:row_stop, left_col_start:left_col_stop]
    right_box = right_image[row_start:row_stop, right_col_start:right_col_stop]
    if left_box.size == 0 or right_box.size == 0:
        return np.array([])

    # Compute ORB feature descriptors inside both windows
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(left_box, None)
    kp2, des2 = orb.detectAndCompute(right_box, None)
    if des1 is None or des2 is None:
        # no descriptors in one of the windows: nothing to match
        return np.array([])

    # get top five matches
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)[:5]

    # get world coordinates of lateral keypoints
    lateral_wkps = []
    for match in matches:
        # shift window-relative feature positions back into full-image coordinates
        lateral_left_coordinates = np.array(kp1[match.queryIdx].pt).astype(int)
        lateral_left_coordinates[0] += left_col_start
        lateral_left_coordinates[1] += row_start

        lateral_right_coordinates = np.array(kp2[match.trainIdx].pt).astype(int)
        lateral_right_coordinates[0] += right_col_start
        lateral_right_coordinates[1] += row_start

        disp = abs(lateral_left_coordinates[0] - lateral_right_coordinates[0])
        depth = depth_from_disp(disp)
        lateral_wkp = convert_to_world_point(lateral_left_coordinates[0], lateral_left_coordinates[1], depth)
        lateral_wkps.append(lateral_wkp)

    return np.array(lateral_wkps)

In [None]:
bucket = 'aquabyte-crops'

# first filtered row matching the previously selected annotation
selected_annotation = df[keypoint_annotation_mask].iloc[0]
left_image_url = selected_annotation['left_image_url']
left_keypoints = selected_annotation['left_keypoints']
right_image_url = selected_annotation['right_image_url']
right_keypoints = selected_annotation['right_keypoints']
world_keypoints = selected_annotation['world_keypoint_coordinates']

# the S3 key is everything after the bucket name and the character that follows it
left_key_start = left_image_url.index('aquabyte-crops') + len('aquabyte-crops') + 1
left_key = left_image_url[left_key_start:]
left_image_f = data_access_utils.download_from_s3(bucket, left_key)
left_image = plt.imread(left_image_f)

right_key_start = right_image_url.index('aquabyte-crops') + len('aquabyte-crops') + 1
right_key = right_image_url[right_key_start:]
right_image_f = data_access_utils.download_from_s3(bucket, right_key)
right_image = plt.imread(right_image_f)

# body part -> [xFrame, yFrame] position for each side
left_kps = {kp['keypointType']: np.array([kp['xFrame'], kp['yFrame']]) for kp in left_keypoints}
right_kps = {kp['keypointType']: np.array([kp['xFrame'], kp['yFrame']]) for kp in right_keypoints}

In [None]:
# world points of the best ORB matches (at most five) around the UPPER_LIP / TAIL_NOTCH midpoint
generate_lateral_keypoints(left_image, right_image, left_kps, right_kps, world_keypoints)