In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker, relationship, join
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Table, Column, Integer, ForeignKey
from sqlalchemy.orm import relationship
from aquabyte.optics import convert_to_world_point, depth_from_disp, pixel2world, euclidean_distance

import pickle
from PIL import Image, ImageDraw

<h1> Establish connection to database and perform query for base dataset </h1>

In [None]:
# AWS credentials: the AWS_CREDENTIALS environment variable holds the path to a
# JSON file with the access key pair. The file is closed as soon as it is parsed.
with open(os.environ["AWS_CREDENTIALS"]) as credentials_f:
    aws_credentials = json.load(credentials_f)
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")


# prod SQL credentials: same scheme, path taken from PROD_SQL_CREDENTIALS
with open(os.environ["PROD_SQL_CREDENTIALS"]) as credentials_f:
    prod_sql_credentials = json.load(credentials_f)
prod_sql_engine = create_engine("postgresql://{}:{}@{}:{}/{}".format(prod_sql_credentials["user"], prod_sql_credentials["password"],
                           prod_sql_credentials["host"], prod_sql_credentials["port"],
                           prod_sql_credentials["database"]))

Session = sessionmaker(bind=prod_sql_engine)
session = Session()

# Reflect the existing database schema into mapped classes and pick out the
# two tables this notebook works with.
Base = automap_base()
Base.prepare(prod_sql_engine, reflect=True)
KeypointAnnotations = Base.classes.keypoint_annotations
FishDetections = Base.classes.fish_detections



In [None]:
# Left outer join from KeypointAnnotations to FishDetections, matched on
# KeypointAnnotations.fish_detection_id == FishDetections.id; each result
# carries both mapped objects.
annotation_detection_query = session.query(KeypointAnnotations, FishDetections)
annotation_detection_query = annotation_detection_query.outerjoin(
    FishDetections,
    KeypointAnnotations.fish_detection_id == FishDetections.id,
)
records = annotation_detection_query.all()



<h1> Iterate over query results and generate 3D coordinates for each stereo fish detection </h1>

In [None]:
# Build one row per annotation that has keypoints for both stereo crops.
# Rows are collected in a list and converted to a DataFrame once at the end:
# appending to a DataFrame inside the loop copies the whole frame on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.
rows = []
for record in records:
    keypoint_annotation = record.keypoint_annotations
    fish_detection = record.fish_detections
    keypoints = keypoint_annotation.keypoints

    # the outer join yields no fish detection for unmatched annotations
    if fish_detection is None:
        continue
    if not keypoints:
        continue
    if not (keypoints.get('leftCrop') and keypoints.get('rightCrop')):
        continue

    # annotated keypoint coordinates for each side of the stereo pair
    left_keypoints = keypoints['leftCrop']
    right_keypoints = keypoints['rightCrop']

    # compute world coordinates; work on a copy so the pixel counts added
    # here do not modify the dict held by the ORM object
    camera_metadata = dict(fish_detection.camera_metadata)
    camera_metadata['pixelCountHeight'] = 3000
    camera_metadata['pixelCountWidth'] = 4096
    world_keypoint_coordinates = pixel2world(left_keypoints, right_keypoints, camera_metadata)

    rows.append({
        'keypoint_annotation_id': keypoint_annotation.id,
        'fish_detection_id': fish_detection.id,
        'captured_at': fish_detection.captured_at,
        'is_qa': keypoint_annotation.is_qa,
        'left_image_url': fish_detection.left_image_url,
        'right_image_url': fish_detection.right_image_url,
        'left_keypoints': left_keypoints,
        'right_keypoints': right_keypoints,
        'world_keypoint_coordinates': world_keypoint_coordinates,
        'site_id': fish_detection.site_id,
        'pen_id': fish_detection.pen_id,
        'camera_metadata': camera_metadata,
        'left_crop_metadata': fish_detection.left_crop_metadata,
        'right_crop_metadata': fish_detection.right_crop_metadata
    })

df = pd.DataFrame(rows)

# index by capture time so later cells can slice and resample by date
df.index = pd.to_datetime(df.captured_at)


<h1> Compute biomass estimate using Linear Model + PCA + interaction features for pairwise distances </h1>

In [None]:
def coord2biomass_linear(world_keypoints, model):
    """Predict biomass from 3D keypoints with the linear (PCA + regression) model.

    The feature vector is the list of pairwise distances between the body
    parts in ``model['body_parts']`` (in that exact order), followed by the
    product of every pairwise distance with itself and with each later
    pairwise distance. It is standardized with ``model['mean']`` and
    ``model['std']``, projected with ``model['PCA_components']`` and fed to
    the linear regression given by ``model['reg_coef']`` and
    ``model['reg_intercept']``.

    Args:
        world_keypoints: mapping from body-part name to the coordinate passed
            to ``euclidean_distance``.
        model: mapping holding the keys named above.

    Returns:
        The regression output for this fish.
    """
    mean = model['mean']
    std = model['std']
    PCA_components = model['PCA_components']
    reg_coef = model['reg_coef']
    reg_intercept = model['reg_intercept']
    body_parts = model['body_parts']

    # pairwise distances, ordered exactly as the body parts are ordered in
    # body_parts so they line up with the fitted model's features
    part_count = len(body_parts)
    pairwise_distances = [
        euclidean_distance(world_keypoints[body_parts[i]], world_keypoints[body_parts[j]])
        for i in range(part_count - 1)
        for j in range(i + 1, part_count)
    ]

    # interaction features: d_i * d_j for every i <= j
    distance_count = len(pairwise_distances)
    interaction_values = [
        pairwise_distances[i] * pairwise_distances[j]
        for i in range(distance_count)
        for j in range(i, distance_count)
    ]

    X = np.array(pairwise_distances + interaction_values)

    X_normalized = (X - mean) / std
    X_transformed = np.dot(X_normalized, PCA_components.T)
    prediction = np.dot(X_transformed, reg_coef) + reg_intercept
    return prediction


def coord2biomass_blender(world_keypoints, blender):
    """Predict biomass by nearest-neighbour lookup in the blender table.

    The pairwise keypoint distances of the fish are compared against every
    row of ``blender["distances"]``; the volume of the row with the smallest
    NaN-ignoring mean absolute difference is then mapped through the linear
    correction ``volume * coeff[0] + coeff[1]``.

    Args:
        world_keypoints: mapping from body-part name to the coordinate passed
            to ``euclidean_distance``.
        blender: mapping with keys ``reverse_mapping`` (str index -> body-part
            name), ``distances``, ``volume`` and ``coeff``.

    Returns:
        The corrected volume of the closest table row.
    """
    index_to_part = blender["reverse_mapping"]
    reference_distances = np.array(blender["distances"])
    reference_volumes = blender["volume"]
    slope_and_offset = blender["coeff"]

    # List the keypoints in the table's own order (via the reverse mapping)
    # so the measured distances line up with the table's columns.
    part_count = len(world_keypoints)
    ordered_points = [
        world_keypoints[index_to_part[str(position)]]
        for position in range(part_count)
    ]

    # Distances are scaled by 1000 -- presumably metres to millimetres to
    # match the table's units; TODO confirm.
    measured = np.array([
        euclidean_distance(ordered_points[first], ordered_points[second]) * 1000
        for first in range(part_count)
        for second in range(first + 1, part_count)
    ])

    # mean absolute difference per table row, ignoring NaN entries
    row_errors = np.nanmean(np.abs(reference_distances - measured), axis=1)
    best_row = np.argmin(row_errors)

    # linear correction of the looked-up volume
    return reference_volumes[best_row] * slope_and_offset[0] + slope_and_offset[1]

In [None]:
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# a model.pkl that comes from a trusted source.
with open('./model.pkl', 'rb') as model_f:
    model = pickle.load(model_f)
with open('./volumes.json') as blender_f:
    blender = json.load(blender_f)

linear_biomass_values, blender_biomass_values, lengths = [], [], []
for i, (idx, row) in enumerate(df.iterrows()):
    # progress indicator every 10 rows
    if i % 10 == 0:
        print(i)

    world_keypoints = row.world_keypoint_coordinates
    linear_biomass_values.append(coord2biomass_linear(world_keypoints, model))
    blender_biomass_values.append(coord2biomass_blender(world_keypoints, blender))
    # fish length taken as the distance between upper lip and tail notch
    lengths.append(euclidean_distance(world_keypoints['UPPER_LIP'], world_keypoints['TAIL_NOTCH']))

df['predicted_biomass_linear'] = linear_biomass_values
df['predicted_biomass_blender'] = blender_biomass_values
df['length'] = lengths
    

In [None]:
with open('./invalid_fish_detection_ids.json') as invalid_ids_f:
    invalid_fish_detection_ids = json.load(invalid_ids_f)

# is_valid is 0 for detections listed in the JSON file and 1 for all others;
# a single vectorized membership test replaces one full-column scan per id.
df['is_valid'] = (~df.fish_detection_id.isin(invalid_fish_detection_ids)).astype(int)

In [None]:
# Valid, QA-flagged rows for site 23 / pen 4 captured on or after 2019-04-28,
# shown with the largest linear-model estimate first.
captured_recently = df.index >= '2019-04-28'
in_target_pen = (df.site_id == 23) & (df.pen_id == 4)
passes_checks = (df.is_valid == 1) & (df.is_qa == 1)
mask = captured_recently & in_target_pen & passes_checks
df[mask].sort_values('predicted_biomass_linear', ascending=False)

In [None]:
def plot_coordinates(image_url, side, keypoints):
    """Download a crop from S3 to ./image.jpg and plot its keypoints over it.

    Args:
        image_url: URL of the crop; the part after 'aquabyte-crops/' is used
            as the S3 key.
        side: label of the stereo image being shown (the notebook passes
            'left' or 'right'); used as the figure title so the two plots
            can be told apart.
        keypoints: iterable of dicts with keys 'keypointType', 'xCrop' and
            'yCrop'.
    """
    image_f = './image.jpg'
    bucket = 'aquabyte-crops'
    # the S3 key is everything after "<bucket>/" in the URL
    key = image_url[image_url.index(bucket) + len(bucket) + 1:]
    s3_client.download_file(bucket, key, image_f)
    plt.figure(figsize=(30, 10))
    im = plt.imread(image_f)
    print(image_url, im.shape)

    for keypoint in keypoints:
        keypoint_type = keypoint['keypointType']
        x, y = keypoint['xCrop'], keypoint['yCrop']
        plt.scatter([x], [y])
        plt.annotate(keypoint_type, (x, y), color='red')

    plt.imshow(im)
    plt.title(side)
    

In [None]:
# Pull the first row for one fish detection, print its lip-to-tail distance
# and plot the annotated keypoints on both crops (right first, then left).
fish_detection_id = 239557
fish_id_mask = df.fish_detection_id == fish_detection_id
selected_row = df[fish_id_mask].iloc[0]

left_image_url, left_keypoints = selected_row.left_image_url, selected_row.left_keypoints
right_image_url, right_keypoints = selected_row.right_image_url, selected_row.right_keypoints
world_keypoint_coordinates = selected_row.world_keypoint_coordinates

lip_to_tail = euclidean_distance(
    world_keypoint_coordinates['UPPER_LIP'],
    world_keypoint_coordinates['TAIL_NOTCH'],
)
print(lip_to_tail)

for image_side, url, side_keypoints in (
    ('right', right_image_url, right_keypoints),
    ('left', left_image_url, left_keypoints),
):
    plot_coordinates(url, image_side, side_keypoints)

In [None]:
# Mean blender estimate over all rows captured on 2019-05-02. Rows are selected
# with .loc because df['<date string>'] row selection was removed in pandas 2.0.
df.loc['2019-05-02', 'predicted_biomass_blender'].mean()

In [None]:
# Daily mean of the blender estimate for site 23 / pen 4 from 2019-04-27 on.
# The `how=` keyword of resample() no longer exists; aggregate on the
# resampler instead.
mask = (df.site_id == 23) & (df.pen_id == 4) & (df.index >= '2019-04-27')
df[mask].predicted_biomass_blender.resample('D').mean()

In [None]:
# distribution of the blender estimate over the rows selected by `mask`
plt.hist(df.loc[mask, 'predicted_biomass_blender'])

In [None]:
# (rows, columns) of the subset selected by `mask`
df.loc[mask].shape

In [None]:
# Compare the two estimators on rows whose linear estimate lies strictly
# between 500 and 6000. `.ix` was removed in pandas 1.0; `.loc` is the
# label/boolean-based equivalent.
mask = (df.predicted_biomass_linear > 500) & (df.predicted_biomass_linear < 6000)
plt.scatter(df.loc[mask, 'predicted_biomass_blender'], df.loc[mask, 'predicted_biomass_linear'])
plt.show()

<h1> Examine rectification issue </h1>

In [None]:
# quick size check: (rows, columns) of df
df.shape

In [None]:
# Rebinds `df` to the contents of ./data_dump.csv: from this cell on, `df` no
# longer refers to the frame built from the database query earlier in the notebook.
df = pd.read_csv('./data_dump.csv')

In [None]:
rectified_bucket = 'aquabyte-crops'
left_image_rectified_f = './left_image_rectified.jpg'
right_image_rectified_f = './right_image_rectified.jpg'
# 1-based row number at which checking starts; earlier rows are stepped over
# without downloading anything. NOTE(review): 36132 looks like the resume
# point of an interrupted run -- set to 0 to check every row.
resume_from_row = 36132


def _fetch_crop(image_url, local_f):
    """Download the crop behind image_url into local_f and read it with cv2.imread.

    The S3 key is the part of the URL that follows '<rectified_bucket>/'.
    Returns None when OpenCV cannot read the downloaded file.
    """
    key = image_url[image_url.index(rectified_bucket) + len(rectified_bucket) + 1:]
    s3_client.download_file(rectified_bucket, key, local_f)
    # TODO: decode in memory instead of round-tripping through a local file
    return cv2.imread(local_f)


def _has_crop_size(image, crop_metadata):
    """Return True when the image's height and width equal those in crop_metadata."""
    return image.shape[0] == crop_metadata['height'] and image.shape[1] == crop_metadata['width']


invalid_fish_detection_ids, invalid_urls = [], []
i = 0
for idx, row in df.iterrows():
    # progress indicator every 100 rows
    if i % 100 == 0:
        print(i)
    i += 1
    if i < resume_from_row:
        continue

    left_image_url = row.left_image_url
    right_image_url = row.right_image_url
    left_rectified_image = _fetch_crop(left_image_url, left_image_rectified_f)
    right_rectified_image = _fetch_crop(right_image_url, right_image_rectified_f)

    # cv2.imread returns None instead of raising when a file cannot be read;
    # skip the row rather than abort the whole run on `.shape`
    if left_rectified_image is None or right_rectified_image is None:
        print('could not read downloaded image(s) for id {}; skipping'.format(row.id))
        continue

    left_crop_metadata = json.loads(row.left_crop_metadata)
    right_crop_metadata = json.loads(row.right_crop_metadata)

    # an image whose size still equals the crop size is reported as not rectified
    invalid = False
    if _has_crop_size(left_rectified_image, left_crop_metadata):
        invalid = True
        invalid_urls.append(left_image_url)
        print('left image not rectified for id {}!'.format(row.id))
    if _has_crop_size(right_rectified_image, right_crop_metadata):
        invalid = True
        invalid_urls.append(right_image_url)
        print('right image not rectified for id {}!'.format(row.id))

    if invalid:
        invalid_fish_detection_ids.append(int(row.id))
    
    
    
        
    
        

In [None]:
# NOTE(review): this cell originally dumped `invalid_ids`, a name no cell in
# this notebook defines; the ids collected by the rectification check live in
# `invalid_fish_detection_ids`, so that list is written here -- confirm.
with open('./invalid_ids', 'wb') as invalid_ids_f:
    pickle.dump(invalid_fish_detection_ids, invalid_ids_f)

In [None]:
# current value of the loop counter `i` (shows how far the last loop got)
i

In [None]:
# Merge the URLs flagged in this run into the persisted list. The previous
# list is loaded right here because `invalid_urls_old` is otherwise only
# defined by a cell further down the notebook, which breaks a top-to-bottom run.
invalid_urls_path = './invalid_urls.json'
with open(invalid_urls_path) as invalid_urls_f:
    invalid_urls_old = json.load(invalid_urls_f)
with open(invalid_urls_path, 'w') as invalid_urls_f:
    json.dump(invalid_urls + invalid_urls_old, invalid_urls_f)

In [None]:
# previously persisted list of invalid image URLs
with open('./invalid_urls.json') as invalid_urls_f:
    invalid_urls_old = json.load(invalid_urls_f)

In [None]:
# previously persisted list of invalid fish detection ids
with open('./invalid_fish_detection_ids.json') as invalid_ids_f:
    invalid_fish_detection_ids_old = json.load(invalid_ids_f)

In [None]:
# Persist the ids flagged in this run together with the previously stored ones;
# the context manager guarantees the file is flushed and closed.
with open('./invalid_fish_detection_ids.json', 'w') as invalid_ids_f:
    json.dump(invalid_fish_detection_ids + invalid_fish_detection_ids_old, invalid_ids_f)