In [None]:
"""
This module contains constants representing core & auxiliary fish body parts.
"""

# Core body-part identifiers (each constant's value equals its own name).
ADIPOSE_FIN = 'ADIPOSE_FIN'
ANAL_FIN = 'ANAL_FIN'
DORSAL_FIN = 'DORSAL_FIN'
EYE = 'EYE'
PECTORAL_FIN = 'PECTORAL_FIN'
PELVIC_FIN = 'PELVIC_FIN'
TAIL_NOTCH = 'TAIL_NOTCH'
UPPER_LIP = 'UPPER_LIP'

# Auxiliary body-part identifiers.
HYPURAL_PLATE = 'HYPURAL_PLATE'
LOWER_PRECAUDAL_PIT = 'LOWER_PRECAUDAL_PIT'
UPPER_PRECAUDAL_PIT = 'UPPER_PRECAUDAL_PIT'

# All three lists are kept in alphabetical order; code that indexes keypoint arrays by
# position (e.g. via core_body_parts.index(...)) depends on that ordering being stable.
core_body_parts = sorted((
    ADIPOSE_FIN,
    ANAL_FIN,
    DORSAL_FIN,
    EYE,
    PECTORAL_FIN,
    PELVIC_FIN,
    TAIL_NOTCH,
    UPPER_LIP,
))

auxiliary_body_parts = sorted((
    HYPURAL_PLATE,
    LOWER_PRECAUDAL_PIT,
    UPPER_PRECAUDAL_PIT,
))

all_body_parts = sorted([*core_body_parts, *auxiliary_body_parts])

In [None]:
"""This module contains utility helper functions for the WeightEstimator class."""

from collections import namedtuple
from typing import Dict, List, Tuple
import numpy as np
import torch


# Immutable record of the camera parameters read by convert_to_world_point_arr.
CameraMetadata = namedtuple(
    'CameraMetadata',
    (
        # divides sensor-scaled pixel coordinates, so presumably expressed in the same
        # physical unit as image_sensor_width / image_sensor_height -- TODO confirm
        'focal_length',
        # focal length in pixels; multiplied by the baseline and divided by pixel disparity
        'focal_length_pixel',
        # stereo baseline (metres, judging by the name -- TODO confirm)
        'baseline_m',
        'pixel_count_width',
        'pixel_count_height',
        'image_sensor_width',
        'image_sensor_height',
    ),
)


def get_left_right_keypoint_arrs(annotation: Dict[str, List[Dict]]) -> Tuple:
    """Builds the left and right full-frame keypoint arrays for one annotation.

    Args:
        annotation: dict with keys 'leftCrop' and 'rightCrop'. Values are lists where each element
        is a dict with keys 'keypointType', 'xCrop' (num pixels from crop left edge),
        'yCrop' (num pixels from crop top edge), 'xFrame' (num pixels from full frame left edge),
        and 'yFrame' (num pixels from full frame top edge). Only 'keypointType', 'xFrame' and
        'yFrame' are read here.
    Returns:
        X_left: numpy array with one (xFrame, yFrame) row per core body part, in the order of
        core_body_parts (alphabetical), taken from the left crop.
        X_right: same as above, but for the right crop.
    Raises:
        KeyError: if a core body part is missing from either crop.
    """

    def _frame_coords_by_part(items):
        # if a keypoint type occurs more than once, the last occurrence wins
        return {item['keypointType']: (item['xFrame'], item['yFrame']) for item in items}

    left_lookup = _frame_coords_by_part(annotation['leftCrop'])
    right_lookup = _frame_coords_by_part(annotation['rightCrop'])

    # look both sides up together so a missing body part surfaces in core_body_parts order
    paired = [(left_lookup[part], right_lookup[part]) for part in core_body_parts]

    X_left = np.array([left_xy for left_xy, _ in paired])
    X_right = np.array([right_xy for _, right_xy in paired])
    return X_left, X_right


def normalize_left_right_keypoint_arrs(X_left: np.ndarray, X_right: np.ndarray) -> Tuple:
    """Centers, optionally mirrors, and rotates a left/right pair of key-point arrays.

    The normalization involves (1) a 2D translation of all keypoints so that they are centered
    on the bounding-box midpoint of a reference array, (2) when UPPER_LIP does not lie to the
    right of TAIL_NOTCH in X_left, swapping the roles of the two arrays and negating x, and
    (3) a rotation of the 2D coordinates about the center such that the line passing through
    UPPER_LIP and the center is horizontal.

    Args:
        X_left: array whose rows follow core_body_parts order and whose columns are (x, y).
        X_right: same layout as X_left, for the other camera.
    Returns:
        X_left_rot: the rotated reference array.
        X_right_rot: X_left_rot minus the (unrotated) per-keypoint left/right offsets.
    """

    upper_lip_idx = core_body_parts.index(UPPER_LIP)
    tail_notch_idx = core_body_parts.index(TAIL_NOTCH)

    # translate key-points, perform reflection if necessary
    keep_orientation = X_left[upper_lip_idx, 0] > X_left[tail_notch_idx, 0]
    if keep_orientation:
        reference, other = X_left, X_right
    else:
        reference, other = X_right, X_left

    midpoint = 0.5 * (np.max(reference, axis=0) + np.min(reference, axis=0))
    X_left_centered = reference - midpoint
    X_right_centered = other - midpoint
    if not keep_orientation:
        # the subtractions above produced fresh arrays, so the inputs are not modified here
        X_left_centered[:, 0] = -X_left_centered[:, 0]
        X_right_centered[:, 0] = -X_right_centered[:, 0]

    # rotate key-points
    # NOTE(review): there is no guard for a centered UPPER_LIP x of exactly zero.
    lip_x, lip_y = tuple(X_left_centered[upper_lip_idx])
    theta = np.arctan(lip_y / lip_x)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    R = np.array([
        [cos_t, -sin_t],
        [sin_t, cos_t]
    ])

    # only the reference array is rotated; the other is rebuilt from the unrotated offsets
    offsets = X_left_centered - X_right_centered
    X_left_rot = np.dot(X_left_centered, R)
    X_right_rot = X_left_rot - offsets
    return X_left_rot, X_right_rot


def convert_to_world_point_arr(X_left: np.ndarray, X_right: np.ndarray,
                               camera_metadata: CameraMetadata) -> np.ndarray:
    """Converts input left and right normalized keypoint arrays into a world coordinate array.

    Args:
        X_left: array whose columns 0 and 1 are the x and y pixel coordinates of each keypoint.
        X_right: array in the same row order; only its x column (the disparity partner) is read.
        camera_metadata: CameraMetadata providing the focal lengths, baseline, pixel counts and
        sensor dimensions.
    Returns:
        Array with one row per keypoint and columns (x_world, y_world, z_world), where y_world
        is the disparity-derived coordinate.
    """

    disparity = X_left[:, 0] - X_right[:, 0]
    y_world = camera_metadata.focal_length_pixel * camera_metadata.baseline_m / disparity

    # Note: the lines commented out below are technically the correct formula for conversion
    # x_world = X_left[:, 0] * y_world / camera_metadata.focal_length_pixel
    # z_world = -X_left[:, 1] * y_world / camera_metadata.focal_length_pixel
    x_on_sensor = X_left[:, 0] * camera_metadata.image_sensor_width \
        / camera_metadata.pixel_count_width
    y_on_sensor = X_left[:, 1] * camera_metadata.image_sensor_height \
        / camera_metadata.pixel_count_height
    x_world = (x_on_sensor * y_world) / camera_metadata.focal_length
    z_world = (-y_on_sensor * y_world) / camera_metadata.focal_length

    return np.vstack((x_world, y_world, z_world)).T


def stabilize_keypoints(X: np.ndarray) -> np.ndarray:
    """Rescales a world coordinate array so that neural network inputs are stabilized.

    Args:
        X: 2D array whose first three columns are read as (x, y, z) per keypoint.
    Returns:
        A new float array of the same shape as X with columns
        (0.5 * x / y, 0.5 * z / y, 0.05 / y); any further columns are left at zero.
    """
    y = X[:, 1]
    stabilized = np.zeros(X.shape)
    # x and z are both divided by y, so handle them in a single broadcasted assignment
    stabilized[:, [0, 1]] = 0.5 * X[:, [0, 2]] / y[:, np.newaxis]
    stabilized[:, 2] = 0.05 / y
    return stabilized


def convert_to_nn_input(annotation: Dict[str, List[Dict]], camera_metadata: CameraMetadata) \
        -> torch.Tensor:
    """Converts input keypoint annotation and camera metadata into neural network tensor input.

    Pipeline: frame-coordinate arrays -> normalization -> world coordinates -> stabilization,
    then a leading batch axis of size one is added and the result is cast to float32.
    """
    left_px, right_px = get_left_right_keypoint_arrs(annotation)
    left_norm, right_norm = normalize_left_right_keypoint_arrs(left_px, right_px)
    world_points = convert_to_world_point_arr(left_norm, right_norm, camera_metadata)
    stabilized = stabilize_keypoints(world_points)

    # wrap in a list to add the batch dimension expected by the network
    batch = np.array([stabilized])
    return torch.from_numpy(batch).float()

In [None]:
"""
This module contains the WeightEstimator class for estimating fish weight (g), length (mm), and
k-factor given input keypoint coordinates and camera metadata.
"""

from typing import Dict, Tuple
import torch
from torch import nn


class Network(nn.Module):
    """Fully connected regression network shared by the weight and k-factor estimators
    (currently both neural networks share identical architecture).

    Layout: 24 -> 256 -> 128 -> 64 -> 1, with a ReLU after each hidden layer and a linear
    output.
    """

    def __init__(self):
        super().__init__()
        # Attribute names define the state_dict keys, so they must stay as they are.
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run inference on input keypoint tensor.

        The input is flattened to (batch, -1) before the first layer; the result has one
        value per batch element.
        """
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.output(hidden)


class WeightEstimator:
    """WeightEstimator class is used to predict fish weight, k-factor, and length
    given input keypoint annotations and camera metadata."""

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Initializes class with input weight and k-factor neural-networks.

        Args:
            weight_model_f: path to the saved state_dict of the weight network.
            kf_model_f: path to the saved state_dict of the k-factor network.
        """
        self.weight_model = self._load_network(weight_model_f)
        self.kf_model = self._load_network(kf_model_f)

    @staticmethod
    def _load_network(model_f: str) -> "Network":
        """Builds a Network, loads the state_dict stored at model_f and switches to eval mode."""
        network = Network()
        # map_location='cpu' lets checkpoints that were saved from a GPU load on CPU-only
        # hosts; the freshly built Network lives on the CPU anyway.
        network.load_state_dict(torch.load(model_f, map_location='cpu'))
        network.eval()
        return network

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        return convert_to_nn_input(annotation, camera_metadata)

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata.

        The raw network output is multiplied by 1e4 before being returned.
        """
        X = self._get_model_input(annotation, camera_metadata)
        # inference only: skip building the autograd graph
        with torch.no_grad():
            weight = 1e4 * self.weight_model(X).item()
        return weight

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        # inference only: skip building the autograd graph
        with torch.no_grad():
            kf = self.kf_model(X).item()
        return kf

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            Tuple (weight, length, kf). Length is derived as (1e5 * weight / kf) ** (1 / 3) and
            is reported as 0 when weight * kf is not strictly positive (the cube-root formula is
            not meaningful then).
        """
        weight = self.predict_weight(annotation, camera_metadata)
        kf = self.predict_kf(annotation, camera_metadata)
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0
        return weight, length, kf

In [None]:
import json
import os
import cv2
import numpy as np
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from research.weight_estimation.keypoint_utils.body_parts import core_body_parts
from research.utils.image_utils import Picture
from scipy.spatial import Delaunay
from itertools import compress
from PIL import Image
import time

def in_hull(p, hull):
    """Tests which points of p fall inside the convex hull of the point set hull.

    Args:
        p: array of query points, one per row.
        hull: array of points whose Delaunay triangulation defines the region.
    Returns:
        Boolean array, True where find_simplex located a containing simplex (index >= 0).
    """
    triangulation = Delaunay(hull)
    simplex_indices = triangulation.find_simplex(p)
    return simplex_indices >= 0


def apply_convex_hull_filter(kp, des, canonical_kps, bbox):
    """Keeps only the detected features that lie inside the hull of the canonical keypoints.

    Args:
        kp: sequence of detected keypoints exposing a `.pt` (x, y) attribute, in patch
        coordinates.
        des: descriptor array with one row per entry of kp.
        canonical_kps: dict whose values are the (x, y) points spanning the hull.
        bbox: dict with 'x_min' and 'y_min'; added to the patch coordinates before the hull
        test so both point sets share one coordinate system.
    Returns:
        (kp, des) restricted to the features inside the hull.
    """
    hull_points = np.array(list(canonical_kps.values()))
    patch_origin = np.array([bbox['x_min'], bbox['y_min']])
    feature_coords = np.array([feature.pt for feature in kp]).reshape(-1, 2) + patch_origin

    inside = in_hull(feature_coords, hull_points)
    kept_kp = [feature for feature, keep in zip(kp, inside) if keep]
    kept_des = des[inside]
    return kept_kp, kept_des


def get_homography_and_matches(sift, left_patch, right_patch,
                               left_kps, right_kps,
                               left_bbox, right_bbox,
                               good_perc=0.7, min_match_count=3):
    """Matches local features between two patches and estimates a left->right homography.

    Args:
        sift: feature detector exposing detectAndCompute(image, mask).
        left_patch, right_patch: image patches to detect features in.
        left_kps, right_kps: dicts of canonical keypoints; features outside their convex hull
        are discarded (see apply_convex_hull_filter).
        left_bbox, right_bbox: dicts with 'x_min' / 'y_min' giving each patch's offset.
        good_perc: ratio-test threshold; a match is kept when its distance is below
        good_perc times the distance of the second-best match.
        min_match_count: minimum number of ratio-test survivors needed to attempt a homography.
    Returns:
        Tuple (H, kp1, kp2, good, matches_mask):
        * (None, kp1, kp2, None, [0]) when either patch yields no usable descriptors;
        * (identity, kp1, kp2, good, []) when matching is aborted, too few good matches
          exist, or the homography cannot be estimated;
        * (H, kp1, kp2, good, inlier_mask) otherwise, inlier_mask being the RANSAC mask as a
          flat list.
    """

    kp1, des1 = sift.detectAndCompute(left_patch, None)
    kp2, des2 = sift.detectAndCompute(right_patch, None)
    if des1 is None or des2 is None:
        print("None type for detectAndComputer descriptor")
        return None, kp1, kp2, None, [0]
    if not (des1.any() and des2.any()):
        return None, kp1, kp2, None, [0]

    # apply convex hull filter
    kp1, des1 = apply_convex_hull_filter(kp1, des1, left_kps, left_bbox)
    kp2, des2 = apply_convex_hull_filter(kp2, des2, right_kps, right_bbox)

    print(len(kp1), len(kp2))
    bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
    matches = bf.knnMatch(des1, des2, 2)

    H, matches_mask = np.eye(3), []
    good = []

    # every entry must be a (best, second-best) pair, otherwise the ratio test below
    # cannot unpack it
    if any(len(pair) != 2 for pair in matches):
        print('Aborting: matches list does not contain pairs')
        return H, kp1, kp2, good, matches_mask

    good = [m for m, n in matches if m.distance < good_perc * n.distance]

    if len(good) >= min_match_count:
        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        # findHomography needs at least four correspondences and may fail on degenerate
        # ones; in that case keep the identity / empty-mask defaults instead of crashing
        # on a None mask.
        try:
            estimated_H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        except cv2.error:
            estimated_H, mask = None, None
        if estimated_H is not None and mask is not None:
            H = estimated_H
            matches_mask = mask.ravel().tolist()
    return H, kp1, kp2, good, matches_mask


def generate_sift_adjustment(bp, left_crop_metadata, left_fish_picture, left_kps, right_crop_metadata,
                             right_fish_picture, right_kps, sift):
    """Re-derives the right keypoint of one body part from feature matches around it.

    A patch is cut around the body part in each picture, a left->right homography is estimated
    between the patches, and the left keypoint is projected through it to replace the right
    keypoint. When no homography is available the annotated right keypoint is kept.

    Args:
        bp: body part name; key into left_kps and right_kps.
        left_crop_metadata, right_crop_metadata: dicts with 'x_coord' / 'y_coord', the offsets
        added to crop coordinates to obtain frame coordinates.
        left_fish_picture, right_fish_picture: objects exposing
        generate_crop_given_center(x, y, 600, 200), returning (patch, bbox).
        left_kps, right_kps: dicts mapping body part -> [xCrop, yCrop].
        sift: feature detector forwarded to get_homography_and_matches.
    Returns:
        (left_item, right_item, num_matches): keypoint dicts in annotation format and the sum
        of the returned matches mask.
    """
    left_kp = left_kps[bp]
    right_kp = right_kps[bp]
    left_crop, left_bbox = left_fish_picture.generate_crop_given_center(left_kp[0], left_kp[1], 600, 200)
    right_crop, right_bbox = right_fish_picture.generate_crop_given_center(right_kp[0], right_kp[1], 600, 200)

    match_result = get_homography_and_matches(sift, left_crop, right_crop,
                                              left_kps, right_kps,
                                              left_bbox, right_bbox)
    H, matches_mask = match_result[0], match_result[4]
    num_matches = sum(matches_mask)

    if H is not None:
        # crop coordinates -> left patch coordinates -> right patch coordinates -> crop coordinates
        left_local = np.array([left_kp[0] - left_bbox['x_min'], left_kp[1] - left_bbox['y_min']])
        right_local = cv2.perspectiveTransform(
            left_local.reshape(-1, 1, 2).astype(float), H).squeeze()
        right_kp = [right_local[0] + right_bbox['x_min'], right_local[1] + right_bbox['y_min']]

    def _as_annotation_item(kp, crop_metadata):
        return {
            'keypointType': bp,
            'xCrop': kp[0],
            'yCrop': kp[1],
            'xFrame': crop_metadata['x_coord'] + kp[0],
            'yFrame': crop_metadata['y_coord'] + kp[1]
        }

    left_item = _as_annotation_item(left_kp, left_crop_metadata)
    right_item = _as_annotation_item(right_kp, right_crop_metadata)
    return left_item, right_item, num_matches


def _load_enhanced_picture(image_f, scale_percent=50):
    """Loads an image, enhances a downscaled copy of it, and returns it as a full-size Picture.

    The image is shrunk to scale_percent of its size, enhanced, and then resized back to the
    original dimensions so that keypoint coordinates remain valid.
    """
    # the context manager releases the underlying file handle once the pixels are copied out
    with Image.open(image_f) as image:
        image_arr = Picture(image_arr=np.array(image)).image_arr

    orig_width = image_arr.shape[1]
    orig_height = image_arr.shape[0]
    small_dim = (int(orig_width * scale_percent / 100), int(orig_height * scale_percent / 100))

    small_picture = Picture(image_arr=cv2.resize(image_arr, small_dim, interpolation=cv2.INTER_AREA))
    small_picture.enhance(in_place=True)

    restored_arr = cv2.resize(small_picture.image_arr, (orig_width, orig_height),
                              interpolation=cv2.INTER_AREA)
    return Picture(image_arr=restored_arr)


def generate_refined_keypoints(ann, left_crop_url, right_crop_url):
    """Refines the right-crop keypoints of an annotation using local feature matching.

    Args:
        ann: dict with 'leftCrop' and 'rightCrop' lists of keypoint dicts (keys 'keypointType',
        'xCrop', 'yCrop', 'xFrame', 'yFrame').
        left_crop_url: location of the left crop image; passed to PIL's Image.open.
        right_crop_url: same for the right crop.
    Returns:
        Dict with 'leftCrop' and 'rightCrop' lists holding one keypoint dict per core body part,
        in core_body_parts order.
    """

    left_kps = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann['leftCrop']}
    right_kps = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann['rightCrop']}

    # the crop's offset inside the full frame, recovered from the first keypoint of each side
    left_crop_metadata = {
        'x_coord': ann['leftCrop'][0]['xFrame'] - ann['leftCrop'][0]['xCrop'],
        'y_coord': ann['leftCrop'][0]['yFrame'] - ann['leftCrop'][0]['yCrop']
    }
    right_crop_metadata = {
        'x_coord': ann['rightCrop'][0]['xFrame'] - ann['rightCrop'][0]['xCrop'],
        'y_coord': ann['rightCrop'][0]['yFrame'] - ann['rightCrop'][0]['yCrop']
    }

    left_fish_picture = _load_enhanced_picture(left_crop_url)
    right_fish_picture = _load_enhanced_picture(right_crop_url)

    # the detector is AKAZE; the `sift` name is kept because downstream parameters use it
    sift = cv2.AKAZE_create()
    left_items, right_items = [], []
    for bp in core_body_parts:
        left_item, right_item, _ = generate_sift_adjustment(bp, left_crop_metadata, left_fish_picture,
                                                            left_kps, right_crop_metadata,
                                                            right_fish_picture, right_kps, sift)
        left_items.append(left_item)
        right_items.append(right_item)

    modified_ann = {
        'leftCrop': left_items,
        'rightCrop': right_items
    }
    return modified_ann

# sift               = cv2.AKAZE_create()
# SCALE_PERCENT      = 100                                                   # percent
# RANSAC_THRESH      = 10.0
# INLIER_THRESH      = 30

# def get_kp_desc(image):
#     width = int(image.shape[1] * SCALE_PERCENT / 100)
#     height = int(image.shape[0] * SCALE_PERCENT / 100)
#     dim = (width, height)
#     image_rz = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
#     img = enhance(image_rz)
#     kp1, des1 = sift.detectAndCompute(img, None)
#     return (kp1, des1)

# def find_matches(im1, im2):
#     good = []
#     (kp1, des1) = get_kp_desc(im1)
#     (kp2, des2) = get_kp_desc(im2)
#     matcher = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
#     matches = matcher.knnMatch(des1, des2, 2)
#     # print("Raw Matches %d" % len(matches))
#     inliers = 0
#     for m, n in matches:
#         if m.distance < GOOD_PERC * n.distance:
#             good.append(m)
#     # print("Good Matches %d" % len(good))
#     if len(good) >= MIN_MATCH_COUNT:
#         src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
#         dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
#         for j in range(len(src_pts)):
#             src_pts[j][0][0] *= 100/SCALE_PERCENT
#             src_pts[j][0][1] *= 100/SCALE_PERCENT
#         for j in range(len(dst_pts)):
#             dst_pts[j][0][0] *= 100/SCALE_PERCENT
#             dst_pts[j][0][1] *= 100/SCALE_PERCENT
#         _, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, RANSAC_THRESH)
#         matches_mask = mask.ravel().tolist()
#         inliers = sum(matches_mask)
#     else:
#         print("Not enough matches are found - %d/%d" % (len(good), MIN_MATCH_COUNT))
#         matches_mask = None
#     print("Final Inlier Matches %d" % inliers)
#     return inliers #> 30



def main():
    """Fetches keypoint annotations from the database and refines each of them.

    Reads credential file paths from the AWS_CREDENTIALS and PROD_SQL_CREDENTIALS environment
    variables.

    Returns:
        List with one refined annotation (see generate_refined_keypoints) per fetched row.
    """
    # read credentials through context managers so the file handles are closed promptly
    with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_f:
        aws_credentials = json.load(aws_credentials_f)
    with open(os.environ['PROD_SQL_CREDENTIALS']) as sql_credentials_f:
        sql_credentials = json.load(sql_credentials_f)

    # not referenced below; still constructed as before in case instantiation has side effects
    s3_access_utils = S3AccessUtils('/root/data', aws_credentials)

    rds_access_utils = RDSAccessUtils(sql_credentials)

    query = """
        SELECT * FROM keypoint_annotations
        WHERE pen_id=5
        AND captured_at BETWEEN '2019-06-05' AND '2019-07-02'
        AND keypoints is not null
        AND keypoints -> 'leftCrop' is not null
        AND keypoints -> 'rightCrop' is not null
        AND is_qa = FALSE
        LIMIT 1;
    """

    df = rds_access_utils.extract_from_database(query)

    modified_anns = []
    for _, row in df.iterrows():
        modified_ann = generate_refined_keypoints(row.keypoints, row.left_image_url,
                                                  row.right_image_url)
        modified_anns.append(modified_ann)

    return modified_anns

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import statsmodels.api as sm
import os

from filter_optimization.filter_optimization_task import extract_biomass_data

from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

In [None]:
# Load the dataset and set up the per-row accumulators and the weight estimator.
# NOTE(review): the CSV location is a hard-coded absolute path.
# df = pd.read_csv('gtsf_data.csv')
df = pd.read_csv('/root/data/alok/biomass_estimation/playground/gtsf_akpr2.csv')

# Accumulators filled by the loop below: depths, depths2, depths3, lengths, lengths2, widths,
# widths2, weights, weights2. The remaining lists are only appended to by code that is
# currently commented out, so they stay empty.
depths = []
depths2 = []
depths3 = []
lengths = []
lengths2 = []
lengths3 = []
lengths4 = []
widths = []
widths2 = []

lengths_adj = []

modified_depths = []
modified_lengths = []
modified_widths = []

weights = []
weights2 = []
weights3 = []

mask = []

# Model files are given as relative paths, i.e. resolved against the kernel's working directory.
weight_model_f = 'weight_model.pb'
# weight_model2_f = '/root/data/alok/biomass_estimation/playground/output_model_bryton.pb'
# weight_model3_f = '/root/data/alok/biomass_estimation/playground/output_model_bryton2.pb'
kf_model_f = 'kf_model.pb'
weight_estimator2 = WeightEstimator(weight_model_f, kf_model_f)
# weight_estimator3 = WeightEstimator(weight_model2_f, kf_model_f)
# weight_estimator4 = WeightEstimator(weight_model3_f, kf_model_f)

# For every row: predict weight from the original and the modified keypoints and collect
# depth / length / width statistics from the corresponding world keypoints.
for idx, row in df.iterrows():
#     print(idx, len(df))
    # NOTE(security): eval executes whatever expression the CSV cell contains. Only run this on
    # trusted files; if the cells are plain literals, ast.literal_eval would be a safer parser.
    ann, modified_ann, cm = eval(row.keypoints), eval(row.modified_keypoints), eval(row.camera_metadata)
    wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    modified_wkps = pixel2world(modified_ann['leftCrop'], modified_ann['rightCrop'], cm)

    # repackage the camelCase camera metadata dict into the CameraMetadata tuple used by
    # WeightEstimator.predict
    cm_adj = CameraMetadata(
        focal_length=cm['focalLength'],
        focal_length_pixel=cm['focalLengthPixel'],
        baseline_m=cm['baseline'],
        pixel_count_width=cm['pixelCountWidth'],
        pixel_count_height=cm['pixelCountHeight'],
        image_sensor_width=cm['imageSensorWidth'],
        image_sensor_height=cm['imageSensorHeight']
    )
#     print(ann['leftCrop'])
#     left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
#     modified_ann = generate_refined_keypoints(ann, left_crop_url, right_crop_url)
#     modified_wkps = pixel2world(modified_ann['leftCrop'], modified_ann['rightCrop'], cm)

#     modified_depth = np.median([wkp[1] for wkp in modified_wkps.values()])
#     modified_depths.append(modified_depth)
#     modified_lengths.append(np.linalg.norm(modified_wkps['UPPER_LIP'] - modified_wkps['TAIL_NOTCH']))
#     modified_widths.append(np.linalg.norm(np.linalg.norm(modified_wkps['DORSAL_FIN'] - modified_wkps['PELVIC_FIN'])))

#     centroid = .5 * (wkps['DORSAL_FIN'] + wkps['PELVIC_FIN'])
# #     angle = np.linalg.norm(np.array(get_angles(wkps['UPPER_LIP'], centroid)) - np.array(get_angles(centroid, wkps['TAIL_NOTCH'])))
#     a = (wkps['UPPER_LIP'] - centroid) / np.linalg.norm(wkps['UPPER_LIP'] - centroid)
#     b = (wkps['TAIL_NOTCH'] - centroid) / np.linalg.norm(wkps['TAIL_NOTCH'] - centroid)

    # weights: prediction from the original keypoints; weights2: from the modified keypoints
    # (the predicted length and k-factor are discarded)
    new_weight, new_length, new_k_factor = weight_estimator2.predict(ann, cm_adj)
    weights.append(new_weight)
    new_weight, new_length, new_k_factor = weight_estimator2.predict(modified_ann, cm_adj)
    weights2.append(new_weight)
#     new_weight, new_length, new_k_factor = weight_estimator4.predict(ann, cm_adj)
#     weights3.append(new_weight)

#     wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
#     if 'HYPURAL_PLATE' not in wkps:
#         mask.append(False)
#         continue
#     else:
#         mask.append(True)
    # depth statistics use component 1 of each world keypoint:
    # depth = median (original), depth2 = median (modified), depth3 = minimum (modified)
    depth = np.median([wkp[1] for wkp in wkps.values()])
    depths.append(depth)
    depth2 = np.median([wkp[1] for wkp in modified_wkps.values()])
    depths2.append(depth2)
    depth3 = np.min([wkp[1] for wkp in modified_wkps.values()])
    depths3.append(depth3)

    # length: UPPER_LIP to TAIL_NOTCH distance; width: DORSAL_FIN to PELVIC_FIN distance
    # (suffix 2 = computed from the modified keypoints)
    lengths.append(np.linalg.norm(wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']))
    lengths2.append(np.linalg.norm(modified_wkps['UPPER_LIP'] - modified_wkps['TAIL_NOTCH']))
#     lengths2.append(np.linalg.norm(wkps['UPPER_LIP'] - wkps['HYPURAL_PLATE']))
#     lengths3.append(np.linalg.norm(wkps['EYE'] - wkps['TAIL_NOTCH']))
#     lengths4.append(np.linalg.norm(wkps['EYE'] - wkps['HYPURAL_PLATE']))
    widths.append(np.linalg.norm(wkps['DORSAL_FIN'] - wkps['PELVIC_FIN']))
    widths2.append(np.linalg.norm(modified_wkps['DORSAL_FIN'] - modified_wkps['PELVIC_FIN']))
#     widths2.append(np.linalg.norm(wkps['ADIPOSE_FIN'] - wkps['PECTORAL_FIN']))
#     lengths_adj.append(np.linalg.norm(wkps['UPPER_LIP'] - centroid) + np.linalg.norm(centroid - wkps['TAIL_NOTCH']))
    
# df = df[mask]

# Attach the per-row results as columns of df (this modifies df in place).
# Columns with suffix 2 / 3 come from the modified keypoints; see the loop above.
df['depth'] = depths
df['depth2'] = depths2
df['depth3'] = depths3
df['length'] = lengths
df['length2'] = lengths2
# df['length3'] = lengths3
# df['length4'] = lengths4
# df['length_adj'] = lengths_adj
df['width'] = widths
df['width2'] = widths2
# 'weights' / 'weights2' are model predictions; they are distinct from df.weight, which is
# not assigned anywhere in this cell.
df['weights'] = weights
df['weights2'] = weights2
# df['weights3'] = weights3
# df['modified_depth'] = modified_depths
# df['modified_length'] = modified_lengths
# df['modified_width'] = modified_widths

In [None]:
df.iloc[0]

In [None]:
# For every row, the largest frame-coordinate displacement between an original right-crop
# keypoint and the modified keypoint of the same type.
all_dists = []

for idx, row in df.iterrows():
    # NOTE(security): eval executes whatever expression the CSV cell contains. Only run this on
    # trusted files; if the cells are plain literals, ast.literal_eval would be a safer parser.
    old = eval(row.keypoints)['rightCrop']
    new = eval(row.modified_keypoints)['rightCrop']

    # index the modified keypoints by type once; setdefault keeps the first occurrence
    new_by_type = {}
    for new_keypoint in new:
        new_by_type.setdefault(new_keypoint['keypointType'], new_keypoint)

    dists = []
    for keypoint in old:
        new_keypoint = new_by_type.get(keypoint['keypointType'])
        if new_keypoint is None:
            # no modified counterpart for this keypoint type
            continue
        x_shift = keypoint['xFrame'] - new_keypoint['xFrame']
        y_shift = keypoint['yFrame'] - new_keypoint['yFrame']
        dists.append(np.sqrt(x_shift ** 2 + y_shift ** 2))

    # np.max raises on an empty list; record NaN when no keypoint type could be compared so
    # that all_dists keeps exactly one entry per row
    all_dists.append(np.max(dists) if dists else np.nan)
        
        

In [None]:
ad = np.array(all_dists)

df['akpr_distance'] = ad

plt.hist(ad[ad < 50])

In [None]:
plt.hist(df.weights2[df.depth3 > 0.3])

In [None]:
# NOTE(review): no earlier cell assigns df2 (it is first created in the next cell), so on a
# fresh "Restart & Run All" this line raises NameError.
df2[df2.length2 > 1]

In [None]:
mask = (df.akpr_distance < 50) & (df.modified_akpd_score > 0.95)
df2 = df[mask]

plt.scatter(df2.weights, df2.weights2)

In [None]:
np.mean(df2.weight), np.mean(df2.weights), np.mean(df2.weights2)

In [None]:
plt.scatter(df2.length, df2.length2)

In [None]:
X = np.log(df2.weight)
X = sm.add_constant(X)
model = sm.OLS(np.log(df2.weights), X)
results = model.fit()

print(results.summary())

In [None]:
X = np.log(df2.weight)
X = sm.add_constant(X)
model = sm.OLS(np.log(df2.weights2), X)
results = model.fit()

print(results.summary())

In [None]:
X = np.log(df2['length'])
X = sm.add_constant(X)
model = sm.OLS(np.log(df2.weight), X)
results = model.fit()

print(results.summary())

In [None]:
X = np.log(df2['length2'])
X = sm.add_constant(X)
model = sm.OLS(np.log(df2.weights2), X)
results = model.fit()

print(results.summary())

In [None]:
np.min(df2.weights2)

In [None]:


plt.scatter(df2.length, df2.weight, color = 'blue')
plt.scatter(df2.length2, df2.weights2, color = 'orange')

In [None]:
# plt.scatter(df.weight, df.weights2, color = 'red')
plt.scatter(df2.weight, df2.weights, color = 'blue', alpha = 0.5)
plt.scatter(df2.weight, df2.weights2, color = 'green', alpha = 0.5)


In [None]:
count, bins, _ = plt.hist(df2.weights2 - df2.weight, bins = 30, color = 'blue', alpha = 0.5, density = True)
count, bins, _ = plt.hist(df2.weights - df2.weight, bins = bins, color = 'red', alpha = 0.5, density = True)
# count, bins, _ = plt.hist(df.weights3 - df.weight, bins = bins, color = 'green', alpha = 0.5, density = True)

In [None]:
# Mean absolute difference between each predicted-weight column and df.weight.
# NOTE(review): the assignment of df['weights3'] is commented out in this notebook; unless the
# loaded CSV already has that column, the last term raises AttributeError.
np.mean(np.abs(df.weights - df.weight)), np.mean(np.abs(df.weights2 - df.weight)), np.mean(np.abs(df.weights3 - df.weight))

In [None]:
# Column means of df.weight and of each predicted-weight column.
# NOTE(review): the assignment of df['weights3'] is commented out in this notebook; unless the
# loaded CSV already has that column, the last term raises AttributeError.
np.mean(df.weight), np.mean(df.weights), np.mean(df.weights2), np.mean(df.weights3)

In [None]:
# Per weight bucket, print the percentage error of each mean predicted weight relative to the
# mean of df.weight inside the bucket.
# NOTE(review): bucket starts are 2000 apart but each bucket is only 1000 wide, and both bounds
# are strict, so rows with weight in [bucket + 1000, bucket + 2000] are never included --
# confirm this is intended.
# NOTE(review): the assignment of df['weights3'] is commented out in this notebook; unless the
# loaded CSV already has that column, avg_weight3 raises AttributeError.
# buckets = np.arange(0, 2, 0.1)
buckets = np.arange(0, 12000, 2000)

for bucket in buckets:
    mask = (df.weight > bucket) & (df.weight < bucket + 1000)
#     mask = (df2.depth > bucket) & (df2.depth < bucket + 0.1)
#     mask2 = (df2.depth2 > bucket) & (df2.depth2 < bucket + 0.1)
    df3 = df[mask]
#     df4 = df2[mask2]
    gt_weight = np.mean(df3.weight)
    avg_weight1 = np.mean(df3.weights)
    avg_weight2 = np.mean(df3.weights2)
    avg_weight3 = np.mean(df3.weights3)
#     print('%0.1f, %0.2f, %0.2f, %0.2f' % (bucket, gt_weight, avg_weight1, avg_weight2))
    # columns: bucket start, then percentage error for weights, weights2 and weights3
    print('%0.1f, %0.2f, %0.2f, %0.2f' % (bucket, 100 * (avg_weight1 - gt_weight) / gt_weight, 100 * (avg_weight2 - gt_weight) / gt_weight, 100 * (avg_weight3 - gt_weight) / gt_weight))
    
    

In [None]:
# Relative bias of the `weights` prediction per 1000-wide bucket of df.weight:
# mean(weights - weight) / mean(weight), computed over the rows of each bucket.
buckets = np.arange(0, 10000, 1000)
bias = []

for i in buckets:
    mask = (df.weight >= i) & (df.weight < (i + 1000))
    # NOTE(review): this reuses the name df2, replacing the filtered frame built in an earlier
    # cell; after the loop df2 holds only the rows of the last bucket, which is what later
    # cells reading df2 will see.
    df2 = df[mask]
    bias.append((np.mean(df2.weights - df2.weight)) / np.mean(df2.weight))

In [None]:
plt.plot(buckets, bias)

In [None]:
counts, bins, _  = plt.hist(df.weight, bins = 20)

for index, count in enumerate(counts):
    print(count, bins[index])
    
len(df[df.weight > 7000]) / len(df)

In [None]:
df[df.weights2 > 100000]

In [None]:
row = df.loc[1140]
modified_ann = eval(row.modified_keypoints)
cm = eval(row.camera_metadata)

cm_adj = CameraMetadata(
    focal_length=cm['focalLength'],
    focal_length_pixel=cm['focalLengthPixel'],
    baseline_m=cm['baseline'],
    pixel_count_width=cm['pixelCountWidth'],
    pixel_count_height=cm['pixelCountHeight'],
    image_sensor_width=cm['imageSensorWidth'],
    image_sensor_height=cm['imageSensorHeight']
)

modified_wkps = pixel2world(modified_ann['leftCrop'], modified_ann['rightCrop'], cm)
np.linalg.norm(modified_wkps['UPPER_LIP'] - modified_wkps['TAIL_NOTCH'])


# weight_estimator2.predict(modified_ann, cm_adj)

In [None]:
modified_ann

In [None]:
modified_wkps

In [None]:
plt.scatter(df.weights, df.weights2)

In [None]:
plt.figure(figsize=(20, 10))

plt.scatter(df.length, df.weight)
plt.scatter(df.length, df.weights)

x = np.arange(np.min(df.length), np.max(df.length), 0.01)
x2 = np.arange(np.min(df.length), np.max(df.length) + 0.05, 0.01)
y = (23.6068 * x) ** 3
plt.plot(x, y, color = 'red')
y = (22 * x2) ** 3
plt.plot(x2, y, color = 'green')
y = (25 * x) ** 3
plt.plot(x, y, color = 'green')
plt.axhline(9000)
plt.xlabel('Length (m)')
plt.ylabel('Weight (g)')
plt.title('GTSF Length vs Weight: w = (23.6068 * l) ^ 3')

In [None]:
plt.scatter(np.log(df.length), np.log(df.weight))




In [None]:
def get_weight(length):
    """Evaluates the cubic length-to-weight curve w = (23.6068 * l) ^ 3.

    Works element-wise on anything supporting `*` and `**` (scalars, numpy arrays, pandas
    Series).
    """
    scaled_length = length * 23.6068
    return scaled_length ** 3

In [None]:
# df.ix[0]
print(np.sum(np.abs(df.weight - get_weight(df['length'])) > 3000) / len(df))

In [None]:
plt.scatter(df.length, df.weight ** (2))


In [None]:
plt.scatter(np.log(df.length), np.log(df.weight) - np.log(df.length) * 1.6489 - np.log(df.width) * 1.3924 - 11.4894)

In [None]:
plt.scatter(np.exp(11.4894 + 1.6489 * np.log(df.length) + 1.3924 * np.log(df.width)), np.exp(9.5091 + 3.0856 * np.log(df.length)))





In [None]:
plt.scatter(df.weight, np.exp(11.4894 + 1.6489 * np.log(df.length) + 1.3924 * np.log(df.width)))
plt.plot(df.weight, df.weight, color = 'red')

In [None]:
plt.hist(df.weight - np.exp(11.4894 + 1.6489 * np.log(df.length) + 1.3924 * np.log(df.width)), bins = 30)

In [None]:
plt.scatter(df.weight, np.exp(9.5091 + 3.0856 * np.log(df.length)))
plt.plot(df.weight, df.weight, color = 'red')

In [None]:
plt.scatter(np.log(df.length), np.log(df.weight) - np.log(df.length) * 3.0856 - 9.5091)

In [None]:
plt.scatter(df.length, df.weight ** (1/3.08) - df.length * 11.9685 - df.width * 41.2623)

In [None]:
plt.scatter(df.length, df.weight ** (1/3.08) - df.length * 23.8999)

In [None]:
X = np.log(df[['length', 'width']])
X = sm.add_constant(X)
model = sm.OLS(np.log(df.weight), X)
results = model.fit()

print(results.summary())

In [None]:
import scipy.stats as stats

sm.qqplot(results.resid, fit = True, line="45")

In [None]:
X = df[['length', 'width']]
# X = sm.add_constant(X)
model = sm.OLS(df.weight ** (1 / 3.0856), X)
results = model.fit()

print(results.summary())

In [None]:
import scipy.stats as stats

sm.qqplot(results.resid, fit = True, line="45")

In [None]:
X = df.length
model = sm.OLS(df.weight ** (1/3), X)
results = model.fit()

print(results.summary())

In [None]:
X1 = df[['length']]
X2 = df[['length', 'width']]
X3 = df[['length', 'width', 'width2']]
# X1 = sm.add_constant(X1)
# X2 = sm.add_constant(X2)
# X3 = sm.add_constant(X3)
model1 = sm.OLS(df.weight ** (1/3), X1)
model2 = sm.OLS(df.weight ** (1/3), X2)
model3 = sm.OLS(df.weight ** (1/3), X3)
results1 = model1.fit()
results2 = model2.fit()
results3 = model3.fit()

print(results2.summary())

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression of the cube root of weight on X2, without an intercept.
# The former `normalize=True` argument is dropped: scikit-learn ignored it
# whenever fit_intercept=False, and the keyword has since been removed from
# Ridge, so passing it raises TypeError on current releases.
model = Ridge(alpha=2.0, fit_intercept=False)
model.fit(X2, df.weight ** (1/3))
# model.fit(X2 ** 3, df.weight)
print(model.coef_)

In [None]:
# (mean, 99.99th percentile) of the absolute residuals of the current `results`.
(
    np.mean(np.abs(results.resid)),
    np.percentile(np.abs(results.resid), 99.99),
)

In [None]:
# df['width2'] = ((df['width'] * .5) ** 2 * df['length']) ** (1/3)

# Compare four designs for the cube root of weight (no constant column).
# NOTE(review): 'length2', 'length3' and 'length4' are not created in this part
# of the notebook — they must already exist on df.
X1 = df[['length', 'length2', 'length3', 'length4']]
X2 = df[['length2']]
X3 = df[['length3']]
X4 = df[['length4']]

# Build all models first, then fit them, keeping the original evaluation order.
model1, model2, model3, model4 = (
    sm.OLS(df.weight ** (1/3), design) for design in (X1, X2, X3, X4)
)
results1, results2, results3, results4 = (
    candidate.fit() for candidate in (model1, model2, model3, model4)
)

print(results2.summary())

In [None]:
# Absolute weight error of model 1: its prediction is cubed to undo the
# cube-root target, then compared with the measured weight.
df['diff1'] = diff1 = np.abs(df.weight - results1.predict(X1) ** 3)

# (mean absolute error, 95th percentile of the absolute error)
np.mean(diff1), np.percentile(diff1, 95)

In [None]:
# Absolute weight error of model 2: its prediction is cubed to undo the
# cube-root target, then compared with the measured weight.
df['diff2'] = diff2 = np.abs(df.weight - results2.predict(X2) ** 3)

# (mean absolute error, 95th percentile of the absolute error)
np.mean(diff2), np.percentile(diff2, 95)

In [None]:
# diff2 = np.abs(df.weight - model.predict(X2) ** 3)

# np.mean(diff2), np.percentile(diff2, 95)

In [None]:
# Absolute weight error of model 3: its prediction is cubed to undo the
# cube-root target, then compared with the measured weight.
df['diff3'] = diff3 = np.abs(df.weight - results3.predict(X3) ** 3)

# (mean absolute error, 95th percentile of the absolute error)
np.mean(diff3), np.percentile(diff3, 95)

In [None]:
# Absolute weight error of model 4: its prediction is cubed to undo the
# cube-root target, then compared with the measured weight.
df['diff4'] = diff4 = np.abs(df.weight - results4.predict(X4) ** 3)

# (mean absolute error, 95th percentile of the absolute error)
np.mean(diff4), np.percentile(diff4, 95)

In [None]:
# Preview the first five rows, including the diff columns added above.
df.head(n=5)

In [None]:
# Count, per fish_id, the rows whose model-3 absolute error exceeds 1000,
# most frequent first.
# diff3 is built with np.abs, so the former `| (diff3 < 0)` clause could never
# select a row and has been removed.
(
    df[diff3 > 1000]
    .groupby(['fish_id'])
    .size()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)

In [None]:
# Show the first row of df2.
# NOTE(review): within this part of the notebook df2 is first assigned in a
# later cell; unless it is defined earlier in the notebook, this cell fails on
# a fresh kernel run from top to bottom.
df2.iloc[0]

In [None]:
# Cube the prediction of `results` for the input [.805, .22].
# NOTE(review): in top-to-bottom order the most recent `results` in this part
# of the notebook is the fit on `X = df.length` alone, yet two values are
# passed here — presumably a (length, width) pair meant for the two-regressor
# fit. Confirm which model is intended.
results.predict([.805, .22]) ** 3

In [None]:
# All rows of one fish, ordered by ascending model-3 absolute error.
df2 = (
    df[df.fish_id == '190710-2dcdda0c-502d-4bfc-9ce4-6ebb343e3f52']
    .sort_values(['diff3'])
)
# df2['diff'] = df2.weight - results.predict(df2[['length', 'width']]) ** 3
# df2[df2.diff1 < 500].head()

# Last row after sorting, i.e. the largest diff3 for this fish.
df2.iloc[-1]

In [None]:
# First row of df2; df2 was sorted by ascending diff3 when it was built in the
# preceding cell, so this is the smallest diff3 for the selected fish.
df2.iloc[0]

In [None]:
def display_crops(left_image_f, right_image_f, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crops stacked vertically, optionally with keypoints.

    Args:
        left_image_f: left crop image, passed to plt.imread.
        right_image_f: right crop image, passed to plt.imread.
        ann: dict with 'leftCrop' and 'rightCrop' lists; every item must provide
            'keypointType', 'xCrop' and 'yCrop'.
        overlay_keypoints: when True, draw each keypoint as a red dot on its crop.
        show_labels: when True (and overlay_keypoints is True), also write the
            keypoint type next to each dot.
    """
    _, (left_ax, right_ax) = plt.subplots(2, 1, figsize=(20, 20))
    left_image = plt.imread(left_image_f)
    right_image = plt.imread(right_image_f)
    left_ax.imshow(left_image)
    right_ax.imshow(right_image)

    def _crop_points(items):
        # Map keypoint type -> [x, y] in crop pixel coordinates.
        points = {}
        for item in items:
            points[item['keypointType']] = [item['xCrop'], item['yCrop']]
        return points

    left_ann, right_ann = ann['leftCrop'], ann['rightCrop']
    panels = (
        (left_ax, _crop_points(left_ann)),
        (right_ax, _crop_points(right_ann)),
    )

    if overlay_keypoints:
        for panel_ax, keypoints in panels:
            for body_part, (x_px, y_px) in keypoints.items():
                panel_ax.scatter([x_px], [y_px], color='red', s=10)
                if show_labels:
                    panel_ax.annotate(body_part, (x_px, y_px), color='red')
    plt.show()

In [None]:
import ast

# Load the AWS credentials inside a context manager so the file handle is
# closed promptly instead of being leaked.
with open(os.environ['AWS_CREDENTIALS']) as credentials_f:
    s3 = S3AccessUtils('/root/data', json.load(credentials_f))

# NOTE(review): 9535 is a hard-coded index label and must be present in df2.
row = df2.loc[9535]
left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
# The keypoints and camera_metadata columns are parsed from Python-literal strings.
ann, cm = ast.literal_eval(row.keypoints), ast.literal_eval(row.camera_metadata)
# World keypoints computed from the row's `keypoints` annotation.
wkps1 = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, ann, True, True)

In [None]:
# Print the EYE and TAIL_NOTCH world coordinates and the distance between them,
# once for wkps1 and once for wkps2.
# NOTE(review): wkps2 is assigned only in the cell that follows this one, so
# this cell fails on a fresh kernel run from top to bottom.
print(wkps1['EYE'], wkps1['TAIL_NOTCH'], np.linalg.norm(wkps1['EYE'] - wkps1['TAIL_NOTCH']))
print(wkps2['EYE'], wkps2['TAIL_NOTCH'], np.linalg.norm(wkps2['EYE'] - wkps2['TAIL_NOTCH']))

In [None]:
import ast

# Load the AWS credentials inside a context manager so the file handle is
# closed promptly instead of being leaked.
with open(os.environ['AWS_CREDENTIALS']) as credentials_f:
    s3 = S3AccessUtils('/root/data', json.load(credentials_f))

# NOTE(review): 9535 is a hard-coded index label and must be present in df2.
row = df2.loc[9535]
left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
# Unlike the earlier cell for the same row, this one reads `modified_keypoints`.
ann, cm = ast.literal_eval(row.modified_keypoints), ast.literal_eval(row.camera_metadata)
# World keypoints computed from the row's `modified_keypoints` annotation.
wkps2 = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, ann, True, True)

In [None]:
import time


# Measure the wall-clock time of generate_refined_keypoints followed by
# pixel2world (the df2 row lookup below is inside the timed region too).
# NOTE(review): `ann` and `cm` are not assigned in this cell (the line that
# would set them is commented out below), so they come from whichever cell
# assigned them last.
t0 = time.time()

# NOTE(review): `row` is not read by the remaining live code in this cell.
row = df2.iloc[0]
# left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
# print(left_crop_url)
# print(right_crop_url)
# Fixed file names are used here instead of the row's image URLs.
left_crop_url, right_crop_url = 'left_frame.jpg', 'right_frame.jpg'


# left_crop_f, _, _ = s3.download_from_url(left_crop_url)
# right_crop_f, _, _ = s3.download_from_url(right_crop_url)
# ann, cm = ast.literal_eval(row.keypoints), ast.literal_eval(row.camera_metadata)
modified_ann = generate_refined_keypoints(ann, left_crop_url, right_crop_url)
wkps = pixel2world(modified_ann['leftCrop'], modified_ann['rightCrop'], cm)

t1 = time.time()

total = t1-t0

# Elapsed seconds between t0 and t1.
print(total)

# display_crops(left_crop_url, right_crop_url, modified_ann, True, True)
# display_crops(left_crop_url, right_crop_url, ann, True, True)

In [None]:
import ast

# Load the AWS credentials inside a context manager so the file handle is
# closed promptly instead of being leaked.
with open(os.environ['AWS_CREDENTIALS']) as credentials_f:
    s3 = S3AccessUtils('/root/data', json.load(credentials_f))

# Last row of df2.
row = df2.iloc[-1]
left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
# The keypoints and camera_metadata columns are parsed from Python-literal strings.
ann, cm = ast.literal_eval(row.keypoints), ast.literal_eval(row.camera_metadata)
# NOTE(review): the image URLs are passed here rather than the downloaded
# files left_crop_f / right_crop_f, whereas the timing cell passes local file
# names — confirm which form generate_refined_keypoints expects.
modified_ann = generate_refined_keypoints(ann, left_crop_url, right_crop_url)
wkps = pixel2world(modified_ann['leftCrop'], modified_ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, modified_ann, True, True)

In [None]:
import ast

# Load the AWS credentials inside a context manager so the file handle is
# closed promptly instead of being leaked.
with open(os.environ['AWS_CREDENTIALS']) as credentials_f:
    s3 = S3AccessUtils('/root/data', json.load(credentials_f))

# Last row of df2, shown with its unrefined `keypoints` annotation.
row = df2.iloc[-1]
left_crop_url, right_crop_url = row.left_image_url, row.right_image_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
# The keypoints and camera_metadata columns are parsed from Python-literal strings.
ann, cm = ast.literal_eval(row.keypoints), ast.literal_eval(row.camera_metadata)
wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, ann, True, True)

In [None]:
# Display the most recently computed pixel2world result.
wkps

In [None]:
# Print, for every keypoint, component 1 of its world coordinate (the same
# component the depth computation later in this notebook takes the median of).
for kp, wkp in wkps.items():
    print(kp, wkp[1])

In [None]:
# Distribution of the length column within df2.
plt.hist(x=df2.length)

In [None]:
# Measured weight vs. the cubed prediction of `results` on df2's length/width.
# NOTE(review): this needs `results` to be a fit on exactly (length, width);
# confirm which fit is current when the cell is run.
plt.scatter(
    x=df2.weight,
    y=results.predict(df2[['length', 'width']]) ** 3,
)

In [None]:
# Measured weight vs. the cubed prediction of `results` on its design X.
plt.scatter(
    x=df.weight,
    y=results.predict(X) ** 3,
)

In [None]:
# Overlay the cubed predictions of model 3 (red dots) and model 4 (x markers)
# against the measured weight.
plt.figure(figsize=(20, 10))
# plt.scatter(df.weight, results2.predict(X2) ** 3, color = 'red')
plt.scatter(x=df.weight, y=results3.predict(X3) ** 3, color='red')
plt.scatter(x=df.weight, y=results4.predict(X4) ** 3, marker='x')

In [None]:
# Same comparison as the small scatter of `results`, on a 20x10 figure.
plt.figure(figsize=(20, 10))
plt.scatter(
    x=df.weight,
    y=results.predict(X) ** 3,
)

In [None]:
# Cache of extracted frames, looked up as queryCache[pen_id][start_date][end_date].
# Re-running this cell empties the cache.
queryCache = dict()

In [None]:
# Pen and date window to analyse.
pen_id = 60
df_start_date = '2020-08-24'
df_end_date = '2020-08-26'
# pen_id = 116
# df_start_date = '2020-10-26'
# df_end_date = '2020-10-30'

# Reuse the frame from queryCache when this exact (pen, start, end) was already
# extracted; otherwise extract it and derive depth/length per row.
if pen_id in queryCache and df_start_date in queryCache[pen_id] and df_end_date in queryCache[pen_id][df_start_date]:
    df = queryCache[pen_id][df_start_date][df_end_date]
else:
    # NOTE(review): the meaning of the trailing 0 argument is not visible here.
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0)
    df.date = pd.to_datetime(df.date)

    depths = []
    lengths = []
    for idx, row in df.iterrows():
        ann, cm = row.annotation, row.camera_metadata
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        # Depth: median of component 1 over all world keypoints.
        depth = np.median([wkp[1] for wkp in wkps.values()])
        # Length: distance between the UPPER_LIP and TAIL_NOTCH world keypoints.
        vector = wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']
        depths.append(depth)
        lengths.append(np.linalg.norm(vector))
    df['depth'] = depths
    df['length'] = lengths

    # Insert into the nested cache without discarding other date ranges already
    # cached for this pen (plain assignment to queryCache[pen_id] replaced the
    # pen's whole entry).
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df

In [None]:
# Keep only rows with akpd_score above 0.95. This rebinds `df` to a new frame;
# the frame stored in queryCache is left unfiltered.
df = df.loc[df.akpd_score > 0.95]

In [None]:
plt.figure(figsize=(20, 10))

# Estimated weight against length, one point per row.
plt.scatter(df.length, df.estimated_weight_g)

# Length grids for the reference curves; x2 extends 0.05 past the maximum length.
x = np.arange(np.min(df.length), np.max(df.length), 0.01)
x2 = np.arange(np.min(df.length), np.max(df.length) + 0.05, 0.01)

# Cubic reference curves weight = (slope * length) ** 3, drawn in the original
# order; `y` ends up holding the last (orange) curve, as before.
for _slope, _grid, _colour in ((23.6068, x, 'red'), (22, x2, 'green'), (25, x, 'orange')):
    y = (_slope * _grid) ** 3
    plt.plot(_grid, y, color=_colour)

# Horizontal marker at 9000 on the weight axis.
plt.axhline(9000)
plt.xlabel('Length (m)')
plt.ylabel('Weight (g)')
plt.title('Slapoya Length vs Weight')

In [None]:
# Rows recorded with hour in [7, 15]; df3 is an independent copy of the same rows.
df2 = df[(df.hour >= 7) & (df.hour <= 15)].copy()
df3 = df2.copy()

# a: estimated weights of the rows at or above 5000.
# b: length-based weights (23.6068 * length) ** 3 of the rows below 5000.
a = df3.loc[df3.estimated_weight_g >= 5000, 'estimated_weight_g']
b = (23.6068 * df3.loc[df3.estimated_weight_g < 5000, 'length']) ** 3

print(np.mean(df2.estimated_weight_g), np.mean(df3.estimated_weight_g))

In [None]:
# Mean of the length-based weight (23.6068 * length) ** 3 over all rows of df2;
# 23.6068 is the same slope used for the red reference curve in the plot above.
np.mean((23.6068 * df2.length) ** 3)

In [None]:
# Size-weighted mean of the two subsets `a` and `b`.
# An empty subset is skipped: np.mean of an empty input is NaN, and NaN * 0 is
# still NaN, which previously turned the whole result into NaN whenever one of
# the subsets had no rows. The float64 denominator keeps the "both empty" case
# returning NaN rather than raising ZeroDivisionError.
(
    sum(np.mean(part) * len(part) for part in (a, b) if len(part))
    / np.float64(len(a) + len(b))
)