In [None]:
%load_ext autoreload
%autoreload 2

import json
import pandas as pd
import numpy as np
from keras.models import load_model
from research_lib.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from weight_estimation.dataset import prepare_gtsf_data, compute_akpd_score
from weight_estimation.train import train, augment, normalize, get_data_split, train_model
from typing import Dict, Tuple


In [None]:
# Download the AKPD scorer model file and build the ground-truth dataframe from two date ranges.
# NOTE(review): '/root/data' is presumably the local download/cache root used by S3AccessUtils — confirm.
s3 = S3AccessUtils('/root/data')
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'
# download_from_url returns a 3-tuple; only the first element (used below as the model file) is kept.
akpd_scorer_f, _, _ = s3.download_from_url(akpd_scorer_url)
# NOTE(review): the trailing 0.5 and 1.0 are positional arguments of prepare_gtsf_data whose
# meaning is not visible in this notebook (presumably filtering thresholds) — confirm.
df1 = prepare_gtsf_data('2019-03-01', '2019-09-20', akpd_scorer_f, 0.5, 1.0)

df2 = prepare_gtsf_data('2020-06-01', '2020-08-20', akpd_scorer_f, 0.5, 1.0)
# Both ranges are stacked without ignore_index, so each frame keeps its own index labels
# and labels may repeat in `df`.
df = pd.concat([df1, df2])

In [None]:
def simulate_larger_fish(world_keypoints, weight, pct_inflation,
                         weight_exponent=3.0852, body_parts=None):
    """Scale a fish's world keypoints uniformly and scale its weight by a power law.

    Args:
        world_keypoints: mapping from body-part name to its world coordinates
            (any array-like; converted with ``np.asarray`` so plain lists work too).
        weight: weight of the original fish.
        pct_inflation: fractional linear growth, e.g. 0.15 enlarges every coordinate by 15%.
        weight_exponent: exponent applied to the linear scale factor to obtain the weight
            scale factor. Defaults to the value previously hard-coded here (3.0852).
        body_parts: iterable of body-part names to transform. Defaults to the
            notebook-level ``core_body_parts``.

    Returns:
        (modified_world_keypoints, modified_weight): a new dict holding the scaled
        coordinates for each requested body part, and the scaled weight. The input
        mapping is not modified.
    """
    if body_parts is None:
        # Resolved at call time so the notebook-level list only has to exist
        # when the default is actually used.
        body_parts = core_body_parts

    scale = 1.0 + pct_inflation
    modified_world_keypoints = {
        body_part: scale * np.asarray(world_keypoints[body_part])
        for body_part in body_parts
    }
    modified_weight = scale ** weight_exponent * weight
    return modified_world_keypoints, modified_weight

def get_ann_from_world_keypoints(world_keypoints, cm):
    """Project world keypoints back into a left/right pixel annotation.

    Args:
        world_keypoints: mapping from body-part name to an (x, y, z) triple; y is
            used as the divisor in every projection below.
        cm: camera metadata dict; the keys 'focalLengthPixel', 'pixelCountWidth',
            'pixelCountHeight' and 'baseline' are read.

    Returns:
        dict with keys 'leftCrop' and 'rightCrop'. Each value is a list with one
        entry per name in ``core_body_parts`` holding 'keypointType', 'xFrame'
        and 'yFrame' (rounded pixel coordinates). The right-crop x coordinate is
        the left-crop x coordinate minus the rounded disparity.
    """
    left_crop, right_crop = [], []
    for part in core_body_parts:
        wx, wy, wz = world_keypoints[part]
        focal_px = cm['focalLengthPixel']

        # Pixel position in the left frame, measured from the image centre.
        col = round(wx * focal_px / wy + cm['pixelCountWidth'] / 2.0)
        row = round(cm['pixelCountHeight'] / 2.0 - wz * focal_px / wy)
        # Horizontal shift between the two views for this keypoint.
        shift = round(focal_px * cm['baseline'] / wy)

        left_crop.append(dict(keypointType=part, xFrame=col, yFrame=row))
        right_crop.append(dict(keypointType=part, xFrame=col - shift, yFrame=row))

    return {'leftCrop': left_crop, 'rightCrop': right_crop}

In [None]:
from weight_estimation.body_parts import core_body_parts
from weight_estimation.dataset import prepare_gtsf_data, compute_akpd_score

from research.weight_estimation.keypoint_utils.optics import pixel2world

# Rows whose weight lies in [7000, 9000] are the ones that get inflated into synthetic fish.
mask = (df.weight >= 7000) & (df.weight <= 9000) 
# Fractional linear growth applied to every selected fish.
max_pct_inflation = 0.15


# Compute world keypoints for every row of `df` (not only the masked rows).
world_keypoints = []
for idx, row in df.iterrows():
    ann, cm = row.keypoints, row.camera_metadata
    wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    world_keypoints.append(wkps)
    
# Adds a column to `df` in place; re-running the cell recomputes and overwrites it.
df['world_keypoints'] = world_keypoints

# NOTE(review): the next four statements repeat the S3 setup and scorer download from the
# data-loading cell; only load_model below is new work.
from research_lib.utils.data_access_utils import S3AccessUtils
s3 = S3AccessUtils('/root/data')
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'
akpd_scorer_f, _, _ = s3.download_from_url(akpd_scorer_url)
from keras.models import load_model
akpd_scorer_network = load_model(akpd_scorer_f)

# Parallel lists: one entry per synthetic fish, consumed together by a later cell.
modified_ann_list = []
modified_weight_list = []
modified_akpd_score_list = []
cm_list = []

for idx, row in df[mask].iterrows():
    # NOTE: rebinding `world_keypoints` here replaces the list built above with a single
    # row's value; after this loop the name no longer refers to the full list.
    world_keypoints = row.world_keypoints
    cm = row.camera_metadata
    weight = row.weight
    modified_world_keypoints, modified_weight = simulate_larger_fish(world_keypoints, weight, max_pct_inflation)
    # Re-project the enlarged fish into pixel annotations with the same camera metadata.
    modified_ann = get_ann_from_world_keypoints(modified_world_keypoints, cm)
    
    modified_ann_list.append(modified_ann)
    modified_weight_list.append(modified_weight)
    # Score the synthetic annotation so low-scoring projections can be filtered later.
    modified_akpd_score_list.append(compute_akpd_score(akpd_scorer_network, modified_ann, cm))
    cm_list.append(cm)

In [None]:
from collections import defaultdict
import json
import os
import random
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
from weight_estimation.utils import get_left_right_keypoint_arrs, \
     convert_to_world_point_arr, CameraMetadata
from weight_estimation.dataset import prepare_gtsf_data
from keras.layers import Input, Dense, Flatten, Dropout
from keras.models import Model
import keras
from research_lib.utils.data_access_utils import S3AccessUtils
import torch
from torch import nn
from sklearn.linear_model import LinearRegression

class Network(nn.Module):
    """Fully connected regressor: 24 inputs -> 256 -> 128 -> 64 -> 1 output.

    ReLU follows each of the three hidden layers; the output layer is linear.
    The same architecture is used for both weight and k-factor estimation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the scalar prediction for each sample in the batch (shape (batch, 1))."""
        return self.output(self.forward_intermediate(x))

    def forward_intermediate(self, x):
        """Return the activations of the last hidden layer (shape (batch, 64)).

        The input is flattened per sample first, so any trailing shape with
        24 elements per sample is accepted.
        """
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return hidden
    
class NetworkWithDropout(nn.Module):
    """Fully connected regressor (24 -> 256 -> 128 -> 64 -> 1) with dropout.

    Each hidden layer is followed by ReLU and then dropout (p=0.01); the output
    layer is linear. Dropout is only active while the module is in training mode.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.01)

    def forward(self, x):
        """Return the scalar prediction for each sample in the batch (shape (batch, 1))."""
        return self.output(self.forward_intermediate(x))

    def forward_intermediate(self, x):
        """Return the (dropout-processed) activations of the last hidden layer.

        The input is flattened per sample first; the result has shape (batch, 64).
        """
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(self.relu(layer(hidden)))
        return hidden

def augment(df: pd.DataFrame, augmentation_config: Dict) -> pd.DataFrame:
    """Create depth-rescaled, jittered copies of every annotated fish.

    Fish in sparsely populated 1000-wide weight bins are replicated more often:
    the number of copies per fish is ``int(5 / (bin_count / max_bin_count))``.

    Args:
        df: dataframe with columns ``keypoints``, ``camera_metadata``, ``weight``,
            ``fish_id`` and ``k_factor``.
        augmentation_config: dict providing ``max_jitter_std`` (jitter standard
            deviation before depth scaling), ``min_depth`` and ``max_depth``
            (bounds of the uniformly sampled target depth).

    Returns:
        DataFrame with one row per augmented sample and columns ``annotation``,
        ``fish_id``, ``weight``, ``kf``, ``camera_metadata`` and ``depth`` (the
        sampled target depth of that sample).
    """
    counts, _ = np.histogram(df.weight, bins=np.arange(0, 10000, 1000))

    # Empty bins get zero trials explicitly instead of dividing by zero and
    # casting inf to int, which is undefined.
    trial_values = np.zeros(len(counts), dtype=int)
    populated = counts > 0
    trial_values[populated] = (5.0 / (counts[populated] / np.max(counts))).astype(int)

    max_jitter_std = augmentation_config['max_jitter_std']
    min_depth = augmentation_config['min_depth']
    max_depth = augmentation_config['max_depth']

    augmented_data = defaultdict(list)
    for _, row in df.iterrows():

        camera_metadata = row.camera_metadata
        cm = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )

        # The histogram edges stop at 9000, so heavier fish are clamped onto the
        # last bin and reuse that bin's trial count.
        trials = trial_values[min(int(row.weight / 1000), len(trial_values) - 1)]
        if trials <= 0:
            continue

        # Loop-invariant per fish: pixel arrays and the depth the fish was observed at.
        base_left, base_right = get_left_right_keypoint_arrs(row.keypoints)
        wkps = convert_to_world_point_arr(base_left, base_right, cm)
        original_depth = np.median(wkps[:, 1])

        for _ in range(trials):
            depth = np.random.uniform(min_depth, max_depth)
            scaling_factor = float(original_depth) / depth

            # Jitter shrinks with distance in the same proportion as the fish does.
            # The configured value is used here; the previous code hard-coded 5.
            jitter_std = max_jitter_std * scaling_factor

            # rescale (creates fresh arrays, so the per-fish base arrays stay intact)
            X_left = base_left * scaling_factor
            X_right = base_right * scaling_factor

            # add jitter to the x coordinates only
            X_left[:, 0] += np.random.normal(0, jitter_std, X_left.shape[0])
            X_right[:, 0] += np.random.normal(0, jitter_std, X_right.shape[0])

            # reconstruct annotation
            # NOTE(review): get_ann_from_keypoint_arrs is neither defined nor imported in
            # the visible notebook cells — confirm it is in scope before running.
            ann = get_ann_from_keypoint_arrs(X_left, X_right)
            augmented_data['annotation'].append(ann)
            augmented_data['fish_id'].append(row.fish_id)
            augmented_data['weight'].append(row.weight)
            augmented_data['kf'].append(row.k_factor)
            augmented_data['camera_metadata'].append(row.camera_metadata)
            # Recorded so that per-depth error reports can bucket samples by depth.
            augmented_data['depth'].append(depth)

    augmented_df = pd.DataFrame(augmented_data)
    return augmented_df

In [None]:
import uuid

# Combine the real fish with the synthetic (inflated) fish into one training frame `tdf`.
# NOTE: `mask` is rebound here; it no longer refers to the 7000-9000 weight selection.
mask = df.weight > 0
tdf_original = df.loc[mask, ['keypoints', 'fish_id', 'weight', 'k_factor', 'camera_metadata']].copy(deep=True)

annotation_list = []
fish_id_list = []
weight_list = []
kf_list = []
akpd_score_list = []
camera_metadata_list = []

for ann, weight, akpd_score, camera_metadata in zip(modified_ann_list, modified_weight_list, modified_akpd_score_list, cm_list):
    # Drop synthetic fish whose re-projected keypoints score below 0.5 (or have negative weight).
    if akpd_score < 0.5 or weight < 0:
        continue
    
    annotation_list.append(ann)
    # Each synthetic fish gets a fresh UUID object as its fish_id so it is treated as its own fish.
    fish_id_list.append(uuid.uuid1())
    weight_list.append(weight)
    # NOTE(review): k_factor is set to the constant 1.0 for synthetic fish — presumably a placeholder; confirm.
    kf_list.append(1.0)
    akpd_score_list.append(akpd_score)
    camera_metadata_list.append(camera_metadata)
    
tdf_synthetic = pd.DataFrame({
    'keypoints': annotation_list,
    'fish_id': fish_id_list,
    'weight': weight_list,
    'k_factor': kf_list,
    'akpd_score': akpd_score_list,
    'camera_metadata': camera_metadata_list
})

# tdf_original has no 'akpd_score' column, so that column is NaN for the original rows after concat.
tdf = pd.concat([tdf_original, tdf_synthetic])

In [None]:
# Settings passed to augment(): jitter scale and the range of simulated depths.
# NOTE(review): the augment() defined earlier in this notebook reads only 'max_jitter_std',
# 'min_depth' and 'max_depth'; 'trials' is not read there — confirm whether it is still needed.
augmentation_config = dict(
    trials=10,
    max_jitter_std=5,
    min_depth=0.5,
    max_depth=2.5
)

# Augmented dataset used by all normalisation / training cells below.
augmented_df3 = augment(tdf, augmentation_config)

In [None]:
"""
Constants naming the fish body parts (keypoint types), grouped into the core
set used for keypoint arrays and an auxiliary set.
"""

# --- core keypoints ---
UPPER_LIP = 'UPPER_LIP'
EYE = 'EYE'
PECTORAL_FIN = 'PECTORAL_FIN'
DORSAL_FIN = 'DORSAL_FIN'
PELVIC_FIN = 'PELVIC_FIN'
ADIPOSE_FIN = 'ADIPOSE_FIN'
ANAL_FIN = 'ANAL_FIN'
TAIL_NOTCH = 'TAIL_NOTCH'

# --- auxiliary keypoints ---
UPPER_PRECAUDAL_PIT = 'UPPER_PRECAUDAL_PIT'
LOWER_PRECAUDAL_PIT = 'LOWER_PRECAUDAL_PIT'
HYPURAL_PLATE = 'HYPURAL_PLATE'

# Alphabetical order defines the row order of every keypoint array.
core_body_parts = [
    UPPER_LIP, EYE, PECTORAL_FIN, DORSAL_FIN,
    PELVIC_FIN, ADIPOSE_FIN, ANAL_FIN, TAIL_NOTCH,
]
core_body_parts.sort()

auxiliary_body_parts = [UPPER_PRECAUDAL_PIT, LOWER_PRECAUDAL_PIT, HYPURAL_PLATE]
auxiliary_body_parts.sort()

all_body_parts = sorted([*core_body_parts, *auxiliary_body_parts])

# Alias: the same list object as core_body_parts.
BODY_PARTS = core_body_parts

In [None]:
"""This module contains utility helper functions for the WeightEstimator class."""

from collections import namedtuple
from typing import Dict, List, Tuple
import numpy as np
import torch
from research.weight_estimation.keypoint_utils import body_parts


# Immutable record of the stereo-camera parameters consumed by the keypoint helpers.
CameraMetadata = namedtuple(
    'CameraMetadata',
    'focal_length focal_length_pixel baseline_m '
    'pixel_count_width pixel_count_height '
    'image_sensor_width image_sensor_height',
)


def get_left_right_keypoint_arrs(annotation: Dict[str, List[Dict]]) -> Tuple:
    """Build left/right pixel-coordinate arrays from a keypoint annotation.

    Args:
        annotation: dict with keys 'leftCrop' and 'rightCrop'. Each value is a list
        of dicts; the keys 'keypointType', 'xFrame' and 'yFrame' of every item are
        read here.
    Returns:
        X_left: numpy array with one (xFrame, yFrame) row per core body part, in the
        order of ``body_parts.core_body_parts``.
        X_right: same as above, but for the right crop.
    """

    def _frame_coords(crop_items):
        # Later duplicates of a keypoint type overwrite earlier ones.
        return {kp['keypointType']: (kp['xFrame'], kp['yFrame']) for kp in crop_items}

    left_by_part = _frame_coords(annotation['leftCrop'])
    right_by_part = _frame_coords(annotation['rightCrop'])

    # Look up left and right together, part by part, in core-body-part order.
    pairs = [(left_by_part[part], right_by_part[part])
             for part in body_parts.core_body_parts]

    X_left = np.array([left for left, _ in pairs])
    X_right = np.array([right for _, right in pairs])
    return X_left, X_right


def normalize_left_right_keypoint_arrs(X_left: np.ndarray, X_right: np.ndarray) -> Tuple:
    """Centre, optionally mirror, and rotate a pair of left/right keypoint arrays.

    Steps:
      1. If the UPPER_LIP x coordinate in the left array is not greater than the
         TAIL_NOTCH x coordinate, the roles of the two arrays are swapped and the
         x axis is negated.
      2. Both arrays are translated by the bounding-box centre of the reference array.
      3. The reference array is rotated about the origin so that the UPPER_LIP lies on
         the horizontal axis; the second array is rebuilt from the pre-rotation offsets
         between the two arrays.

    Returns:
        (X_left_rot, X_right_rot): the normalised reference array and its counterpart.
    """
    parts = body_parts.core_body_parts
    lip = parts.index(body_parts.UPPER_LIP)
    notch = parts.index(body_parts.TAIL_NOTCH)

    # translate key-points, perform reflection if necessary
    mirrored = not (X_left[lip, 0] > X_left[notch, 0])
    reference, other = (X_right, X_left) if mirrored else (X_left, X_right)
    midpoint = 0.5 * (np.max(reference, axis=0) + np.min(reference, axis=0))
    reference_c = reference - midpoint
    other_c = other - midpoint
    if mirrored:
        reference_c[:, 0] = -reference_c[:, 0]
        other_c[:, 0] = -other_c[:, 0]

    # rotate key-points
    lip_x, lip_y = tuple(reference_c[lip])
    theta = np.arctan(lip_y / lip_x)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    R = np.array([
        [cos_t, -sin_t],
        [sin_t, cos_t]
    ])

    # The per-keypoint offsets are taken before rotation and carried over unchanged.
    offset = reference_c - other_c
    reference_rot = np.dot(reference_c, R)
    return reference_rot, reference_rot - offset


def convert_to_world_point_arr(X_left: np.ndarray, X_right: np.ndarray,
                               camera_metadata: CameraMetadata) -> np.ndarray:
    """Triangulate left/right keypoint arrays into an (n_keypoints, 3) world array.

    Column 1 (y) is ``focal_length_pixel * baseline_m / disparity`` where disparity
    is the difference of the left and right x coordinates. Columns 0 (x) and 2 (z)
    scale the left-array coordinates by that value via the sensor-size / pixel-count
    ratio and ``focal_length``.
    """
    cam = camera_metadata

    disparity = X_left[:, 0] - X_right[:, 0]
    y_world = cam.focal_length_pixel * cam.baseline_m / disparity

    # Note: the pixel-based formula below is technically the correct conversion
    # (kept for reference, not used):
    #   x_world = X_left[:, 0] * y_world / focal_length_pixel
    #   z_world = -X_left[:, 1] * y_world / focal_length_pixel
    sensor_x = X_left[:, 0] * cam.image_sensor_width / cam.pixel_count_width
    sensor_z = -(X_left[:, 1] * cam.image_sensor_height / cam.pixel_count_height)
    x_world = (sensor_x * y_world) / (cam.focal_length)
    z_world = (sensor_z * y_world) / (cam.focal_length)

    return np.vstack((x_world, y_world, z_world)).T


def stabilize_keypoints(X: np.ndarray) -> np.ndarray:
    """Re-express world coordinates relative to depth (column 1).

    Output columns: ``0.5 * x / y``, ``0.5 * z / y`` and ``0.05 / y``. The result
    is a new float array with the same shape as ``X``; the input is not modified.
    """
    depth = X[:, 1]
    stabilized = np.zeros(X.shape)
    for column, values in enumerate((0.5 * X[:, 0] / depth,
                                     0.5 * X[:, 2] / depth,
                                     0.05 / depth)):
        stabilized[:, column] = values
    return stabilized

def generate_rotation_matrix(n, theta):
    """Return the 3x3 matrix rotating by angle ``theta`` about the unit vector ``n``.

    Only ``n[0]``, ``n[1]`` and ``n[2]`` are read; ``n`` is expected to be
    normalised by the caller.
    """
    c, s = np.cos(theta), np.sin(theta)
    t = 1 - c
    nx, ny, nz = n[0], n[1], n[2]
    return np.array([
        [c + nx ** 2 * t, nx * ny * t - nz * s, nx * nz * t + ny * s],
        [ny * nx * t + nz * s, c + ny ** 2 * t, ny * nz * t - nx * s],
        [nz * nx * t - ny * s, nz * ny * t + nx * s, c + nz ** 2 * t],
    ])

def center_3d_coordinates(wkps):
    """Rotate (and possibly mirror) world keypoints into a canonical pose.

    1. Rotate about the origin so that the median of the first 8 keypoints lies on
       the positive y-axis.
    2. Rotate about the y-axis so that the UPPER_LIP -> TAIL_NOTCH line has no
       z component.
    3. Mirror the x axis if needed so that UPPER_LIP has the larger x coordinate.

    Args:
        wkps: (n_keypoints, 3) array of world coordinates, rows ordered like BODY_PARTS.

    Returns:
        New (n_keypoints, 3) array; the input array is not modified.
    """
    y_axis = np.array([0.0, 1.0, 0.0])

    # Unit vector from the origin towards the fish centroid.
    v = np.median(wkps[:8], axis=0)
    v = v / np.linalg.norm(v)

    n = np.cross(y_axis, v)
    n_norm = np.linalg.norm(n)
    # When the centroid already lies on the y-axis the rotation axis has zero length;
    # normalising it would fill the result with NaN, so the rotation is skipped.
    if n_norm > 1e-12:
        n = n / n_norm
        # Rounding can push the dot product of two unit vectors slightly outside
        # [-1, 1], where arccos returns NaN.
        theta = -np.arccos(np.clip(np.dot(y_axis, v), -1.0, 1.0))
        R = generate_rotation_matrix(n, theta)
        wkps = np.dot(R, wkps.T).T

    # rotate about y-axis so that fish is straight
    upper_lip_idx = BODY_PARTS.index('UPPER_LIP')
    tail_notch_idx = BODY_PARTS.index('TAIL_NOTCH')
    v = wkps[upper_lip_idx] - wkps[tail_notch_idx]
    theta = np.arctan(v[2] / v[0])
    R = generate_rotation_matrix(y_axis, theta)
    wkps = np.dot(R, wkps.T).T

    # perform reflection so that fish is forced to look right
    if wkps[upper_lip_idx][0] < wkps[tail_notch_idx][0]:
        R = np.array([
            [-1, 0, 0],
            [0, 1, 0],
            [0, 0, 1]
        ])
        wkps = np.dot(R, wkps.T).T
    return wkps

def convert_to_nn_input(annotation: Dict[str, List[Dict]], camera_metadata: CameraMetadata) \
        -> torch.Tensor:
    """Convert a keypoint annotation plus camera metadata into a network input tensor.

    Pipeline: pixel arrays -> 2D normalisation -> world coordinates -> depth
    stabilisation. The result is a float tensor with a leading batch axis of size 1.
    """
    left_px, right_px = get_left_right_keypoint_arrs(annotation)
    left_norm, right_norm = normalize_left_right_keypoint_arrs(left_px, right_px)
    stabilized = stabilize_keypoints(
        convert_to_world_point_arr(left_norm, right_norm, camera_metadata))
    batch = np.array([stabilized])
    return torch.from_numpy(batch).float()

def convert_to_nn_input_flat(annotation: Dict[str, List[Dict]], camera_metadata: CameraMetadata) \
        -> torch.Tensor:
    """Convert a keypoint annotation into a flat (1-D) float tensor of world coordinates.

    Pipeline: pixel arrays -> 2D normalisation -> world coordinates. Unlike
    ``convert_to_nn_input`` no depth stabilisation is applied, and the tensor is
    flattened before being returned.
    """
    left_px, right_px = get_left_right_keypoint_arrs(annotation)
    left_norm, right_norm = normalize_left_right_keypoint_arrs(left_px, right_px)
    world = convert_to_world_point_arr(left_norm, right_norm, camera_metadata)
    batch = torch.from_numpy(np.array([world])).float()
    return torch.flatten(batch)

def convert_to_nn_input_new(annotation: Dict[str, List[Dict]], camera_metadata: CameraMetadata) \
        -> torch.Tensor:
    """Convert a keypoint annotation into a flat float tensor of 3D-centred world coordinates.

    Pipeline: pixel arrays -> world coordinates -> ``center_3d_coordinates``. Neither
    the 2D normalisation nor the depth stabilisation is applied; the tensor is
    flattened before being returned.
    """
    left_px, right_px = get_left_right_keypoint_arrs(annotation)
    world = convert_to_world_point_arr(left_px, right_px, camera_metadata)
    centered = center_3d_coordinates(world)
    batch = torch.from_numpy(np.array([centered])).float()
    return torch.flatten(batch)

In [None]:
def get_data_split(X: np.ndarray, y: np.ndarray, fish_ids: np.ndarray, train_pct: float,
                   val_pct: float) -> Tuple:
    """Randomly split samples into train / val / test sets without sharing fish IDs.

    Every unique fish ID is assigned to exactly one split, so all samples of a fish
    (e.g. its augmented copies) end up together.

    Args:
        X: sample array, indexed along axis 0.
        y: target array aligned with X.
        fish_ids: fish ID of each sample, aligned with X.
        train_pct: expected fraction of fish assigned to the training set.
        val_pct: expected fraction of fish assigned to the validation set; the
            remainder goes to the test set.

    Returns:
        (X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask,
        test_mask); the masks are boolean arrays over the samples.
    """
    # Clamp the tiny negative remainder that rounding can produce when
    # train_pct + val_pct is (numerically) 1.0.
    test_pct = max(0.0, 1.0 - train_pct - val_pct)

    # De-duplicate in first-seen order. A set() would order IDs by hash, which makes
    # the split irreproducible across kernels even with a fixed NumPy seed.
    unique_fish_ids = np.array(list(dict.fromkeys(fish_ids)))

    train_cnt, val_cnt, test_cnt = np.random.multinomial(len(unique_fish_ids),
                                                         [train_pct, val_pct, test_pct])

    # 0 = train, 1 = val, 2 = test; shuffled so assignment is independent of ID order.
    assignments = np.array([0] * train_cnt + [1] * val_cnt + [2] * test_cnt)
    np.random.shuffle(assignments)
    train_fish_ids = unique_fish_ids[assignments == 0]
    val_fish_ids = unique_fish_ids[assignments == 1]
    test_fish_ids = unique_fish_ids[assignments == 2]

    train_mask = np.isin(fish_ids, train_fish_ids)
    val_mask = np.isin(fish_ids, val_fish_ids)
    test_mask = np.isin(fish_ids, test_fish_ids)

    X_train, y_train = X[train_mask], y[train_mask]
    X_val, y_val = X[val_mask], y[val_mask]
    X_test, y_test = X[test_mask], y[test_mask]

    return X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask


def _flatten_samples(X):
    """Reshape an array of samples to 2-D (n_samples, n_features)."""
    X = np.asarray(X)
    return X.reshape(len(X), int(np.prod(X.shape[1:])))


def _fit_dense_regressor(X_train, y_train, X_val, y_val, train_config, dropout_rate=None):
    """Build, compile and fit the 24 -> 256 -> 128 -> 64 -> 1 Keras regressor.

    When ``dropout_rate`` is given, a Dropout layer with that rate follows each
    hidden layer. Shared implementation of ``train_model`` and
    ``train_model_with_dropout``.
    """
    # Keypoint features may arrive with extra axes, e.g. (N, 1, 8, 3); the network
    # input is flat, so collapse everything but the sample axis.
    X_train = _flatten_samples(X_train)
    X_val = _flatten_samples(X_val)

    inputs = Input(shape=(24,))
    x = inputs
    for units in (256, 128, 64):
        x = Dense(units, activation='relu')(x)
        if dropout_rate is not None:
            x = Dropout(dropout_rate)(x)
    pred = Dense(1)(x)
    model = Model(inputs, pred)

    epochs = train_config['epochs']
    batch_size = train_config['batch_size']
    lr = train_config['learning_rate']
    patience = train_config['patience']

    # Stop once the validation loss has not improved for `patience` epochs.
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                               min_delta=0,
                                               patience=patience,
                                               verbose=0,
                                               mode='auto')]

    optimizer = keras.optimizers.Adam(learning_rate=lr)
    # This is a regression task, so report mean absolute error rather than
    # classification accuracy.
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callbacks,
              batch_size=batch_size, epochs=epochs)

    return model


def train_model_with_dropout(X_train, y_train, X_val, y_val, train_config):
    """Train the dense Keras regressor with dropout (rate 0.01) after each hidden layer.

    Args:
        X_train, y_train: training samples and targets; X is flattened per sample.
        X_val, y_val: validation samples and targets used for early stopping.
        train_config: dict with 'epochs', 'batch_size', 'learning_rate' and 'patience'.

    Returns:
        The fitted Keras model.
    """
    return _fit_dense_regressor(X_train, y_train, X_val, y_val, train_config,
                                dropout_rate=0.01)


def train_model(X_train, y_train, X_val, y_val, train_config):
    """Train the dense Keras regressor without dropout.

    Args:
        X_train, y_train: training samples and targets; X is flattened per sample.
        X_val, y_val: validation samples and targets used for early stopping.
        train_config: dict with 'epochs', 'batch_size', 'learning_rate' and 'patience'.

    Returns:
        The fitted Keras model.
    """
    return _fit_dense_regressor(X_train, y_train, X_val, y_val, train_config)


def _load_keras_dense_weights(pytorch_model, weights):
    """Copy Keras Dense kernels/biases into the fc1, fc2, fc3 and output layers.

    ``weights`` is the flat list returned by ``model.get_weights()``: kernel and bias
    alternate, layer by layer. Keras kernels are (in, out) while ``nn.Linear`` stores
    (out, in), hence the transpose.
    """
    layers = (pytorch_model.fc1, pytorch_model.fc2, pytorch_model.fc3, pytorch_model.output)
    with torch.no_grad():
        for i, layer in enumerate(layers):
            kernel = np.ascontiguousarray(np.transpose(weights[2 * i]))
            bias = np.asarray(weights[2 * i + 1])
            # Fail loudly on an architecture mismatch instead of silently installing
            # wrongly shaped parameters.
            if kernel.shape != tuple(layer.weight.shape) or bias.shape != tuple(layer.bias.shape):
                raise ValueError(
                    'Keras weights for layer {} have shapes {} / {}, expected {} / {}'.format(
                        i, kernel.shape, bias.shape,
                        tuple(layer.weight.shape), tuple(layer.bias.shape)))
            layer.weight.copy_(torch.from_numpy(kernel))
            layer.bias.copy_(torch.from_numpy(bias))
    # Keras applies dropout only while fitting; eval mode makes the PyTorch copy
    # behave the same way at inference. Call .train() to resume training.
    return pytorch_model.eval()


def convert_to_pytorch(model):
    """Return a ``Network`` (in eval mode) carrying the weights of a trained Keras model."""
    return _load_keras_dense_weights(Network(), model.get_weights())


def convert_to_pytorch_with_dropout(model):
    """Return a ``NetworkWithDropout`` (in eval mode) carrying the weights of a trained Keras model."""
    return _load_keras_dense_weights(NetworkWithDropout(), model.get_weights())

def apply_final_layer_ols(pytorch_model, X=None, y=None):
    """Refit the output layer by ordinary least squares on the last hidden activations.

    Args:
        pytorch_model: network exposing ``forward_intermediate`` and an ``output`` layer;
            its output layer is modified in place.
        X: feature array to fit on. Defaults to the notebook-level ``X_train``.
        y: targets aligned with X. Defaults to the notebook-level ``y_train``.

    Returns:
        None.
    """
    features = X_train if X is None else X
    targets = y_train if y is None else y

    # Extract hidden activations deterministically: no dropout, no autograd graph.
    was_training = pytorch_model.training
    pytorch_model.eval()
    try:
        with torch.no_grad():
            X_ols = pytorch_model.forward_intermediate(
                torch.from_numpy(np.asarray(features)).float()).numpy()
    finally:
        pytorch_model.train(was_training)

    lr = LinearRegression().fit(X_ols, targets)

    # copy_ keeps the layer's own dtype, so the refit layer stays compatible with the
    # rest of the network whatever precision the regression was solved in.
    with torch.no_grad():
        pytorch_model.output.weight.copy_(
            torch.from_numpy(np.asarray(lr.coef_).reshape(1, -1)))
        pytorch_model.output.bias.copy_(
            torch.from_numpy(np.reshape(lr.intercept_, -1)))


In [None]:
# Walk the first augmented sample through the normalisation pipeline step by step.
# The sample is read straight from `augmented_df3` so that this cell does not depend on
# `anns` / `cms`, which are only defined in a later cell.
annotation = augmented_df3.annotation.values[0]
camera_metadata = augmented_df3.camera_metadata.values[0]

cm = CameraMetadata(
    focal_length=camera_metadata['focalLength'],
    focal_length_pixel=camera_metadata['focalLengthPixel'],
    baseline_m=camera_metadata['baseline'],
    pixel_count_width=camera_metadata['pixelCountWidth'],
    pixel_count_height=camera_metadata['pixelCountHeight'],
    image_sensor_width=camera_metadata['imageSensorWidth'],
    image_sensor_height=camera_metadata['imageSensorHeight']
)

# Same steps as convert_to_nn_input, kept separate so each intermediate can be inspected.
X_left, X_right = get_left_right_keypoint_arrs(annotation)
X_left_norm, X_right_norm = normalize_left_right_keypoint_arrs(X_left, X_right)
X_world = convert_to_world_point_arr(X_left_norm, X_right_norm, cm)
# NOTE: `X` holds a single sample here; the feature-building cell further down
# rebinds it to the full feature array.
X = stabilize_keypoints(X_world)
nn_input = torch.from_numpy(np.array([X])).float()

nn_input

In [None]:
# Display the ordered list of core body parts (BODY_PARTS is an alias of core_body_parts).
BODY_PARTS

In [None]:
# Display the network input for the inspected sample using the 2D-normalised, stabilised pipeline.
convert_to_nn_input(annotation, cm)

In [None]:
# Display the network input for the same sample using the 3D-centred (flattened) pipeline.
convert_to_nn_input_new(annotation, cm)

In [None]:
# Raw annotations and camera-metadata dicts of the augmented dataset, as plain lists.
anns = augmented_df3.annotation.values.tolist()
cms = augmented_df3.camera_metadata.values.tolist()


def normalize(anns: List, camera_metadatas: List) -> np.ndarray:
    """Convert annotations into network inputs with ``convert_to_nn_input``.

    Args:
        anns: keypoint annotations, one per sample.
        camera_metadatas: camera-metadata dicts aligned with ``anns``.

    Returns:
        Array stacking the per-sample network inputs along axis 0.
    """

    def _as_camera_metadata(raw):
        # Translate the camelCase dict keys into the CameraMetadata fields.
        return CameraMetadata(
            focal_length=raw['focalLength'],
            focal_length_pixel=raw['focalLengthPixel'],
            baseline_m=raw['baseline'],
            pixel_count_width=raw['pixelCountWidth'],
            pixel_count_height=raw['pixelCountHeight'],
            image_sensor_width=raw['imageSensorWidth'],
            image_sensor_height=raw['imageSensorHeight']
        )

    return np.array([
        convert_to_nn_input(ann, _as_camera_metadata(raw)).numpy()
        for ann, raw in zip(anns, camera_metadatas)
    ])


# Feature array built with the 2D-normalised, stabilised pipeline.
X = normalize(anns, cms)

In [None]:
# Raw annotations and camera-metadata dicts of the augmented dataset, as plain lists.
anns = augmented_df3.annotation.values.tolist()
cms = augmented_df3.camera_metadata.values.tolist()


def normalize_new(anns: List, camera_metadatas: List) -> np.ndarray:
    """Convert annotations into network inputs with ``convert_to_nn_input_new``.

    Args:
        anns: keypoint annotations, one per sample.
        camera_metadatas: camera-metadata dicts aligned with ``anns``.

    Returns:
        Array stacking the per-sample (flattened) network inputs along axis 0.
    """

    def _as_camera_metadata(raw):
        # Translate the camelCase dict keys into the CameraMetadata fields.
        return CameraMetadata(
            focal_length=raw['focalLength'],
            focal_length_pixel=raw['focalLengthPixel'],
            baseline_m=raw['baseline'],
            pixel_count_width=raw['pixelCountWidth'],
            pixel_count_height=raw['pixelCountHeight'],
            image_sensor_width=raw['imageSensorWidth'],
            image_sensor_height=raw['imageSensorHeight']
        )

    return np.array([
        convert_to_nn_input_new(ann, _as_camera_metadata(raw)).numpy()
        for ann, raw in zip(anns, camera_metadatas)
    ])


# Feature array built with the 3D-centred pipeline.
X_new = normalize_new(anns, cms)

In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Experiment: dropout network trained on the `X` features with a 90 / 9 / 1 split.
# NOTE(review): the seeding lines above this block are commented out, so the split and the
# fit differ from run to run.
train_config = dict(
    train_pct=0.9,
    val_pct=0.09,
    epochs=500,
    batch_size=64,
    learning_rate=2e-5,
    patience=30
)

# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df3.weight.values
fish_ids = augmented_df3.fish_id.values
# Split by fish ID so augmented copies of one fish never straddle two sets.
X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask = get_data_split(X, y, fish_ids,
                                                                train_config['train_pct'],
                                                                train_config['val_pct'])

tf_model = train_model_with_dropout(X_train, y_train, X_val, y_val, train_config)
# The Keras weights are copied into the equivalent PyTorch module; the OLS refit is disabled here.
pytorch_model = convert_to_pytorch_with_dropout(tf_model)
# apply_final_layer_ols(pytorch_model)

In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Experiment: plain network (no dropout) trained on the `X` features with a 98 / 1 / 1 split.
# This cell rebinds train_config, the split arrays and masks, tf_model and pytorch_model.
# NOTE(review): the seeding lines above this block are commented out, so results are not reproducible.
train_config = dict(
    train_pct=0.98,
    val_pct=0.01,
    epochs=500,
    batch_size=64,
    learning_rate=2e-5,
    patience=30
)

# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df3.weight.values
fish_ids = augmented_df3.fish_id.values
X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask = get_data_split(X, y, fish_ids,
                                                                train_config['train_pct'],
                                                                train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
# Refit the output layer by least squares on the last hidden activations of the training set.
apply_final_layer_ols(pytorch_model)


In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Experiment: plain network (no dropout) trained on the `X_new` features with a 98 / 1 / 1 split.
# This cell rebinds train_config, the split arrays and masks, tf_model and pytorch_model, so the
# evaluation cells below must use the feature set that matches the last training cell that was run.
# NOTE(review): the seeding lines above this block are commented out, so results are not reproducible.
train_config = dict(
    train_pct=0.98,
    val_pct=0.01,
    epochs=500,
    batch_size=64,
    learning_rate=2e-5,
    patience=30
)

# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df3.weight.values
fish_ids = augmented_df3.fish_id.values
X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask = get_data_split(X_new, y, fish_ids,
                                                                train_config['train_pct'],
                                                                train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
# Refit the output layer by least squares on the last hidden activations of the training set.
apply_final_layer_ols(pytorch_model)


In [None]:
# Store the split membership of each augmented sample as 0/1 columns.
# This modifies augmented_df3 in place and reflects the most recently executed split.
augmented_df3['is_train'] = train_mask.astype(int)
augmented_df3['is_val'] = val_mask.astype(int)
augmented_df3['is_test'] = test_mask.astype(int)


In [None]:
# Evaluate the current model on the `X` feature set, split by split.
# Inference must run in eval mode: otherwise dropout layers (if the model has any)
# stay active and make the predictions noisy.
pytorch_model.eval()
with torch.no_grad():
    y_pred = pytorch_model(torch.from_numpy(X).float()).numpy().squeeze()

# Relative errors per split; the printed values are fractions (0.01 == 1%).
split_errs = {}
for position, (split_name, split_mask, y_true) in enumerate([
        ('Train', train_mask, y_train),
        ('Val', val_mask, y_val),
        ('Test', test_mask, y_test)]):
    if position:
        print('='*20)
    print('{} stats'.format(split_name))
    split_pred = y_pred[split_mask]
    split_errs[split_name] = (split_pred - y_true) / y_true
    print('Mean error pct: {}'.format((np.mean(split_pred) - np.mean(y_true)) / np.mean(y_true)))
    print('Mean absolute error pct: {}'.format(np.mean(np.abs(split_errs[split_name]))))

# Keep the per-split error arrays available under their previous names.
train_errs, val_errs, test_errs = split_errs['Train'], split_errs['Val'], split_errs['Test']


In [None]:
# Evaluate the current model on the `X_new` feature set, split by split.
# Inference must run in eval mode: otherwise dropout layers (if the model has any)
# stay active and make the predictions noisy.
pytorch_model.eval()
with torch.no_grad():
    y_pred = pytorch_model(torch.from_numpy(X_new).float()).numpy().squeeze()

# Relative errors per split; the printed values are fractions (0.01 == 1%).
split_errs = {}
for position, (split_name, split_mask, y_true) in enumerate([
        ('Train', train_mask, y_train),
        ('Val', val_mask, y_val),
        ('Test', test_mask, y_test)]):
    if position:
        print('='*20)
    print('{} stats'.format(split_name))
    split_pred = y_pred[split_mask]
    split_errs[split_name] = (split_pred - y_true) / y_true
    print('Mean error pct: {}'.format((np.mean(split_pred) - np.mean(y_true)) / np.mean(y_true)))
    print('Mean absolute error pct: {}'.format(np.mean(np.abs(split_errs[split_name]))))

# Keep the per-split error arrays available under their previous names.
train_errs, val_errs, test_errs = split_errs['Train'], split_errs['Val'], split_errs['Test']


In [None]:
def generate_per_bucket_error(X, y):
    """Summarises relative prediction error per target-value bucket.

    Predictions come from the notebook-level ``pytorch_model``. Targets are
    grouped into half-open buckets ``[low, high)`` with edges 0.0, 0.1, ..., 0.9;
    bucket labels show the edges multiplied by 1e4.

    Args:
        X: numpy feature array fed to the model.
        y: numpy array of targets (presumably weight scaled by 1e-4 — confirm).

    Returns:
        DataFrame with columns ``bucket``, ``mean_err`` (signed mean relative
        error) and ``mae`` (mean absolute relative error). Buckets containing no
        samples yield NaN.
    """
    y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

    edges = np.arange(0, 10000, 1000) * 1e-4
    rows = []
    for low, high in zip(edges[:-1], edges[1:]):
        in_bucket = (y >= low) & (y < high)
        rel_errs = (y_pred[in_bucket] - y[in_bucket]) / y[in_bucket]
        label = '{}-{}'.format(round(1e4 * low), round(1e4 * high))
        rows.append((label, np.mean(rel_errs), np.mean(np.abs(rel_errs))))

    return pd.DataFrame({
        'bucket': [row[0] for row in rows],
        'mean_err': [row[1] for row in rows],
        'mae': [row[2] for row in rows],
    })

# Print the per-weight-bucket error tables for the training and testing splits.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X_train, y_train))
print('='*20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X_test, y_test))

In [None]:
def generate_per_depth_bucket_error(X, y, d):
    """Summarises signed relative prediction error per depth bucket.

    Predictions come from the notebook-level ``pytorch_model``. Samples are
    grouped by ``d`` into half-open buckets ``[low, high)`` whose edges run from
    0.4 in steps of 0.1 (``np.arange(0.4, 2.6, 0.1)``).

    Args:
        X: numpy feature array fed to the model.
        y: numpy array of targets aligned with ``X``.
        d: numpy array of per-sample depths aligned with ``X``
            (units not shown here; presumably metres — confirm).

    Returns:
        DataFrame with columns ``depth_bucket`` and ``mean_err``; buckets with
        no samples yield NaN.
    """
    predictions = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

    depth_edges = np.arange(0.4, 2.6, 0.1)
    labels = []
    bucket_errs = []
    for lo, hi in zip(depth_edges[:-1], depth_edges[1:]):
        selected = (d >= lo) & (d < hi)
        labels.append('{}-{}'.format(round(lo, 2), round(hi, 2)))
        rel_errs = (predictions[selected] - y[selected]) / y[selected]
        bucket_errs.append(np.nanmean(rel_errs))

    return pd.DataFrame({'depth_bucket': labels, 'mean_err': bucket_errs})

# Print the per-depth-bucket error tables for the training and testing splits,
# taking each split's depths from the `depth` column of augmented_df3.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(X_train, y_train, augmented_df3[train_mask].depth.values))
print('\n')
print('='*20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(X_test, y_test, augmented_df3[test_mask].depth.values))


In [None]:
# Persist the trained network's parameters (its state_dict) with torch.save.
# Despite the '.pb' extension, the file is written by torch.save; it is the same
# path that the 'weight_v10' entry of `models` points at.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_orient10.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
import json
import os
import pandas as pd
from research.utils.data_access_utils import S3AccessUtils
from report_generation.report_generator import generate_ts_data, SamplingFilter
from research.utils.datetime_utils import add_days
from report_generation.report_generator import gen_pm_base
from population_metrics.smart_metrics import generate_smart_avg_weight, generate_smart_individual_values, ValidationError
from filter_optimization.filter_optimization_task import _add_date_hour_columns
from research.weight_estimation.keypoint_utils.optics import pixel2world
import numpy as np

pd.set_option('display.max_rows', 500)

In [None]:
# Cohorts to evaluate; each name is also the sub-directory of the production
# dataset that the download cell fetches. Every cohort is listed exactly once
# ('varholmen_pen_id_131_2020-08-15_2020-08-30' used to appear twice, which made
# the download loop fetch and parse it a second time).
cohort_names = [
    'seglberget_pen_id_66_2020-05-13_2020-06-13',
    'bolaks_pen_id_88_2020-02-28_2020-03-10',
    'langoy_pen_id_108_2020-05-07_2020-05-17',
    'tittelsnes_pen_id_37_2020-06-10_2020-06-24',
    'aplavika_pen_id_95_2020-07-10_2020-07-26',
#     'kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02',
    'silda_pen_id_86_2020-07-02_2020-07-19',
    'vikane_pen_id_60_2020-08-10_2020-08-30',
    'eldviktaren_pen_id_164_2020-09-21_2020-10-08',
#     'habranden_pen_id_100_2020-08-10_2020-08-31',
    'varholmen_pen_id_131_2020-08-15_2020-08-30',
    'dale_pen_id_143_2020-10-07_2020-10-21',
    'djubawik_pen_id_153_2020-11-10_2020-11-26',
    'leivsethamran_pen_id_165_2020-10-18_2020-11-13',
    'movikodden_pen_id_114_2020-11-03_2020-11-25',
    'movikodden_pen_id_167_2020-10-13_2020-10-30',
    'slapoya_pen_id_116_2020-10-18_2020-11-08',
    'varholmen_pen_id_151_2020-10-02_2020-10-17',
    'varholmen_pen_id_186_2020-10-18_2020-11-02'
]

In [None]:
# Camera model used for each cohort, keyed by cohort name. Each cohort appears
# once (the repeated 'varholmen_pen_id_131_2020-08-15_2020-08-30' key, which a
# dict literal silently collapses, has been removed).
camera_type = {
    'seglberget_pen_id_66_2020-05-13_2020-06-13': 'sexton',
    'bolaks_pen_id_88_2020-02-28_2020-03-10': 'sexton',
    'langoy_pen_id_108_2020-05-07_2020-05-17': 'sexton',
    'tittelsnes_pen_id_37_2020-06-10_2020-06-24': 'sexton',
    'aplavika_pen_id_95_2020-07-10_2020-07-26': 'sexton',
#     'kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02': 'sexton',
    'silda_pen_id_86_2020-07-02_2020-07-19': 'sexton',
    'vikane_pen_id_60_2020-08-10_2020-08-30': 'atlas',
    'eldviktaren_pen_id_164_2020-09-21_2020-10-08': 'atlas',
#     'habranden_pen_id_100_2020-08-10_2020-08-31': 'imenco',
    'varholmen_pen_id_131_2020-08-15_2020-08-30': 'imenco',
    'dale_pen_id_143_2020-10-07_2020-10-21': 'atlas',
    'djubawik_pen_id_153_2020-11-10_2020-11-26': 'atlas',
    'leivsethamran_pen_id_165_2020-10-18_2020-11-13': 'atlas',
    'movikodden_pen_id_114_2020-11-03_2020-11-25': 'imenco',
    'movikodden_pen_id_167_2020-10-13_2020-10-30': 'imenco',
    'slapoya_pen_id_116_2020-10-18_2020-11-08': 'imenco',
    'varholmen_pen_id_151_2020-10-02_2020-10-17': 'imenco',
    'varholmen_pen_id_186_2020-10-18_2020-11-02': 'atlas'
}

In [None]:
batch_name = 'test'

ROOT_DIR = '/root/data/alok/biomass_estimation/playground'

# For every cohort, download its ground-truth metadata JSON and annotation CSV,
# and keep both in dicts keyed by cohort name.
dfs, gt_metadatas = {}, {}
for cohort_name in cohort_names:
    s3_dir = os.path.join(
        'https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets',
        cohort_name
    )

    # To evaluate against the validated ground truth instead, use
    # 'ground_truth_metadata_validated.json' as the file name in both lines below.
    ground_truth_metadata_url = os.path.join(s3_dir, 'ground_truth_metadata.json')
    ground_truth_key_base = os.path.join(batch_name, cohort_name, 'ground_truth_metadata.json')
    ground_truth_f = os.path.join(ROOT_DIR, ground_truth_key_base)
    print(ground_truth_metadata_url)
    s3.download_from_url(ground_truth_metadata_url, custom_location=ground_truth_f)
    # Use a context manager so the file handle is closed on every iteration.
    with open(ground_truth_f) as gt_file:
        gt_metadata = json.load(gt_file)
    gt_metadatas[cohort_name] = gt_metadata

    data_url = os.path.join(s3_dir, 'annotation_dataset.csv')
    data_f, _, _= s3.download_from_url(data_url)
    # NOTE: `df` is rebound here on purpose to keep the notebook's existing
    # global name; it shadows any earlier `df` in the kernel.
    df = pd.read_csv(data_f)
    df = _add_date_hour_columns(df)
    dfs[cohort_name] = df
    
    

In [None]:
# Each model entry is a 7-tuple:
#   (key, tag, model_url, is_url, is_new, is_flat, has_dropout)
# `key` is the DataFrame column that receives the predictions, `tag` is the
# display label, `model_url` is a local path unless `is_url` is True, and the
# remaining flags select which estimator class wraps the weights.
_MODEL_PATH_TEMPLATE = '/root/data/alok/biomass_estimation/playground/output_model_bryton_orient{}.pb'

# (tag, is_new, is_flat, has_dropout) for versions 1..10, in order.
_MODEL_VARIANTS = [
    ('orig1', False, False, False),
    ('orig2', True, True, False),
    ('orig1-noNorm', False, True, False),
    ('orig2-noNorm', True, True, False),
    ('orig1-noNorm-dropout', False, True, True),
    ('orig1-noNorm-dropout-noOLS', False, True, True),
    ('orig1-noNorm-dropout-noOLS0.9', False, True, True),
    ('orig1-noNorm-dropout0.2-noOLS0.9', False, True, True),
    ('orig1-noNorm-dropout1-noOLS0.9', False, True, True),
    ('orig1-noNorm-dropout.01-noOLS0.9', False, True, True),
]

models = [
    (
        'weight_v{}'.format(version),
        tag,
        _MODEL_PATH_TEMPLATE.format(version),
        False,
        is_new,
        is_flat,
        has_dropout,
    )
    for version, (tag, is_new, is_flat, has_dropout) in enumerate(_MODEL_VARIANTS, start=1)
]

# Subset of `models` that the prediction and evaluation cells actually iterate.
additional_models = [models[-1]]

In [None]:
"""
This module contains the WeightEstimator class for estimating fish weight (g), length (mm), and
k-factor given input keypoint coordinates and camera metadata.
"""

from typing import Dict, Tuple
import torch
from torch import nn

class WeightEstimator:
    """Predicts fish weight, k-factor, and length from keypoint annotations and
    camera metadata.

    Inputs are built with ``convert_to_nn_input`` and fed to two ``Network``
    instances: one for weight and one for k-factor.
    """

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Loads the weight and k-factor networks and puts both in eval mode.

        Args:
            weight_model_f: path to the saved state dict of the weight network.
            kf_model_f: path to the saved state dict of the k-factor network.
        """
        self.weight_model = Network()
        self.weight_model.load_state_dict(torch.load(weight_model_f))
        self.weight_model.eval()

        self.kf_model = Network()
        self.kf_model.load_state_dict(torch.load(kf_model_f))
        self.kf_model.eval()

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        X = convert_to_nn_input(annotation, camera_metadata)
        return X

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata.

        The raw network output is multiplied by 1e4 before being returned.
        """
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            weight = 1e4 * self.weight_model(X).item()
        return weight

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            kf = self.kf_model(X).item()
        return kf

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            ``(weight, length, kf)``. Length is derived as
            ``(1e5 * weight / kf) ** (1/3)`` and is reported as 0 when
            ``weight * kf`` is not strictly positive.
        """
        weight = self.predict_weight(annotation, camera_metadata)
        kf = self.predict_kf(annotation, camera_metadata)
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0
        return weight, length, kf
    
class WeightEstimatorFlat:
    """Variant of the weight estimator that feeds the networks a flat input.

    Inputs are built with ``convert_to_nn_input_flat`` and reshaped to
    ``(1, 24)`` before being fed to two ``Network`` instances: one for weight
    and one for k-factor.
    """

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Loads the weight and k-factor networks and puts both in eval mode.

        Args:
            weight_model_f: path to the saved state dict of the weight network.
            kf_model_f: path to the saved state dict of the k-factor network.
        """
        self.weight_model = Network()
        self.weight_model.load_state_dict(torch.load(weight_model_f))
        self.weight_model.eval()

        self.kf_model = Network()
        self.kf_model.load_state_dict(torch.load(kf_model_f))
        self.kf_model.eval()

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        X = convert_to_nn_input_flat(annotation, camera_metadata)
        # Single-sample batch with 24 features.
        X = X.reshape(1, 24)
        return X

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata.

        The raw network output is multiplied by 1e4 before being returned.
        """
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            weight = 1e4 * self.weight_model(X).item()
        return weight

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            kf = self.kf_model(X).item()
        return kf

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            ``(weight, length, kf)``. Length is derived as
            ``(1e5 * weight / kf) ** (1/3)`` and is reported as 0 when
            ``weight * kf`` is not strictly positive.
        """
        weight = self.predict_weight(annotation, camera_metadata)
        kf = self.predict_kf(annotation, camera_metadata)
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0
        return weight, length, kf
    

class WeightEstimatorNew:
    """Variant of the weight estimator that uses the "new" input conversion.

    Inputs are built with ``convert_to_nn_input_new`` and reshaped to
    ``(1, 24)`` before being fed to two ``Network`` instances: one for weight
    and one for k-factor.
    """

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Loads the weight and k-factor networks and puts both in eval mode.

        Args:
            weight_model_f: path to the saved state dict of the weight network.
            kf_model_f: path to the saved state dict of the k-factor network.
        """
        self.weight_model = Network()
        self.weight_model.load_state_dict(torch.load(weight_model_f))
        self.weight_model.eval()

        self.kf_model = Network()
        self.kf_model.load_state_dict(torch.load(kf_model_f))
        self.kf_model.eval()

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        X = convert_to_nn_input_new(annotation, camera_metadata)
        # Single-sample batch with 24 features.
        X = X.reshape(1, 24)
        return X

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata.

        The raw network output is multiplied by 1e4 before being returned.
        """
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            weight = 1e4 * self.weight_model(X).item()
        return weight

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            kf = self.kf_model(X).item()
        return kf

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            ``(weight, length, kf)``. Length is derived as
            ``(1e5 * weight / kf) ** (1/3)`` and is reported as 0 when
            ``weight * kf`` is not strictly positive.
        """
        weight = self.predict_weight(annotation, camera_metadata)
        kf = self.predict_kf(annotation, camera_metadata)
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0
        return weight, length, kf
    
class WeightEstimatorWithDropout:
    """Variant of the weight estimator whose weight network has dropout layers.

    The weight model is a ``NetworkWithDropout`` while the k-factor model is a
    plain ``Network``; both are put in eval mode. Inputs are built with
    ``convert_to_nn_input``.
    """

    def __init__(self, weight_model_f: str, kf_model_f: str) -> None:
        """Loads the weight and k-factor networks and puts both in eval mode.

        Args:
            weight_model_f: path to the saved state dict of the
                ``NetworkWithDropout`` weight network.
            kf_model_f: path to the saved state dict of the k-factor network.
        """
        self.weight_model = NetworkWithDropout()
        self.weight_model.load_state_dict(torch.load(weight_model_f))
        self.weight_model.eval()

        self.kf_model = Network()
        self.kf_model.load_state_dict(torch.load(kf_model_f))
        self.kf_model.eval()

    @staticmethod
    def _get_model_input(annotation: Dict, camera_metadata: CameraMetadata) -> torch.Tensor:
        """Generates neural-network input tensor given annotation and camera_metadata."""
        X = convert_to_nn_input(annotation, camera_metadata)
        return X

    def predict_weight(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates weight prediction given input annotation and camera metadata.

        The raw network output is multiplied by 1e4 before being returned.
        """
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            weight = 1e4 * self.weight_model(X).item()
        return weight

    def predict_kf(self, annotation: Dict, camera_metadata: CameraMetadata) -> float:
        """Generates k-factor prediction given input annotation and camera metadata."""
        X = self._get_model_input(annotation, camera_metadata)
        # Inference only: no need to record an autograd graph.
        with torch.no_grad():
            kf = self.kf_model(X).item()
        return kf

    def predict(self, annotation: Dict, camera_metadata: CameraMetadata) -> Tuple:
        """Generates weight, k-factor, and length predictions given input annotation and camera
        metadata.

        Returns:
            ``(weight, length, kf)``. Length is derived as
            ``(1e5 * weight / kf) ** (1/3)`` and is reported as 0 when
            ``weight * kf`` is not strictly positive.
        """
        weight = self.predict_weight(annotation, camera_metadata)
        kf = self.predict_kf(annotation, camera_metadata)
        if weight * kf > 0:
            length = (1e5 * weight / kf) ** (1.0 / 3)
        else:
            length = 0
        return weight, length, kf

In [None]:
# The k-factor model is identical for every weight model, so fetch it once
# instead of once per model.
kf_model_f, _, _ = s3.download_from_url('https://aquabyte-models.s3-us-west-1.amazonaws.com/k-factor/playground/kf_predictor_v2.pb')

# For every model under test, predict a weight for each annotated row of each
# cohort and store the predictions in a new column named after the model key.
for key, tag, model_url, is_url, is_new, is_flat, has_dropout in additional_models:
    if is_url:
        weight_model_f, _, _ = s3.download_from_url(model_url)
    else:
        weight_model_f = model_url

    # Flag precedence matters: has_dropout wins over is_new, which wins over is_flat.
    if has_dropout:
        weight_estimator = WeightEstimatorWithDropout(weight_model_f, kf_model_f)
    elif is_new:
        weight_estimator = WeightEstimatorNew(weight_model_f, kf_model_f)
    elif is_flat:
        weight_estimator = WeightEstimatorFlat(weight_model_f, kf_model_f)
    else:
        weight_estimator = WeightEstimator(weight_model_f, kf_model_f)

    for k, rdf in dfs.items():
        print(k)
        weights = []
        for count, (idx, row) in enumerate(rdf.iterrows()):
            if count % 100 == 0:
                print('Percentage completion: {}%'.format(round(100 * count / rdf.shape[0], 2)))
                print(count)
            # The CSV stores dicts with single quotes; swap them so json can parse.
            annotation = json.loads(row.annotation.replace("'", '"'))
            if not annotation:
                # Keep row alignment: rows without an annotation get no prediction.
                weights.append(None)
                continue
            camera_metadata = json.loads(row.camera_metadata.replace("'", '"'))
            if not camera_metadata:
                # Fall back to the camera metadata of the cohort's first row.
                camera_metadata = json.loads(rdf.camera_metadata.iloc[0].replace("'", '"'))

            camera_metadata_obj = CameraMetadata(
                focal_length=camera_metadata['focalLength'],
                focal_length_pixel=camera_metadata['focalLengthPixel'],
                baseline_m=camera_metadata['baseline'],
                pixel_count_width=camera_metadata['pixelCountWidth'],
                pixel_count_height=camera_metadata['pixelCountHeight'],
                image_sensor_width=camera_metadata['imageSensorWidth'],
                image_sensor_height=camera_metadata['imageSensorHeight']
            )

            # Only the weight is stored, so skip the k-factor/length work that
            # predict() would also do.
            weight = weight_estimator.predict_weight(annotation, camera_metadata_obj)
            weights.append(weight)
        rdf[key] = weights

In [None]:
def generate_raw_individual_values(pm_base, gt_metadata, start_hour, end_hour, apply_growth_rate, max_day_diff, days_post_feeding, final_days_post_feeding):
    """Returns the individual weight values for a cohort around its last feeding date.

    The target date is ``gt_metadata['last_feeding_date']`` shifted by
    ``days_post_feeding`` days; the values come from
    ``generate_smart_individual_values``.

    Args:
        pm_base: population-metrics base passed through to
            ``generate_smart_individual_values``.
        gt_metadata: ground-truth metadata dict; must contain ``last_feeding_date``.
        start_hour: unused here; kept for interface compatibility.
        end_hour: unused here; kept for interface compatibility.
        apply_growth_rate: forwarded to ``generate_smart_individual_values``.
        max_day_diff: forwarded to ``generate_smart_individual_values``.
        days_post_feeding: day offset added to the last feeding date.
        final_days_post_feeding: unused here; kept for interface compatibility.

    Returns:
        The first element of the pair returned by
        ``generate_smart_individual_values``.
    """
    target_date = add_days(gt_metadata['last_feeding_date'], days_post_feeding)
    # NOTE(review): the literal True and 0.9 are positional arguments whose
    # meaning is defined by generate_smart_individual_values — confirm there.
    individual_weights, _unused = generate_smart_individual_values(
        pm_base, target_date, max_day_diff, True, apply_growth_rate, 0.9
    )
    return individual_weights


def generate_average_weight_accuracy(weights, gt_metadata, loss_factor):
    """Compares the predicted gutted average weight with the ground truth.

    Args:
        weights: individual predicted (ungutted) weights.
        gt_metadata: ground-truth metadata dict; must contain
            ``gutted_average_weight``.
        loss_factor: fraction of weight removed by gutting; the mean prediction
            is scaled by ``1 - loss_factor``.

    Returns:
        ``(relative_error, gutted_weight_prediction)`` where the relative error
        is ``(prediction - ground_truth) / ground_truth``.
    """
    gutted_weight_prediction = np.mean(weights) * (1.0 - loss_factor)
    gt_weight = gt_metadata['gutted_average_weight']
    relative_error = (gutted_weight_prediction - gt_weight) / gt_weight
    return relative_error, gutted_weight_prediction

def generate_distribution_accuracy(weights, gt_metadata, loss_factor):
    """Computes per-bucket errors between predicted and ground-truth weight distributions.

    Args:
        weights: individual predicted (ungutted) weights; any array-like
            (list, numpy array, pandas Series) is accepted.
        gt_metadata: ground-truth metadata dict; ``gutted_weight_distribution``
            maps ``'<lower>-<upper>'`` bucket strings to percentages, or is None.
        loss_factor: fraction of weight removed by gutting; every weight is
            scaled by ``1 - loss_factor``.

    Returns:
        A list with one entry per bucket, in the distribution's iteration
        order: predicted fraction in the bucket minus the ground-truth
        fraction. Returns ``[]`` when no distribution is available.
    """
    gutted_weight_distribution = gt_metadata['gutted_weight_distribution']

    if gutted_weight_distribution is None:
        return []

    # Coerce to an array so plain lists work (list * float raises TypeError).
    gutted_weights = np.asarray(weights) * (1.0 - loss_factor)

    count_distribution_errors = []

    for bucket, gt_percentage in gutted_weight_distribution.items():
        lower_bound, upper_bound = bucket.split('-')
        # Bucket bounds are multiplied by 1000 to match the weights' units
        # (presumably kg bounds vs. gram weights — confirm).
        mask = (gutted_weights >= float(lower_bound) * 1000) & (gutted_weights < float(upper_bound) * 1000)

        # Fraction of predictions in this bucket (NaN when `weights` is empty).
        pct = np.sum(mask) / len(mask)
        gt_pct = gt_percentage / 100

        count_distribution_errors.append(pct - gt_pct)

    return count_distribution_errors



In [None]:
# Accumulator for evaluation results: the evaluation cell appends one DataFrame
# per model it processes. Re-running this cell discards what was collected so far.
all_tdfs = []

In [None]:
from datetime import datetime

import matplotlib.pyplot as plt

# For every model in `additional_models`, evaluate the average-weight error on
# each cohort under every combination of AKPD cutoff and hour-filter method,
# then append one summary DataFrame per model to `all_tdfs`.
for key, tag, _, _, _, _, _ in additional_models:
    start_hours = [7]
    end_hours = [15]
    apply_growth_rate = True
    max_day_diff = 3
    days_post_feeding = 1
    final_days_post_feeding = 1
    # A numeric entry is used as-is; the string 'expected_loss_factor' is a
    # sentinel resolved per cohort from the ground-truth metadata further below.
    loss_factors = [0.16, 'expected_loss_factor'] # need to determine the right values here
    akpd_cutoffs = [0.01, 0.95]

    hour_filter_methods = ['manual', 'hour_hist', 'u-shape'] #  'u-shape',

    # Column accumulators: one entry is appended per
    # (cohort, akpd_cutoff, hour filter) combination that evaluates successfully.
    cohort_name_col = []
    akpd_cutoff_col = []
    hour_filter_method_col = []
    start_hour_col = []
    end_hour_col = []
    loss_factor_col = []
    starvation_days_col = []
    avg_weight_col = []
    gutted_avg_weight_col = []
    avg_weight_error_col = []
    gt_avg_weight_col = []
    count_distribution_error_col = []
    camera_col = []

    # These three accumulators hold one sub-list per loss factor.
    for loss_factor in loss_factors:
        avg_weight_error_col.append([])
        gutted_avg_weight_col.append([])
        count_distribution_error_col.append([])

    for cohort_name in sorted(list(dfs.keys())):
        print(cohort_name)
        
        gt_metadata = gt_metadatas[cohort_name]

        last_feeding_date = gt_metadata['last_feeding_date']
        slaughter_date = gt_metadata['slaughter_date']

        # Starvation period = days between last feeding and slaughter, when both are known.
        if slaughter_date is not None and last_feeding_date is not None:
            date_diff = datetime.strptime(slaughter_date, '%Y-%m-%d') - datetime.strptime(last_feeding_date, '%Y-%m-%d')
            starvation_days = date_diff.days
        else:
            starvation_days = None

        df = dfs[cohort_name]
        # Expose this model's predictions under the column name used below
        # (mutates the cohort frame stored in `dfs`).
        df['estimated_weight_g'] = df[key]
        final_date_post_feeding = add_days(gt_metadata['last_feeding_date'], final_days_post_feeding)
        # Keep only rows dated on or before the final post-feeding date.
        tdf = df[df.date <= final_date_post_feeding]

        # Build the list of (method, start_hour, end_hour) windows to evaluate.
        start_end_hours = []

        for method in hour_filter_methods:
            if method == 'manual':
                # Cartesian product of the configured start and end hours.
                for start_hour in start_hours:
                    for end_hour in end_hours:
                        start_end_hours.append((method, start_hour, end_hour))
            elif method == 'u-shape':
                # NOTE(review): this uses the full cohort frame `df`, not the
                # date-filtered `tdf` — confirm that is intended.
                df2 = df[(df.hour >= 3) & (df.hour <= 20)]

                #count, bins, _ = plt.hist(df2.hour, density = True, bins = (np.max(df2.hour) - np.min(df2.hour)))

                start_hour = np.min(df2.hour)
                end_hour = np.max(df2.hour)

                bins = np.arange(start_hour, end_hour + 1)

                # Mean estimated weight for each hour of the day in range.
                weights = []

                for hour in np.arange(start_hour, end_hour + 1):
                    avg_weight = np.mean(df2[df2.hour == hour].estimated_weight_g)
                    weights.append(avg_weight)

                # The window is seeded at hour 10; raises IndexError if hour 10
                # is not within the observed hour range.
                start_index = np.where(bins == 10)[0][0]

                lower_index = start_index
                upper_index = start_index

                is_iterating = True
                eps = 3

                # Grow the window outwards (both sides first, then left only,
                # then right only) while the hourly means at the candidate
                # boundaries differ by less than eps standard deviations.
                while is_iterating:
                #     print(np.std(weights[lower_index:upper_index]))
                    if lower_index > 0 and upper_index < len(weights) - 1 and np.abs(weights[upper_index + 1] - weights[lower_index - 1]) < eps * np.std(weights[lower_index - 1:upper_index + 1]):
                        lower_index = lower_index - 1
                        upper_index = upper_index + 1
                    elif lower_index > 0 and np.abs(weights[upper_index] - weights[lower_index - 1]) < eps * np.std(weights[lower_index - 1:upper_index]):
                        lower_index = lower_index - 1
                    elif upper_index < len(weights) - 1 and np.abs(weights[upper_index + 1] - weights[lower_index]) < eps * np.std(weights[lower_index:upper_index + 1]):
                        upper_index = upper_index + 1
                    else:
                        is_iterating = False

                start_hour, end_hour = bins[lower_index], bins[upper_index]
                
                start_end_hours.append((method, start_hour, end_hour))
            elif method == 'hour_hist':
                df2 = df[(df.hour >= 3) & (df.hour <= 20)]

                # Density histogram of sample hours (this also draws a plot as a side effect).
                count, bins, _ = plt.hist(df2.hour, density = True, bins = (np.max(df2.hour) - np.min(df2.hour)))

                # Indices of bins whose density exceeds 1/18.
                idx_values = np.where(count > 1.0 / 18)[0]

                # Seed at the bin starting at hour 10. Both lookups raise
                # IndexError if 10 is not a bin edge or that bin is below the
                # density threshold.
                start_index = np.where(bins == 10)[0][0]
                start_array = np.where(idx_values == start_index)[0][0]

                lower_index = start_array
                upper_index = start_array

                # Extend to the contiguous run of above-threshold bins around the seed.
                while lower_index > 0 and (idx_values[lower_index] - idx_values[lower_index - 1] == 1):
                    lower_index = lower_index - 1
                while upper_index < len(idx_values) - 1 and (idx_values[upper_index + 1] - idx_values[upper_index] == 1):
                    upper_index = upper_index + 1

                start_hour, end_hour = bins[idx_values[lower_index]], bins[idx_values[upper_index]]

                start_end_hours.append((method, start_hour, end_hour))

        for akpd_cutoff in akpd_cutoffs:
            for method, start_hour, end_hour in start_end_hours:
                sampling_filter = SamplingFilter(
                    start_hour=start_hour,
                    end_hour=end_hour,
                    kf_cutoff=0.0,
                    akpd_score_cutoff=akpd_cutoff
                )

                pm_base = gen_pm_base(tdf, sampling_filter)

                # Combinations that fail validation are skipped entirely, so
                # they contribute no row to the results.
                try:
                    weights = generate_raw_individual_values(pm_base, gt_metadata, start_hour, end_hour, apply_growth_rate, max_day_diff, days_post_feeding, final_days_post_feeding)
                except ValidationError as err:
                    continue

                akpd_cutoff_col.append(akpd_cutoff)
                cohort_name_col.append(cohort_name)
                hour_filter_method_col.append(method)
                start_hour_col.append(start_hour)
                end_hour_col.append(end_hour)
                loss_factor_col.append(gt_metadata['expected_loss_factor'])
                starvation_days_col.append(starvation_days)
                avg_weight_col.append(np.mean(weights))
                gt_avg_weight_col.append(gt_metadata['gutted_average_weight'])
                camera_col.append(camera_type[cohort_name])

                for index, loss_factor in enumerate(loss_factors):
                    if loss_factor == 'expected_loss_factor':
                        # Use the cohort's own loss factor, defaulting to 0.165 when missing/falsy.
                        loss_factor = gt_metadata['expected_loss_factor'] or 0.165

                        # Values above 10 are treated as percentages and converted to fractions.
                        if loss_factor > 10:
                            loss_factor = loss_factor / 100.0

                    avg_weight_err, gutted_weight_prediction = generate_average_weight_accuracy(weights, gt_metadata, loss_factor)
                    avg_weight_error_col[index].append(avg_weight_err)
                    gutted_avg_weight_col[index].append(gutted_weight_prediction)

                    count_distribution_errors = generate_distribution_accuracy(weights, gt_metadata, loss_factor)
                    count_distribution_error_col[index].append(count_distribution_errors)
                    
    # Assemble the per-model results table from the accumulated columns.
    columns = {
        'cohort_name': cohort_name_col,
        'hour_filter_method_col': hour_filter_method_col,
        'akpd_cutoff_col': akpd_cutoff_col,
        'start_hour_col': start_hour_col,
        'end_hour_col': end_hour_col,
        'loss_factor_col': loss_factor_col,
        'starvation_days_col': starvation_days_col,
        'avg_weight_col': avg_weight_col,
        'gt_avg_weight_col': gt_avg_weight_col,
        'camera_col': camera_col
    }

    # Add four columns per loss factor; the suffix is 'exp' for the expected
    # loss factor and the two-decimal value otherwise (e.g. '0.16').
    for index, loss_factor in enumerate(loss_factors):
        if loss_factor == 'expected_loss_factor':
            col_name = 'avg_weight_error_exp'
            col_gutted_name = 'avg_gutted_weight_exp'
            col_abs_name = 'avg_weight_error_abs_exp'
            col_abs_dist_name = 'avg_count_dist_error_abs_exp'
        else:
            col_name = 'avg_weight_error_%0.2f' % (loss_factor,)
            col_gutted_name = 'avg_gutted_weight_%0.2f' % (loss_factor,)
            col_abs_name = 'avg_weight_error_abs_%0.2f' % (loss_factor,)
            col_abs_dist_name = 'avg_count_dist_error_abs_%0.2f' % (loss_factor,)

        columns[col_name] = avg_weight_error_col[index]
        columns[col_gutted_name] = gutted_avg_weight_col[index]
        columns[col_abs_name] = np.abs(avg_weight_error_col[index])
        columns[col_abs_dist_name] = [np.mean(np.abs(l)) for l in count_distribution_error_col[index]]

    new_tdf = pd.DataFrame(columns)
    
    all_tdfs.append(new_tdf)

In [None]:
# Inspect the 'weight_v9' predictions on `df` (the last cohort frame bound by
# the evaluation loop).
# NOTE(review): this column only exists if a model keyed 'weight_v9' went
# through the prediction cell; `additional_models` as written lists only
# 'weight_v10', so this may raise KeyError on a fresh run — confirm.
df['weight_v9']

In [None]:
# Slice every per-model results table into six views, one per
# (AKPD cutoff, hour-filter method) combination:
#   all_dfs1..3 -> cutoff 0.01 with 'manual', 'hour_hist', 'u-shape'
#   all_dfs4..6 -> cutoff 0.95 with 'manual', 'hour_hist', 'u-shape'
# Each list gets one entry per table in `all_tdfs`, in the same order.
all_dfs1, all_dfs2, all_dfs3 = [], [], []
all_dfs4, all_dfs5, all_dfs6 = [], [], []

for tdf in all_tdfs:
    columns = ['cohort_name',  'starvation_days_col', 'camera_col', 'avg_weight_col', 'avg_gutted_weight_0.16', 'gt_avg_weight_col', 'avg_weight_error_0.16', 'avg_weight_error_exp']

    low_cutoff = tdf.akpd_cutoff_col == 0.01
    high_cutoff = tdf.akpd_cutoff_col == 0.95
    is_manual = tdf.hour_filter_method_col == 'manual'
    is_hour_hist = tdf.hour_filter_method_col == 'hour_hist'
    is_u_shape = tdf.hour_filter_method_col == 'u-shape'

    df1 = tdf[low_cutoff & is_manual][columns]
    df2 = tdf[low_cutoff & is_hour_hist][columns]
    df3 = tdf[low_cutoff & is_u_shape][columns]
    df4 = tdf[high_cutoff & is_manual][columns]
    df5 = tdf[high_cutoff & is_hour_hist][columns]
    df6 = tdf[high_cutoff & is_u_shape][columns]

    all_dfs1.append(df1)
    all_dfs2.append(df2)
    all_dfs3.append(df3)
    all_dfs4.append(df4)
    all_dfs5.append(df5)
    all_dfs6.append(df6)

all_all_dfs = [all_dfs1, all_dfs2, all_dfs3, all_dfs4, all_dfs5, all_dfs6]

In [None]:
# For each model, print the mean over the six filter configurations of the
# root-mean-square of (50 * |relative error at loss factor 0.16|).
# NOTE(review): `index` comes from `models`, but each list in `all_all_dfs` has
# one entry per table in `all_tdfs` (filled from `additional_models`); the two
# must line up or this raises IndexError / mislabels results — confirm.
for index, model in enumerate(models):
    metric = []

    for all_dfs in all_all_dfs:
        scaled_abs_errs = 50 * np.abs(all_dfs[index]['avg_weight_error_0.16'])
        rms = np.sqrt(np.mean(scaled_abs_errs ** 2))
        metric.append(rms)

    print(np.mean(metric))

In [None]:
# For each filter configuration, print the mean over all models of the
# root-mean-square of (50 * |relative error at loss factor 0.16|).
# NOTE(review): `index` comes from `models`, but each list in `all_all_dfs` has
# one entry per table in `all_tdfs` (filled from `additional_models`); the two
# must line up or this raises IndexError / mislabels results — confirm.
for all_dfs in all_all_dfs:
    metric = []

    for index, model in enumerate(models):
        scaled_abs_errs = 50 * np.abs(all_dfs[index]['avg_weight_error_0.16'])
        rms = np.sqrt(np.mean(scaled_abs_errs ** 2))
        metric.append(rms)

    print(np.mean(metric))

In [None]:
# Summary table for one filter configuration (all_dfs2: AKPD cutoff 0.01 with
# the 'hour_hist' hour filter): one printed row of error statistics per model,
# followed by the mean of the per-model RMS metric.
# NOTE(review): rows are labelled from `models` by position, so `all_dfs` must
# hold one table per entry of `models`, in the same order — confirm.
all_dfs = all_dfs2

metric = []

print('%-*s: 90Pct, 50Pct, Metric, Avg16, AvgAbs16, AvgNormAbs16, AvgAbsExp, Std16' % (25, 'Model'))

for index, model in enumerate(models):
    _, tag, _, _, _, _, _ = model

    errs_16 = all_dfs[index]['avg_weight_error_0.16']
    errs_exp = all_dfs[index]['avg_weight_error_exp']
    abs_errs_16 = np.abs(errs_16)

    pct_90 = 100 * np.percentile(abs_errs_16, 90)
    pct_50 = 100 * np.percentile(abs_errs_16, 50)
    rms_metric = np.sqrt(np.mean((50 * abs_errs_16) ** 2))
    avg_16 = 100 * np.mean(errs_16)
    avg_abs_16 = 100 * np.mean(abs_errs_16)
    # Mean absolute deviation of the error from its own mean.
    avg_norm_abs_16 = 100 * np.mean(np.abs(errs_16 - np.mean(errs_16)))
    avg_abs_exp = 100 * np.mean(np.abs(errs_exp))
    std_16 = 100 * np.std(errs_16)

    print('%-*s: %0.2f,  %0.2f,  %0.2f,    %0.2f,  %0.2f,     %0.2f,         %0.2f,       %0.2f' % (
        25, tag, pct_90, pct_50, rms_metric, avg_16, avg_abs_16, avg_norm_abs_16, avg_abs_exp, std_16))
    metric.append(rms_metric)

print(np.mean(metric))

In [None]:
from matplotlib.pyplot import cm

# Scatter plot of relative average-weight error (loss factor 0.16) against the
# ground-truth average weight, one colour per model, using the tables currently
# bound to `all_dfs`.
plt.figure(figsize=(20, 10))

# One rainbow colour per model, in model order.
palette = cm.rainbow(np.linspace(0,1,len(models)))

for index, (model, c) in enumerate(zip(models, palette)):
    _, tag, _, _, _, _, _ = model
    model_results = all_dfs[index]
    plt.scatter(model_results.gt_avg_weight_col, model_results['avg_weight_error_0.16'], color = c, label=tag)

plt.legend()

In [None]:
# Results for the first evaluated model with AKPD cutoff 0.01 and the 'manual' hour filter.
all_dfs1[0]

In [None]:
# Results for the third evaluated model with AKPD cutoff 0.01 and the 'manual' hour filter.
# NOTE(review): requires at least three tables in `all_tdfs` — confirm on a fresh run.
all_dfs1[2]

In [None]:
# Results for the most recently evaluated model with AKPD cutoff 0.01 and the 'manual' hour filter.
all_dfs1[-1]

In [None]:
# Inspect the first row of the vikane cohort's annotation frame.
dfs['vikane_pen_id_60_2020-08-10_2020-08-30'].iloc[0]