In [1]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from aquabyte.visualize import Visualizer, _normalize_world_keypoints
import random
from scipy.stats import norm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from copy import copy

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
rds_access_utils = RDSAccessUtils(json.load(open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS'])))
query = """
    select * from research.fish_metadata a left join keypoint_annotations b
    on a.left_url = b.left_image_url 
    where b.keypoints is not null and b.is_qa = false;
"""
df = rds_access_utils.extract_from_database(query)

In [3]:
def get_world_keypoints(row):
    if 'leftCrop' in row.keypoints and 'rightCrop' in row.keypoints:
        return pixel2world(row.keypoints['leftCrop'], row.keypoints['rightCrop'], row.camera_metadata)
    else:
        return None
    
df['world_keypoints'] = df.apply(
    lambda x: get_world_keypoints(x), axis=1
)

df = df[~df.world_keypoints.isnull()]



  depth = focal_length_pixel * baseline / np.array(disp)


In [6]:
df.head()

Unnamed: 0,left_url,fish_id,weight,data,stereo_parameters_url,ts_created,ts_updated,data_collection_type_id,id,fish_detection_id,annotated_by_email,is_qa,is_skipped,is_blurry,is_dark,is_occluded,is_bad_orientation,is_partial,direction,keypoints,work_duration_left_ms,work_duration_right_ms,created_at,updated_at,site_id,pen_id,left_image_url,right_image_url,left_crop_metadata,right_crop_metadata,camera_metadata,captured_at,is_obscured_floy_tag,is_floy_tag_not_present,world_keypoints
0,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,190814-9df9971d-89bc-4d4a-a23e-d055acb4c1c0,6200,"{'species': 'salmon', 'location': 'Vikingfjord...",http://aquabyte-stereo-parameters.s3.eu-west-1...,2019-08-15 07:01:14.190397+00:00,2019-08-15 07:01:14.190397+00:00,,648742,,bati4@cogitotech.com,False,False,,,,,,LEFT,"{'version': 2, 'leftCrop': [{'xCrop': 930, 'yC...",59389,57733.0,2019-08-16 06:45:44.715821+00:00,2019-08-16 06:45:44.715821+00:00,1,2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,"{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'baseline': 0.12693501988129197, 'focalLength...",2019-08-14 07:02:20.147000+00:00,,,"{'UPPER_LIP': [-0.3056053353708363, 0.66920642..."
1,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,190808-2b62daf4-724d-48ce-9e7a-f750fd8e2fc2,7385,"{'species': 'salmon', 'location': 'Vikingfjord...",http://aquabyte-stereo-parameters.s3.eu-west-1...,2019-08-15 06:59:35.439147+00:00,2019-08-15 06:59:35.439147+00:00,,648746,,bati9@cogitotech.com,False,False,,,,,,LEFT,"{'version': 2, 'leftCrop': [{'xCrop': 594, 'yC...",24308,29713.0,2019-08-16 06:46:16.618962+00:00,2019-08-16 06:46:16.618962+00:00,1,2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,"{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'baseline': 0.12693501988129197, 'focalLength...",2019-08-08 11:38:34.798000+00:00,,,"{'UPPER_LIP': [-0.39659621278386314, 0.6677672..."
2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,190808-b1bd9c5a-aefe-444a-b31a-c90618a15445,6250,"{'species': 'salmon', 'location': 'Vikingfjord...",http://aquabyte-stereo-parameters.s3.eu-west-1...,2019-08-15 07:00:24.115124+00:00,2019-08-15 07:00:55.492142+00:00,,648756,,bati9@cogitotech.com,False,False,,,,,,LEFT,"{'version': 2, 'leftCrop': [{'xCrop': 720, 'yC...",26258,26982.0,2019-08-16 06:48:21.041620+00:00,2019-08-16 06:48:21.041620+00:00,1,2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,"{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'baseline': 0.12693501988129197, 'focalLength...",2019-08-08 11:47:38.862000+00:00,,,"{'UPPER_LIP': [-0.3630088420147322, 0.66920642..."
3,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,190808-6116e6fc-39d9-45fd-a9d6-44fcd13969a6,4990,"{'species': 'salmon', 'location': 'Vikingfjord...",http://aquabyte-stereo-parameters.s3.eu-west-1...,2019-08-15 06:59:58.790746+00:00,2019-08-15 06:59:58.790746+00:00,,648767,,bati9@cogitotech.com,False,False,,,,,,LEFT,"{'version': 2, 'leftCrop': [{'xCrop': 1063, 'y...",26841,28517.0,2019-08-16 06:51:18.998948+00:00,2019-08-16 06:51:18.998948+00:00,1,2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,"{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'baseline': 0.12693501988129197, 'focalLength...",2019-08-08 09:43:25.253000+00:00,,,"{'UPPER_LIP': [-0.2704153646312738, 0.67210341..."
4,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,190808-e8bdd8f4-83ea-424c-a10a-e8e8632f483f,3325,"{'species': 'salmon', 'location': 'Vikingfjord...",http://aquabyte-stereo-parameters.s3.eu-west-1...,2019-08-15 07:01:23.935502+00:00,2019-08-15 07:01:36.702504+00:00,,648770,,bati20@cogitotech.com,False,False,,,,,,LEFT,"{'version': 2, 'leftCrop': [{'xCrop': 1052, 'y...",29577,35384.0,2019-08-16 06:52:04.560847+00:00,2019-08-16 06:52:04.560847+00:00,1,2,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,http://aquabyte-axiom-rectified.s3.eu-west-1.a...,"{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'width': 4095, 'height': 2999, 'x_coord': 0, ...","{'baseline': 0.12693501988129197, 'focalLength...",2019-08-08 10:13:34.296000+00:00,,,"{'UPPER_LIP': [-0.2722566315110491, 0.66920642..."


In [7]:
blacklisted_keypoint_annotation_ids = [
    606484, 
    635806, 
    637801, 
    508773, 
    640493, 
    639409, 
    648536, 
    507003,
    706002,
    507000,
    709298,
    714073,
    719239
]

df = df[~df.id.isin(blacklisted_keypoint_annotation_ids)]

In [8]:
def _generate_rotation_matrix(u_base, v):
    u = v / np.linalg.norm(v)
    n = np.cross(u_base, u)
    n = n / np.linalg.norm(n)
    theta = -np.arccos(np.dot(u, u_base))

    R = np.array([[
        np.cos(theta) + n[0]**2*(1-np.cos(theta)), 
        n[0]*n[1]*(1-np.cos(theta)) - n[2]*np.sin(theta),
        n[0]*n[2]*(1-np.cos(theta)) + n[1]*np.sin(theta)
    ], [
        n[1]*n[0]*(1-np.cos(theta)) + n[2]*np.sin(theta),
        np.cos(theta) + n[1]**2*(1-np.cos(theta)),
        n[1]*n[2]*(1-np.cos(theta)) - n[0]*np.sin(theta),
    ], [
        n[2]*n[0]*(1-np.cos(theta)) - n[1]*np.sin(theta),
        n[2]*n[1]*(1-np.cos(theta)) + n[0]*np.sin(theta),
        np.cos(theta) + n[2]**2*(1-np.cos(theta))
    ]])
    
    return R

def _normalize_world_keypoints(wkps, rotate=True):
    body_parts = wkps.keys()
    
    # translate keypoints such that tail notch is at origin
    if wkps['UPPER_LIP'][0] > wkps['HYPURAL_PLATE'][0]:
        front_bp, back_bp = 'UPPER_LIP', 'HYPURAL_PLATE'
    else:
        front_bp, back_bp = 'HYPURAL_PLATE', 'UPPER_LIP'
        
    translated_wkps = {bp: wkps[bp] - wkps[back_bp] for bp in body_parts}

    if not rotate:
        return translated_wkps
    
    # perform first rotation
    u_base=np.array([1, 0, 0])
    v = translated_wkps[front_bp]
    R = _generate_rotation_matrix(u_base, v)
    norm_wkps_intermediate = {bp: np.dot(R, translated_wkps[bp]) for bp in body_parts}
    
    return norm_wkps_intermediate

In [9]:
BODY_PARTS = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])


In [11]:
# gtsf_fish_identifiers = list(df.fish_id.unique())
# train_size = int(0.8 * len(gtsf_fish_identifiers))
# fish_ids = random.sample(gtsf_fish_identifiers, train_size)
# train_mask = df.fish_id.isin(fish_ids)
train_mask = df.captured_at <= '2019-09-06'

In [12]:
df['focal_length_pixel'] = df.camera_metadata.apply(lambda x: x['focalLengthPixel']) 

In [19]:
df['length'] = df.world_keypoints.apply(lambda x: euclidean_distance(x['UPPER_LIP'], x['TAIL_NOTCH']))

In [20]:
df['gt_length'] = df.data.apply(lambda x: x['lengthMms'] / 1e3)

In [21]:
df['depth'] = df.world_keypoints.apply(lambda x: np.mean([k[1] for k in x.values()]))

In [22]:
df['length_diff'] = df.length - df.gt_length

In [23]:

df.loc[(~mask) & (df.depth > 0.7), ['depth', 'id', 'length_diff']].head(100)

Unnamed: 0,depth,id,length_diff
25,0.701409,707199,-0.008149
44,0.702175,707246,0.012383
53,0.706675,707263,-0.005169
64,0.704577,707290,0.00535
66,0.708706,707304,-0.000528
73,0.701467,707414,-0.003877
74,0.700268,707392,0.001077
77,0.700346,707445,0.000755
296,0.703759,707454,0.000188
320,0.973905,606085,-0.121296


In [24]:
df.loc[mask, ['depth', 'id', 'length_diff']]

Unnamed: 0,depth,id,length_diff
1867,0.661099,726134,0.008115
1868,0.645997,726146,0.003525
1869,0.650848,726162,0.010265
1870,0.648726,726164,0.002906
1871,0.65151,726182,-0.001318
3867,0.658833,726135,-0.003184
3868,0.652579,726141,0.01006
3869,0.653974,726138,0.003459
3870,0.645086,726147,0.010566
3871,0.659985,726157,0.003815


In [25]:

mask = (df.focal_length_pixel > 2447.71) & (df.focal_length_pixel < 2447.72)# & (df.captured_at < '2019-09-01')
np.median(df[mask].length - df[mask].gt_length)


0.007299647360026196

In [26]:
mask = (df.focal_length_pixel > 2447.71) & (df.focal_length_pixel < 2447.72)
np.median(df[~mask].length - df[~mask].gt_length)


-0.01065068469078656

In [29]:
def well_behaved(wkps, cutoff_depth=10.0):
    if any([abs(wkp[1]) > cutoff_depth for wkp in wkps.values()]):
        return False
    return True
    

def flip_center_kps(left_kps, right_kps, cm):
    
    x_min_l = min([kp[0] for kp in left_kps.values()])
    x_max_l = max([kp[0] for kp in left_kps.values()])
    x_mid_l = np.mean([x_min_l, x_max_l])
    
    y_min_l = min([kp[1] for kp in left_kps.values()])
    y_max_l = max([kp[1] for kp in left_kps.values()])
    y_mid_l = np.mean([y_min_l, y_max_l])
    
    x_min_r = min([kp[0] for kp in right_kps.values()])
    x_max_r = max([kp[0] for kp in right_kps.values()])
    x_mid_r = np.mean([x_min_r, x_max_r])
    
    y_min_r = min([kp[1] for kp in right_kps.values()])
    y_max_r = max([kp[1] for kp in right_kps.values()])
    y_mid_r = np.mean([y_min_r, y_max_r])
        
    fc_left_kps, fc_right_kps = {}, {}
    flip_factor = 1 if left_kps['UPPER_LIP'][0] > left_kps['TAIL_NOTCH'][0] else -1
    for bp in BODY_PARTS:
        left_kp, right_kp = left_kps[bp], right_kps[bp]
        if flip_factor > 0:
            fc_left_kp = np.array([left_kp[0] - x_mid_l, left_kp[1] - y_mid_l])
            fc_right_kp = np.array([right_kp[0] - x_mid_l, right_kp[1] - y_mid_l])
        else:
            fc_right_kp = np.array([x_mid_r - left_kp[0], left_kp[1] - y_mid_r])
            fc_left_kp = np.array([x_mid_r - right_kp[0], right_kp[1] - y_mid_r])
        fc_left_kps[bp] = fc_left_kp
        fc_right_kps[bp] = fc_right_kp
        
    return fc_left_kps, fc_right_kps


def translate_kps(left_kps, right_kps, factor):
    t_left_kps, t_right_kps = {}, {}
    for bp in BODY_PARTS:
        left_kp, right_kp = left_kps[bp], right_kps[bp]
        t_left_kps[bp] = factor * np.array(left_kps[bp])
        t_right_kps[bp] = factor * np.array(right_kps[bp])
    
    return t_left_kps, t_right_kps


def jitter_kps(left_kps, right_kps, jitter):
    j_left_kps, j_right_kps = {}, {}
    for bp in BODY_PARTS:
        j_left_kps[bp] = np.array([left_kps[bp][0] + np.random.normal(0, jitter), 
                                   left_kps[bp][1] + np.random.normal(0, jitter)])
        j_right_kps[bp] = np.array([right_kps[bp][0] + np.random.normal(0, jitter), 
                                    right_kps[bp][1] + np.random.normal(0, jitter)])
    
    return j_left_kps, j_right_kps


def modify_kps(left_kps, right_kps, factor, jitter, cm):
    fc_left_kps, fc_right_kps = flip_center_kps(left_kps, right_kps, cm)
    t_left_kps, t_right_kps = translate_kps(fc_left_kps, fc_right_kps, factor)
    j_left_kps, j_right_kps  = jitter_kps(t_left_kps, t_right_kps, jitter)
    j_left_kps_list, j_right_kps_list = [], []
    for bp in BODY_PARTS:
        l_item = {
            'keypointType': bp,
            'xFrame': j_left_kps[bp][0] + cm['pixelCountWidth'] / 2.0,
            'yFrame': j_left_kps[bp][1] + cm['pixelCountHeight'] / 2.0
        }
        
        r_item = {
            'keypointType': bp,
            'xFrame': j_right_kps[bp][0] + cm['pixelCountWidth'] / 2.0,
            'yFrame': j_right_kps[bp][1] + cm['pixelCountHeight'] / 2.0
        }
        
        j_left_kps_list.append(l_item)
        j_right_kps_list.append(r_item)
        
    modified_kps = {
        'leftCrop': j_left_kps_list,
        'rightCrop': j_right_kps_list
    }
    
    print(modified_kps)
    return modified_kps


def process_row(row, n_factors=1, jitters=[0], low=0.3, high=2.5, oos=False, network=None):
    X_row, labels_row, est_weights = [], [], []
    keypoints = row.keypoints
    left_keypoints_list = keypoints.get('leftCrop')
    right_keypoints_list = keypoints.get('rightCrop')
    cm = row.camera_metadata
    
    if left_keypoints_list and right_keypoints_list:
        wkps = pixel2world(left_keypoints_list, right_keypoints_list, cm)
        left_kps = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in left_keypoints_list}
        right_kps = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in right_keypoints_list}
        if well_behaved(wkps):
            for n in range(n_factors):
                factor = 1.0 if n_factors == 1 else np.random.uniform(low=low, high=high)
                for jitter in jitters:
                    trials = 3 if jitter > 0 else 1
                    for t in range(trials):
                        modified_kps = modify_kps(left_kps, right_kps, factor, jitter, cm)
                        modified_wkps = pixel2world(modified_kps['leftCrop'], modified_kps['rightCrop'], cm)
                        data_point = []
                        for bp in BODY_PARTS:
                            wkp = modified_wkps[bp]
                            data_point.append([wkp[0] / wkp[1], wkp[2] / wkp[1], 0.1 / wkp[1]])
                        X_row.append(data_point)
                        if not oos:
                            labels_row.append(row.weight)
                        if network:
                            u = torch.from_numpy(np.array(data_point) / 2.0).float()
                            est_weights.append(network(u.view(1, *u.shape)).item())

    return X_row, labels_row, est_weights




In [30]:

for ix, row in df[df.index == 11].iterrows():
    X_row, labels_row, est_weights = process_row(row, n_factors=1, jitters=[0], network=network)

NameError: name 'network' is not defined

In [None]:
%matplotlib inline
plt.gca().set_aspect('equal', adjustable='box')
plt.scatter(np.array(X_row[0])[:, 0], np.array(X_row[0])[:, 1])
plt.grid()
plt.show()

In [None]:
%matplotlib inline
plt.gca().set_aspect('equal', adjustable='box')
plt.scatter(np.array(X_row[0])[:, 0], np.array(X_row[0])[:, 2])
plt.grid()
plt.show()

In [None]:
df[df.id == kpids[k]].world_keypoints.iloc[0]

In [None]:
est_weights, labels_row

In [None]:
df[~train_mask].data.iloc[-1]

In [None]:
df[~train_mask].world_keypoints.iloc[-1]

In [None]:
df['length'] = df.world_keypoints.apply(lambda x: euclidean_distance(x['UPPER_LIP'], x['TAIL_NOTCH']))
df['gt_length'] = df.data.apply(lambda x: x['lengthMms'] / 1e3)

In [None]:
df[~train_mask].length - df[~train_mask].gt_length

In [None]:

df[train_mask].length - df[train_mask].gt_length

In [None]:
np.std(df[~train_mask & (df.weight < 1000)].length - df[~train_mask & (df.weight < 1000)].gt_length)

In [None]:
%matplotlib inline
idx = 3000
plt.gca().set_aspect('equal', adjustable='box')
plt.scatter(np.array(X[idx])[:, 0], np.array(X[idx])[:, 1])

In [None]:
np.random.seed(0)
random.seed(0)
X, labels = [], []


row_count = 0
for idx, row in df[train_mask].iterrows():
    
    X_row, labels_row, _ = process_row(row, n_factors=5, jitters=[0, 10, 20])
    X.extend(X_row)
    labels.extend(labels_row)
    
    if row_count % 1000 == 0:
        print('Percentage complete: {}'.format(row_count / df[train_mask].shape[0]))
    row_count += 1
    
X, labels = np.array(X) / 2.0, np.array(labels) / 10000.0


In [None]:
class KeypointsDataset(Dataset):
    """Keypoints dataset."""

    def __init__(self, X, labels, transform=None):
        self.X = X
        self.labels = labels

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.labels[idx]

        return torch.from_numpy(x).float(), torch.from_numpy(np.array([y])).float()


In [None]:
# TODO: Define your network architecture here
import torch
from torch import nn

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(33, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.output(x)
        return x
        



In [None]:
from torch.utils.data import DataLoader

dataset = KeypointsDataset(X, labels)
dataloader = DataLoader(dataset, batch_size=25, shuffle=True, num_workers=1)


In [None]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# network = Network()
# network = network.to(device)
epochs = 2000
optimizer = torch.optim.Adam(network.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data_batch in enumerate(dataloader):
        optimizer.zero_grad()
        X_batch, y_batch = data_batch
#         X_batch = X_batch.to(device)
#         y_batch = y_batch.to(device)
        y_pred = network(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 0 and i > 0:
            print(running_loss / i)
    
    loss_for_epoch = running_loss / len(dataloader)
    
    # print validation loss
    preds = network(torch.from_numpy(X_t).float())
    predictions = preds.detach().numpy().squeeze()
    weights = labels_t
    accuracy = np.mean(abs((predictions - weights) / weights))
    
    print('Loss for epoch {}: {}'.format(epoch, loss_for_epoch))
    print('Validation accuracy: {}'.format(accuracy))





In [None]:
loss_for_epoch

In [None]:
body_parts

In [None]:
X_t[idx]

In [None]:
print('Prediction: {}'.format(predictions[idx] * 1e4))
print('Ground Truth: {}'.format(labels_t[idx] * 1e4))

In [None]:
idx = 9
plt.gca().set_aspect('equal', adjustable='box')
plt.scatter(X[idx][:, 0], X[idx][:, 1])

In [None]:
u = torch.from_numpy(X[idx]).float()
pred = network(u.view(1, *u.shape)).item() * 1e4
print('Prediction: {}'.format(pred))
print('Ground Truth: {}'.format(labels[idx] * 1e4))

In [None]:
X[idx]

In [None]:
list(np.where(0.05 / X[:, :, 2].mean(axis=1) > 2)[0])

In [None]:
np.where(0.05 / X_t[:, :, 2].mean(axis=1) > 2)

In [None]:
loss_for_epoch

In [None]:
torch.cuda.is_available()

In [None]:
df.iloc[1833]

In [None]:
kpids, X_t, labels_t = [], [], []
body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

for idx, row in df[~train_mask].iterrows():
    X_row, labels_row, _ = process_row(row, n_factors=1, jitters=[0])
    X_t.extend(X_row)
    labels_t.extend(labels_row)
    kpids.append(row.id)
    
    
X_t, labels_t = np.array(X_t) / 2.0, np.array(labels_t) / 10000.0


 
 

In [None]:
kpids[22], kpids[24]

In [None]:
df[train_mask].camera_metadata.iloc[-1]

In [None]:
df[(~train_mask) & (df.weight > 1000)].camera_metadata.iloc[-1]

In [None]:
df[(~train_mask) & (df.weight < 1000)].id

In [None]:
preds = network(torch.from_numpy(X_t).float())
predictions = preds.detach().numpy().squeeze()
weights = labels_t

In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(weights * 1e4, predictions * 1e4)
plt.plot([0, 1e4], [0, 1e4], color='red')
plt.xlim([0, 1e4])
plt.ylim([0, 1e4])
plt.xlabel('Ground truth weight (grams)')
plt.ylabel('Prediction (grams)')
plt.grid()
plt.show()

In [None]:
plt.hist(.05 / X_t[:, 0, 2])
plt.show()

In [None]:
k = .05 / X[:, 0, 2]
plt.hist(k[(k > 0) & (k < 3.0)])
plt.show()

In [None]:
np.mean(abs((predictions - weights)/weights))

In [None]:
(predictions.mean() - weights.mean())/weights.mean()

In [None]:
f = '/root/data/alok/biomass_estimation/results/model_lateral_only/results_557ec1732d8bc8bc66951d2ea4e69b935d69b111_model_lateral_only_research-exp-id-03-vikingfjord-20190709-20190710.h5'
tdf = pd.read_hdf(f, 'table')



In [None]:
X_o, est_weights = [], []
body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

for idx, row in tdf.iterrows():
    X_row, _, est_weight = process_row(row, n_factors=1, jitters=[0], oos=True, network=network)
    X_o.extend(X_row)
    if len(est_weight) > 0:
        est_weights.extend(est_weight)
    else:
        est_weights.append(None)
    
X_o = np.array(X_o) / 2.0
        

 
 

In [None]:
tdf['est_weight'] = est_weights

In [None]:
tdf.est_weight.mean()

In [None]:
preds = network(torch.from_numpy(X_o).float())
predictions = preds.detach().numpy().squeeze()

In [None]:
plt.figure(figsize=(20, 10))
plt.hist(predictions * 1e4)
plt.show()

In [None]:
np.mean(predictions * 1e4)

In [None]:


plt.scatter(X_o[:, 1, 0], X_o[:, 1, 2], color='r', alpha=0.5)
plt.scatter(X[:, 1, 0], X[:, 1, 2], color='b', alpha=0.5)





In [None]:
tdf['depth'] = tdf.world_keypoints.apply(lambda x: x['EYE'][1] if x else None) 

In [None]:
plt.hist(tdf.depth)
plt.show()

In [None]:
725887, 725890

In [None]:
s3_access_utils = S3AccessUtils('/root/data')
credentials = json.load(open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS']))
rds_access_utils = RDSAccessUtils(credentials)
v = Visualizer(s3_access_utils, rds_access_utils)


In [None]:
s3_access_utils = S3AccessUtils('/root/data')
credentials = json.load(open(os.environ['PROD_SQL_CREDENTIALS']))
rds_access_utils = RDSAccessUtils(credentials)
v = Visualizer(s3_access_utils, rds_access_utils)


In [None]:
%matplotlib inline
v.load_data(564449)
v.display_crops()

In [None]:
%matplotlib notebook
v.display_3d_keypoints()

In [None]:
def get_fov_cutoffs(fov, cm):
    fov = fov * np.pi / 180.0
    field_size_px = 2*cm['focalLengthPixel'] * np.tan(fov / 2.0)
    min_cutoff = (cm['pixelCountWidth'] - field_size_px) / 2.0
    max_cutoff = (cm['pixelCountWidth'] + field_size_px) / 2.0
    return min_cutoff, max_cutoff

def is_preserved(keypoints, min_cutoff, max_cutoff):
    min_x_left = min([item['xFrame'] for item in keypoints['leftCrop']])
    max_x_left = max([item['xFrame'] for item in keypoints['leftCrop']])
    min_x_right = min([item['xFrame'] for item in keypoints['rightCrop']])
    max_x_right = max([item['xFrame'] for item in keypoints['rightCrop']])
    
    if (min_x_left < min_cutoff) or (min_x_right < min_cutoff) or (max_x_left > max_cutoff) or (max_x_right > max_cutoff):
        return False
    return True
        
    
    
    
    

In [None]:
fovs = list(np.arange(10, 55, 1))
for fov in fovs:
    min_cutoff, max_cutoff = get_fov_cutoffs(fov, tdf.camera_metadata.iloc[0])
    is_preserved_list = []
    for idx, row in tdf.iterrows():
        keypoints = row.keypoints
        if 'leftCrop' in keypoints and 'rightCrop' in keypoints:
            is_preserved_list.append(is_preserved(keypoints, min_cutoff, max_cutoff))
        else:
            is_preserved_list.append(False)

    tdf['is_preserved_{}'.format(fov)] = is_preserved_list



In [None]:
pred_weight_means, sample_sizes = [], []
for fov in fovs:
    mask = tdf['is_preserved_{}'.format(fov)] == True
#     pred_weight_means.append(tdf[mask].est_weight.mean() * 1e4)
    pred_weight_means.append(tdf[mask].estimated_biomass_g.median())
    sample_sizes.append(tdf[mask].shape[0])


<h1> Waiting pen ID #1 </h1>

In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, pred_weight_means, s=80)
plt.xlabel('Field of View (degrees)')
plt.ylabel('Estimated biomass (g)')
plt.title('Empirlcal Optical Samling Bias')
plt.grid()
plt.show()



In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, sample_sizes)
plt.grid()
plt.show()



<h1> Waiting pen ID #2 </h1>

In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, pred_weight_means, s=80)
plt.xlabel('Field of View (degrees)')
plt.ylabel('Estimated biomass (g)')
plt.title('Empirlcal Optical Samling Bias')
plt.grid()
plt.show()



In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, sample_sizes)
plt.grid()
plt.show()



<h1> Waiting Pen ID #3 </h1>

In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, pred_weight_means, s=80)
plt.xlabel('Field of View (degrees)')
plt.ylabel('Estimated biomass (g)')
plt.title('Empirlcal Optical Samling Bias')
plt.grid()
plt.show()



In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, sample_sizes)
plt.grid()
plt.show()



In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, pred_weight_means, s=80)
plt.xlabel('Field of View (degrees)')
plt.ylabel('Estimated biomass (g)')
plt.title('Empirlcal Optical Samling Bias')
plt.grid()
plt.show()



In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(fovs, sample_sizes)
plt.grid()
plt.show()



In [None]:
tdf[tdf.depth > 1.75].est_weight.mean()

In [None]:
tdf[tdf.is_preserved_29 == True]