# GTSF phase I: biomass prediction

In this notebook, we forecast fish weights (biomass) by finding the closest Blender model.

### Look at the volumes created with blender

Load blender data

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
import tempfile
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.automap import automap_base
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from scipy.stats import norm
import tqdm
import pickle
from itertools import combinations
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.optics import euclidean_distance

from PIL import Image, ImageDraw
from multiprocessing import Pool, Manager
import copy
import uuid
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_rows', 500)





<h1> Get world keypoint coordinates from GTSF data </h1>

In [None]:
# Read the research-database credentials from the JSON file whose path is given by the
# SQL_CREDENTIALS environment variable; the context manager closes the file handle.
with open(os.environ["SQL_CREDENTIALS"]) as credentials_file:
    research_sql_credentials = json.load(credentials_file)
research_rds_access_utils = RDSAccessUtils(research_sql_credentials)
sql_engine = research_rds_access_utils.sql_engine
Session = sessionmaker(bind=sql_engine)
session = Session()

# Reflect the existing tables into automapped ORM classes.
Base = automap_base()
Base.prepare(sql_engine, reflect=True)
Enclosure = Base.classes.enclosures
Calibration = Base.classes.calibrations
GtsfDataCollection = Base.classes.gtsf_data_collections
StereoFramePair = Base.classes.stereo_frame_pairs

In [None]:
# S3 helper used below to download the stereo-parameter files.
# NOTE(review): '/root/data' is presumably the local download/cache root — confirm against S3AccessUtils.
s3_access_utils = S3AccessUtils('/root/data')

<h1> Create training dataset </h1>

In [None]:
# Build the training frame: one row per salmon stereo frame pair, holding every pairwise
# distance between world keypoints plus the ground-truth weight/length metadata.
session.rollback()
sfps_all = session.query(StereoFramePair).all()

body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT',
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

session.rollback()

# Rows are collected in a list and turned into a DataFrame once at the end; growing a
# DataFrame with append() inside the loop copies the whole frame on every iteration.
dataset_rows = []
for idx, row in enumerate(sfps_all):
    if idx % 100 == 0:
        print(idx)

    # get fish_id and ground truth metadata
    if row.gtsf_fish_identifier == '190321010002':
        continue
    ground_truth_metadata = json.loads(row.ground_truth_metadata)
    # A record without a 'data' section has no species and is skipped like any non-salmon.
    ground_truth_data = ground_truth_metadata.get('data') or {}
    if ground_truth_data.get('species') != 'salmon':
        continue

    # Weight/length are stored under either of two key spellings.
    weight_key = 'weight' if 'weight' in ground_truth_data else 'weightKgs'
    length_key = 'length' if 'length' in ground_truth_data else 'lengthMms'
    weight = ground_truth_data.get(weight_key)
    length = ground_truth_data.get(length_key)
    if not weight:
        print('No weight recorded for GTSF fish identifier: {}'.format(row.gtsf_fish_identifier))
        continue
    # Leave kfactor empty when no length was recorded instead of failing on the lookup.
    kfactor = (weight / length**3) * 1e5 if length else None

    left_keypoints = json.loads(row.left_image_keypoint_coordinates)
    right_keypoints = json.loads(row.right_image_keypoint_coordinates)
    wkps = json.loads(row.world_keypoint_coordinates)

    # Pairwise distances are keyed by the positions of the two parts in the sorted body_parts list.
    df_row = {'0': idx}
    for i in range(len(body_parts)-1):
        for j in range(i+1, len(body_parts)):
            d = euclidean_distance(wkps[body_parts[i]], wkps[body_parts[j]])
            df_row['{0}-{1}'.format(i, j)] = d

    # calculate curvature: angle between the normal of the front plane
    # (UPPER_LIP, DORSAL_FIN, PELVIC_FIN) and the normal of the back plane
    # (TAIL_NOTCH, PELVIC_FIN, DORSAL_FIN); each keypoint is reordered as [index 2, index 1, index 0]
    wkp = {bp: [wkps[bp][2], wkps[bp][1], wkps[bp][0]] for bp in body_parts}
    fv1 = np.array(wkp['UPPER_LIP']) - np.array(wkp['DORSAL_FIN'])
    fv2 = np.array(wkp['UPPER_LIP']) - np.array(wkp['PELVIC_FIN'])
    n1 = np.cross(fv1, fv2)

    bv1 = np.array(wkp['PELVIC_FIN']) - np.array(wkp['TAIL_NOTCH'])
    bv2 = np.array(wkp['DORSAL_FIN']) - np.array(wkp['TAIL_NOTCH'])
    n2 = np.cross(bv1, bv2)
    # Clip so floating-point round-off just outside [-1, 1] cannot turn the angle into NaN.
    cos_theta = np.clip(np.dot(n1, n2) / (np.linalg.norm(n1) * np.linalg.norm(n2)), -1.0, 1.0)
    curvature_theta = (180 / np.pi) * np.arccos(cos_theta)

    df_row['weight'] = weight
    df_row['length'] = length
    df_row['kfactor'] = kfactor
    df_row['date'] = row.date
    df_row['project_name'] = row.annotations_project_name
    df_row['left_keypoints'] = left_keypoints
    df_row['right_keypoints'] = right_keypoints
    df_row['world_keypoints'] = wkps
    df_row['gtsf_fish_identifier'] = row.gtsf_fish_identifier
    df_row['epoch'] = row.epoch
    df_row['stereo_frame_pair_id'] = row.id
    df_row['curvature_theta'] = curvature_theta

    dataset_rows.append(df_row)

# A default RangeIndex, exactly as append(..., ignore_index=True) produced.
df = pd.DataFrame(dataset_rows)
            
    



<h1> Train Model with Old Calibration </h1>

In [None]:
def generate_train_mask(df, train_frac, randomize=True):
    """Return a boolean NumPy array marking which rows of ``df`` belong to the training set.

    Args:
        df: frame whose row count determines the mask length.
        train_frac: fraction of rows to flag as training; ``int(train_frac * n_rows)``
            entries are set to True.
        randomize: when True (default) the True entries are shuffled in place with
            NumPy's global RNG, so seed it beforehand for reproducibility; when False
            the leading rows are the training rows.

    Returns:
        1-D ``numpy.ndarray`` of dtype bool. The mask is positional: it carries no index.
    """
    num_rows = df.shape[0]
    x = np.zeros(num_rows, dtype=bool)
    x[:int(train_frac * num_rows)] = True
    if randomize:
        np.random.shuffle(x)
    return x
    


In [None]:
# define all features

body_parts_subset = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
])

# Positions of the subset inside the full body_parts list; the '<i>-<j>' pairwise-distance
# column names of df are built from these positions.
body_part_indices = [body_parts.index(bp) for bp in body_parts_subset]

pairwise_distance_columns = ['{0}-{1}'.format(x, y) for x, y in combinations(body_part_indices, 2)]
num_pairwise = len(pairwise_distance_columns)

# Quadratic (col1*col2) and cubic (col1*col2*col3) interaction terms over every
# combination-with-repetition of the pairwise-distance columns. They are gathered in a
# dict and attached with one concat: assigning several thousand columns one at a time
# fragments the frame and triggers pandas' PerformanceWarning.
interaction_features = {}
interaction_columns_quadratic = []
interaction_columns_cubic = []
for i in range(num_pairwise):
    col1 = pairwise_distance_columns[i]
    for j in range(i, num_pairwise):
        col2 = pairwise_distance_columns[j]
        pair_product = df[col1] * df[col2]
        interaction_column = '{},{}'.format(col1, col2)
        interaction_features[interaction_column] = pair_product
        interaction_columns_quadratic.append(interaction_column)

        # the cubic terms for this (i, j) reuse the pair product computed above
        for k in range(j, num_pairwise):
            col3 = pairwise_distance_columns[k]
            interaction_column = '{},{},{}'.format(col1, col2, col3)
            interaction_features[interaction_column] = pair_product * df[col3]
            interaction_columns_cubic.append(interaction_column)

# Drop any interaction columns left from a previous run of this cell so re-running
# overwrites them instead of duplicating column names.
df = pd.concat(
    [
        df.drop(columns=list(interaction_features), errors='ignore'),
        pd.DataFrame(interaction_features, index=df.index),
    ],
    axis=1,
)
            

In [None]:
# Seed NumPy's global RNG so the shuffled train mask below is reproducible.
np.random.seed(0)

# Start from a random 80% draw, then keep only rows whose identifier does not contain
# '190620' and whose weight exceeds 2000. Rows failing either filter end up in ~mask,
# i.e. they are part of the "test" rows selected below.
mask = generate_train_mask(df, train_frac=0.8)
mask = mask & (~df.gtsf_fish_identifier.str.contains('190620')) & (df.weight > 2000)
columns = pairwise_distance_columns + interaction_columns_quadratic + interaction_columns_cubic

X_train = df.loc[mask, columns].values
y_train = df.loc[mask, 'weight'].values
X_test = df.loc[~mask, columns].values
y_test = df.loc[~mask, 'weight'].values

# Standardize using statistics of the training rows only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_normalized = scaler.transform(X_train)

# First PCA pass with the maximum number of components, used only to find how many
# components are needed for the cumulative explained-variance ratio to exceed 0.999999.
pca = PCA(n_components=min(X_train_normalized.shape[0], X_train_normalized.shape[1]))
pca.fit(X_train_normalized)
explained_variance_ratio = pca.explained_variance_ratio_.cumsum()
idx = np.where(explained_variance_ratio > 0.999999)[0][0]

# Second PCA pass keeps exactly that many components (idx is zero-based, hence +1).
pca = PCA(n_components=idx+1)
pca.fit(X_train_normalized)
X_train_transformed = pca.transform(X_train_normalized)
X_test_normalized = scaler.transform(X_test)
X_test_transformed = pca.transform(X_test_normalized)

reg = LinearRegression().fit(X_train_transformed, y_train)

# Predict for every row of df (train and test alike) and store signed / relative errors.
y_pred = reg.predict(pca.transform(scaler.transform(df[columns].values)))
df['prediction'] = y_pred
df['error'] = df.prediction - df.weight
df['error_pct'] = df.error / df.weight
df['abs_error_pct'] = df.error_pct.abs()

# Fitted parameters of the scaler -> PCA -> linear-regression pipeline, gathered in a plain dict.
model = {
    'mean': scaler.mean_,
    'std': scaler.scale_,
    'PCA_components': pca.components_,
    'reg_coef': reg.coef_,
    'reg_intercept': reg.intercept_,
    'body_parts': body_parts_subset
}



In [None]:
# Accuracy-metrics helper built from the boolean train mask, the predictions and the ground-truth weights of df.
# NOTE(review): presumably the mask is used to report train and test metrics separately — confirm in aquabyte.accuracy_metrics.
amg = AccuracyMetricsGenerator(mask.values, df.prediction.values, df.weight.values)

In [None]:
# Plot predictions against ground truth for the `amg` object built in the previous cell.
amg.plot_predictions_vs_ground_truth(impose_bounds=True)

In [None]:
# Persist the new-calibration model. `new_model` is only created further down, in the
# "Train model on new calibration data" section, so on a top-to-bottom run it does not
# exist yet when this cell executes. Guard against that instead of raising NameError,
# and close the output file deterministically.
if 'new_model' in globals():
    with open('/root/data/models/new_model.pkl', 'wb') as model_file:
        pickle.dump(new_model, model_file)
else:
    print('new_model is not defined yet; train the new-calibration model before saving it.')

<h1> Generate world keypoints given new calibration </h1>

In [None]:
def convert_to_world_point(x, y, d, pixel_count_width, 
                           pixel_count_height, image_sensor_width, 
                           image_sensor_height, focal_length):
    """Back-project pixel ``(x, y)`` at depth ``d`` to a world point ``[x, y, z]``.

    The pixel offset from the image centre is scaled to sensor units and then
    projected out to depth ``d`` through a pinhole with the given focal length.
    ``d`` is returned unchanged as the middle (world-y) coordinate.
    """
    # Offsets from the image centre; the vertical one has its sign flipped relative to pixel y.
    horizontal_offset_px = x - pixel_count_width / 2.0
    vertical_offset_px = pixel_count_height / 2.0 - y

    # Pixel offsets converted to sensor-plane offsets.
    sensor_offset_x = horizontal_offset_px * (image_sensor_width / pixel_count_width)
    sensor_offset_z = vertical_offset_px * (image_sensor_height / pixel_count_height)

    # Similar triangles: world offset / depth == sensor offset / focal length.
    return [
        (d * sensor_offset_x) / focal_length,
        d,
        (d * sensor_offset_z) / focal_length,
    ]



def depth_from_disp(disp, focal_length_pixel, baseline):
    """Convert disparity to depth: ``focal_length_pixel * baseline / disp``.

    ``disp`` is passed through ``np.array`` first, so a scalar or a sequence of
    disparities is accepted and the division is element-wise.
    """
    disparity = np.array(disp)
    return focal_length_pixel*baseline / disparity


# NOTE: this definition shadows the euclidean_distance imported from aquabyte.optics
# in the notebook's import cell for every cell executed after it.
def euclidean_distance(p1, p2):
    """Euclidean distance between two points, using their first three coordinates."""
    squared_sum = sum((p1[axis] - p2[axis])**2 for axis in range(3))
    return squared_sum**0.5

In [None]:
def _load_camera_intrinsics(camera_params):
    """Return ``(camera_matrix, dist_coeffs)`` for one camera entry of the parameters JSON."""
    camera_matrix = np.array(camera_params['IntrinsicMatrix']).transpose()
    radial = camera_params['RadialDistortion']
    # Assemble the coefficients as: first two radial terms, the tangential terms, third radial term.
    dist_coeffs = np.array(radial[0:2] + camera_params['TangentialDistortion'] + [radial[2]])
    return camera_matrix, dist_coeffs


def load_params(params_file, image_size=(4096, 3000)):
    """Load a stereo-parameters JSON file and compute its rectification.

    Args:
        params_file: path to a JSON file with the keys ``CameraParameters1``,
            ``CameraParameters2``, ``RotationOfCamera2`` and ``TranslationOfCamera2``.
        image_size: ``(width, height)`` handed to OpenCV; defaults to the
            ``(4096, 3000)`` value this notebook has always used.

    Returns:
        Tuple ``(left_maps, right_maps, cameraMatrix1, distCoeffs1, R1, P1,
        cameraMatrix2, distCoeffs2, R2, P2)``, where the maps are the outputs of
        ``cv2.initUndistortRectifyMap`` and R1/P1/R2/P2 those of ``cv2.stereoRectify``.
    """
    # Context manager so the file handle is closed as soon as the JSON is parsed.
    with open(params_file) as params_fp:
        params = json.load(params_fp)

    cameraMatrix1, distCoeffs1 = _load_camera_intrinsics(params['CameraParameters1'])
    cameraMatrix2, distCoeffs2 = _load_camera_intrinsics(params['CameraParameters2'])

    R = np.array(params['RotationOfCamera2']).transpose()
    T = np.array(params['TranslationOfCamera2']).transpose()

    # perform rectification
    (R1, R2, P1, P2, Q, leftROI, rightROI) = cv2.stereoRectify(
        cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2, image_size, R, T,
        None, None, None, None, None, cv2.CALIB_ZERO_DISPARITY, 0)

    left_maps = cv2.initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, image_size, cv2.CV_16SC2)
    right_maps = cv2.initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, image_size, cv2.CV_16SC2)

    return left_maps, right_maps, cameraMatrix1, distCoeffs1, R1, P1, cameraMatrix2, distCoeffs2, R2, P2



In [None]:
# Show the downloaded stereo-parameter file paths. Both names are assigned in the next
# cell, so on a top-to-bottom run they do not exist yet; look them up defensively so
# this cell displays (None, None) instead of raising NameError.
globals().get('stereo_params_old_file'), globals().get('stereo_params_new_file')

In [None]:
left_keypoints_new_list = []
right_keypoints_new_list = []
world_keypoints_new_list = []


# get old stereo parameters files

stereo_params_old = {
    'bucket': 'aquabyte-stereo-parameters',
    'key': 'L40013178_R40014310/2019-03-05T00:00:00Z_L40013178_R40014310_stereo-parameters.json'
}

stereo_params_new = {
    'bucket': 'aquabyte-stereo-parameters',
    'key': 'L40013178_R40014310/2019-07-16T00:00:00Z_L40013178_R40014310_stereo-parameters.json'
}

stereo_params_old_file = s3_access_utils.download_from_s3(stereo_params_old['bucket'], stereo_params_old['key'])
stereo_params_new_file = s3_access_utils.download_from_s3(stereo_params_new['bucket'], stereo_params_new['key'])

# get parameter for old and new stereo parameters
left_maps, right_maps, cameraMatrix1, distCoeffs1, R1, P1, cameraMatrix2, distCoeffs2, R2, P2 = load_params(stereo_params_old_file)
stereo_params_old.update({
    'left_maps': left_maps,
    'right_maps': right_maps,
    'cameraMatrix1': cameraMatrix1,
    'distCoeffs1': distCoeffs1,
    'R1': R1,
    'P1': P1,
    'cameraMatrix2': cameraMatrix2,
    'distCoeffs2': distCoeffs2,
    'R2': R2,
    'P2': P2
})


left_maps, right_maps, cameraMatrix1, distCoeffs1, R1, P1, cameraMatrix2, distCoeffs2, R2, P2 = load_params(stereo_params_new_file)
stereo_params_new.update({
    'left_maps': left_maps,
    'right_maps': right_maps,
    'cameraMatrix1': cameraMatrix1,
    'distCoeffs1': distCoeffs1,
    'R1': R1,
    'P1': P1,
    'cameraMatrix2': cameraMatrix2,
    'distCoeffs2': distCoeffs2,
    'R2': R2,
    'P2': P2
})

# Baseline and focal length are read from the NEW calibration file; the context manager
# closes the file handle once the JSON is parsed.
with open(stereo_params_new_file) as stereo_params_fp:
    stereo_params = json.load(stereo_params_fp)
baseline = abs(stereo_params['TranslationOfCamera2'][0] / 1e3) # convert millimeters to meters and use absolute value
focal_length_pixel = stereo_params['CameraParameters1']['FocalLength'][0]
pixel_size_m = 3.45 * 1e-6
focal_length = focal_length_pixel * pixel_size_m
image_sensor_width = 0.01412
image_sensor_height = 0.01035
pixel_count_width = 4096
pixel_count_height = 3000


def _rerectify_keypoints(keypoints_old, old_maps, camera_matrix, dist_coeffs, R, P):
    """Map keypoints rectified with the old calibration into the new rectification.

    Each ``[x, y]`` keypoint is looked up in the first old rectification map at
    ``[y, x]`` to get its coordinates in the unrectified image, then passed through
    ``cv2.undistortPoints`` with the new camera matrix, distortion coefficients, R and P.
    Returns a dict ``body_part -> [x_new, y_new]`` rounded to integers.
    """
    keypoints_new = {}
    for body_part, coordinates in keypoints_old.items():
        j, i = coordinates[0], coordinates[1]
        unrectified_coordinates = old_maps[0][i, j]
        rerectified_coordinates = cv2.undistortPoints(
            np.array([[unrectified_coordinates]]).astype(float),
            camera_matrix,
            dist_coeffs,
            R=R,
            P=P
        )
        new_x, new_y = rerectified_coordinates[0][0][0], rerectified_coordinates[0][0][1]
        keypoints_new[body_part] = [int(round(new_x)), int(round(new_y))]
    return keypoints_new


for idx, row in df.iterrows():
    # generate new left / right keypoint coordinates based on new rectification
    left_keypoints_new = _rerectify_keypoints(
        row.left_keypoints,
        stereo_params_old['left_maps'],
        stereo_params_new['cameraMatrix1'],
        stereo_params_new['distCoeffs1'],
        stereo_params_new['R1'],
        stereo_params_new['P1'],
    )
    right_keypoints_new = _rerectify_keypoints(
        row.right_keypoints,
        stereo_params_old['right_maps'],
        stereo_params_new['cameraMatrix2'],
        stereo_params_new['distCoeffs2'],
        stereo_params_new['R2'],
        stereo_params_new['P2'],
    )

    # generate new world keypoints
    world_keypoints_new = {}
    for body_part in body_parts:
        lkp = left_keypoints_new[body_part]
        rkp = right_keypoints_new[body_part]

        # disparity: horizontal offset between the left and right keypoints
        d = abs(lkp[0] - rkp[0])

        # compute world key point
        depth = depth_from_disp(d, focal_length_pixel, baseline)
        wkp = convert_to_world_point(lkp[0], lkp[1], depth, pixel_count_width,
                                     pixel_count_height, image_sensor_width,
                                     image_sensor_height, focal_length)

        world_keypoints_new[body_part] = wkp

    left_keypoints_new_list.append(left_keypoints_new)
    right_keypoints_new_list.append(right_keypoints_new)
    world_keypoints_new_list.append(world_keypoints_new)



    
        

    


In [None]:
# Attach the re-rectified keypoints to df. The lists were filled by iterating df row by row,
# so they must be assigned while df still has the same rows in the same order.
df['left_keypoints_new'] = left_keypoints_new_list
df['right_keypoints_new'] = right_keypoints_new_list
df['world_keypoints_new'] = world_keypoints_new_list

In [None]:
# Spot-check the re-rectified right-image keypoints of one specific fish;
# .iloc[0] raises IndexError if no row carries this identifier.
df[df.gtsf_fish_identifier == '190618010020_vikingfjord-sunde'].right_keypoints_new.iloc[0]


In [None]:
""

In [None]:
{"version": 2, "leftCrop": [{"xCrop": 119, "yCrop": 324, "xFrame": 753, "yFrame": 1542, "keypointType": "UPPER_LIP"}, {"xCrop": 211, "yCrop": 345, "xFrame": 845, "yFrame": 1563, "keypointType": "EYE"}, {"xCrop": 954, "yCrop": 97, "xFrame": 1588, "yFrame": 1315, "keypointType": "DORSAL_FIN"}, {"xCrop": 1642, "yCrop": 174, "xFrame": 2276, "yFrame": 1392, "keypointType": "ADIPOSE_FIN"}, {"xCrop": 1875, "yCrop": 227, "xFrame": 2509, "yFrame": 1445, "keypointType": "UPPER_PRECAUDAL_PIT"}, {"xCrop": 2036, "yCrop": 311, "xFrame": 2670, "yFrame": 1529, "keypointType": "HYPURAL_PLATE"}, {"xCrop": 2150, "yCrop": 314, "xFrame": 2784, "yFrame": 1532, "keypointType": "TAIL_NOTCH"}, {"xCrop": 1861, "yCrop": 387, "xFrame": 2495, "yFrame": 1605, "keypointType": "LOWER_PRECAUDAL_PIT"}, {"xCrop": 1541, "yCrop": 506, "xFrame": 2175, "yFrame": 1724, "keypointType": "ANAL_FIN"}, {"xCrop": 1126, "yCrop": 610, "xFrame": 1760, "yFrame": 1828, "keypointType": "PELVIC_FIN"}, {"xCrop": 435, "yCrop": 524, "xFrame": 1069, "yFrame": 1742, "keypointType": "PECTORAL_FIN"}], "rightCrop": [{"xCrop": 94, "yCrop": 322, "xFrame": 266, "yFrame": 1540, "keypointType": "UPPER_LIP"}, {"xCrop": 170, "yCrop": 341, "xFrame": 342, "yFrame": 1559, "keypointType": "EYE"}, {"xCrop": 910, "yCrop": 94, "xFrame": 1082, "yFrame": 1312, "keypointType": "DORSAL_FIN"}, {"xCrop": 1623, "yCrop": 169, "xFrame": 1795, "yFrame": 1387, "keypointType": "ADIPOSE_FIN"}, {"xCrop": 1859, "yCrop": 224, "xFrame": 2031, "yFrame": 1442, "keypointType": "UPPER_PRECAUDAL_PIT"}, {"xCrop": 2029, "yCrop": 303, "xFrame": 2201, "yFrame": 1521, "keypointType": "HYPURAL_PLATE"}, {"xCrop": 2149, "yCrop": 305, "xFrame": 2321, "yFrame": 1523, "keypointType": "TAIL_NOTCH"}, {"xCrop": 1851, "yCrop": 382, "xFrame": 2023, "yFrame": 1600, "keypointType": "LOWER_PRECAUDAL_PIT"}, {"xCrop": 1526, "yCrop": 501, "xFrame": 1698, "yFrame": 1719, "keypointType": "ANAL_FIN"}, {"xCrop": 1092, "yCrop": 606, "xFrame": 1264, "yFrame": 1824, "keypointType": 
"PELVIC_FIN"}, {"xCrop": 393, "yCrop": 520, "xFrame": 565, "yFrame": 1738, "keypointType": "PECTORAL_FIN"}]}

In [None]:
# Recompute the pairwise keypoint distances from the new-calibration world keypoints.
# Rows are collected in a list and converted once; appending to a DataFrame inside the
# loop copies the whole frame on every iteration.
new_rows = []
for idx, row in df.iterrows():

    wkps = row.world_keypoints_new
    df_row = {'0': idx}
    for i in range(len(body_parts)-1):
        for j in range(i+1, len(body_parts)):
            d = euclidean_distance(wkps[body_parts[i]], wkps[body_parts[j]])
            df_row['{0}-{1}'.format(i, j)] = d

    df_row['weight'] = row.weight
    df_row['gtsf_fish_identifier'] = row.gtsf_fish_identifier
    new_rows.append(df_row)

# A default RangeIndex, exactly as append(..., ignore_index=True) produced.
new_df = pd.DataFrame(new_rows)
    

<h1> Clean the data </h1>

In [None]:
# Remove one specific fish from both frames; deep copies keep the results independent
# of the frames they were sliced from.
excluded_fish_identifier = '190620-4e4e0640-d4eb-405d-8fcf-57fda11d7660'
keep_old_rows = df.gtsf_fish_identifier != excluded_fish_identifier
keep_new_rows = new_df.gtsf_fish_identifier != excluded_fish_identifier
df = df[keep_old_rows].copy(deep=True)
new_df = new_df[keep_new_rows].copy(deep=True)

<h1> Train model on new calibration data </h1>

In [None]:
def generate_train_mask(df, train_frac, randomize=True):
    """Return a boolean NumPy array marking which rows of ``df`` belong to the training set.

    Args:
        df: frame whose row count determines the mask length.
        train_frac: fraction of rows to flag as training; ``int(train_frac * n_rows)``
            entries are set to True.
        randomize: when True (default) the True entries are shuffled in place with
            NumPy's global RNG, so seed it beforehand for reproducibility; when False
            the leading rows are the training rows.

    Returns:
        1-D ``numpy.ndarray`` of dtype bool. The mask is positional: it carries no index.
    """
    num_rows = df.shape[0]
    x = np.zeros(num_rows, dtype=bool)
    x[:int(train_frac * num_rows)] = True
    if randomize:
        np.random.shuffle(x)
    return x
    


In [None]:
# define all features

body_parts_subset = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
])

# Positions of the subset inside the full body_parts list; the '<i>-<j>' pairwise-distance
# column names of new_df are built from these positions.
body_part_indices = [body_parts.index(bp) for bp in body_parts_subset]

pairwise_distance_columns = ['{0}-{1}'.format(x, y) for x, y in combinations(body_part_indices, 2)]
num_pairwise = len(pairwise_distance_columns)

# Quadratic (col1*col2) and cubic (col1*col2*col3) interaction terms over every
# combination-with-repetition of the pairwise-distance columns. They are gathered in a
# dict and attached with one concat: assigning several thousand columns one at a time
# fragments the frame and triggers pandas' PerformanceWarning.
interaction_features = {}
interaction_columns_quadratic = []
interaction_columns_cubic = []
for i in range(num_pairwise):
    col1 = pairwise_distance_columns[i]
    for j in range(i, num_pairwise):
        col2 = pairwise_distance_columns[j]
        pair_product = new_df[col1] * new_df[col2]
        interaction_column = '{},{}'.format(col1, col2)
        interaction_features[interaction_column] = pair_product
        interaction_columns_quadratic.append(interaction_column)

        # the cubic terms for this (i, j) reuse the pair product computed above
        for k in range(j, num_pairwise):
            col3 = pairwise_distance_columns[k]
            interaction_column = '{},{},{}'.format(col1, col2, col3)
            interaction_features[interaction_column] = pair_product * new_df[col3]
            interaction_columns_cubic.append(interaction_column)

# Drop any interaction columns left from a previous run of this cell so re-running
# overwrites them instead of duplicating column names.
new_df = pd.concat(
    [
        new_df.drop(columns=list(interaction_features), errors='ignore'),
        pd.DataFrame(interaction_features, index=new_df.index),
    ],
    axis=1,
)
            

In [None]:
# Seed NumPy's global RNG so the shuffled train mask below is reproducible.
np.random.seed(0)

# Random 80% draw, minus every row whose identifier contains '190620'; those rows end up
# in ~mask, i.e. among the "test" rows selected below. Unlike the old-calibration cell,
# no weight filter is applied here.
mask = generate_train_mask(new_df, train_frac=0.8)
mask = mask & (~new_df.gtsf_fish_identifier.str.contains('190620'))
columns = pairwise_distance_columns + interaction_columns_quadratic + interaction_columns_cubic

X_train = new_df.loc[mask, columns].values
y_train = new_df.loc[mask, 'weight'].values
X_test = new_df.loc[~mask, columns].values
y_test = new_df.loc[~mask, 'weight'].values

# Standardize using statistics of the training rows only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_normalized = scaler.transform(X_train)

# First PCA pass with the maximum number of components, used only to find how many
# components are needed for the cumulative explained-variance ratio to exceed 0.999999.
pca = PCA(n_components=min(X_train_normalized.shape[0], X_train_normalized.shape[1]))
pca.fit(X_train_normalized)
explained_variance_ratio = pca.explained_variance_ratio_.cumsum()
idx = np.where(explained_variance_ratio > 0.999999)[0][0]

# Second PCA pass keeps exactly that many components (idx is zero-based, hence +1).
pca = PCA(n_components=idx+1)
pca.fit(X_train_normalized)
X_train_transformed = pca.transform(X_train_normalized)
X_test_normalized = scaler.transform(X_test)
X_test_transformed = pca.transform(X_test_normalized)

reg = LinearRegression().fit(X_train_transformed, y_train)

# Predict for every row of new_df (train and test alike) and store signed / relative errors.
y_pred = reg.predict(pca.transform(scaler.transform(new_df[columns].values)))
new_df['prediction'] = y_pred
new_df['error'] = new_df.prediction - new_df.weight
new_df['error_pct'] = new_df.error / new_df.weight
new_df['abs_error_pct'] = new_df.error_pct.abs()

# Fitted parameters of the scaler -> PCA -> linear-regression pipeline, gathered in a plain dict.
new_model = {
    'mean': scaler.mean_,
    'std': scaler.scale_,
    'PCA_components': pca.components_,
    'reg_coef': reg.coef_,
    'reg_intercept': reg.intercept_,
    'body_parts': body_parts_subset
}



In [None]:
# 50th percentile (median) of the absolute relative weight error over all rows of new_df.
np.percentile(np.abs((y_pred - new_df.weight.values)/new_df.weight.values), 50)

In [None]:
# Histogram of the absolute relative weight error, expressed in percent.
abs_error_percent = 100 * np.abs((y_pred - new_df.weight.values)/new_df.weight.values)
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(abs_error_percent, bins=100)
ax.grid()
plt.show()

In [None]:
# `percentiles` is otherwise only assigned in later cells, so define it here to keep this
# cell runnable on a top-to-bottom execution. The 1-point step matches the percentile
# plot in the pairwise-distance section further down.
percentiles = list(np.arange(0, 100, 1))

# Absolute relative weight error for every row of new_df, computed once.
abs_pct_errors = np.abs((y_pred - new_df.weight.values)/new_df.weight.values)
vs = [np.percentile(abs_pct_errors, p) for p in percentiles]

plt.figure(figsize=(20, 10))
plt.scatter(percentiles, vs)
plt.grid()
plt.ylim([0, 0.4])
plt.xlabel('Percentile')
plt.ylabel('Percent deviation in biomass')
plt.show()

In [None]:
# NOTE(review): `mask` was last assigned in the new-calibration training cell (derived from new_df),
# while the predictions and weights here come from df (old-calibration model) — confirm that
# evaluating the old model's predictions against the new split is intended.
amg = AccuracyMetricsGenerator(mask.values, df.prediction.values, df.weight.values)

In [None]:
# Plot predictions against ground truth for the `amg` object built in the previous cell.
amg.plot_predictions_vs_ground_truth(impose_bounds=True)

In [None]:
# Show the train/test accuracy metrics of the current `amg` object.
amg.display_train_test_accuracy_metrics()

In [None]:
# Rebuild the metrics helper from new_df, i.e. from the predictions of the new-calibration model.
amg = AccuracyMetricsGenerator(mask.values, new_df.prediction.values, new_df.weight.values)

In [None]:
# Plot predictions against ground truth for the `amg` object built in the previous cell.
amg.plot_predictions_vs_ground_truth(impose_bounds=True)

In [None]:
# Show the train/test accuracy metrics of the current `amg` object.
amg.display_train_test_accuracy_metrics()

<h1> Disparity value comparison </h1>

In [None]:
# For each row and each body part in the subset, record how much the horizontal
# left/right disparity changed between the old and the new keypoints (new minus old).
disp_diffs = []
for idx, row in df.iterrows():
    old_pair = (row.left_keypoints, row.right_keypoints)
    new_pair = (row.left_keypoints_new, row.right_keypoints_new)
    for body_part in body_parts_subset:
        disp_old, disp_new = [
            abs(left[body_part][0] - right[body_part][0])
            for left, right in (old_pair, new_pair)
        ]
        disp_diffs.append(disp_new - disp_old)
    

In [None]:
# Histogram of the per-keypoint disparity changes collected in disp_diffs.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(np.array(disp_diffs), bins=20)
ax.set_title('Distribution of disparity differences between pre- and post-Axiom calibrations')
ax.set_xlabel('Disparity difference between pre- and post-Axiom calibrations')
ax.set_ylabel('Count')
ax.grid()
plt.show()

<h1> Pairwise Distance Comparison </h1>

In [None]:
# Per pairwise distance: percentiles (0, 10, ..., 90) of the absolute and relative change
# between the new and the old calibration. Rows are collected in a list and converted
# once instead of appending to a DataFrame inside the loop.
percentiles = list(np.arange(0, 100, 10))
analysis_rows = []
for i in range(len(body_parts)-1):
    for j in range(i+1, len(body_parts)):
        distance_column = '{0}-{1}'.format(i, j)
        diffs = new_df[distance_column] - df[distance_column]
        pct_diffs = diffs / df[distance_column]

        row = {}
        row['pairwise_distance'] = '{0}<->{1}'.format(body_parts[i], body_parts[j])
        for percentile in percentiles:
            row['diff_{}th_percentile'.format(percentile)] = np.percentile(diffs, percentile)
            row['pct_diff_{}th_percentile'.format(percentile)] = np.percentile(pct_diffs, percentile)
        analysis_rows.append(row)

analysis_df = pd.DataFrame(analysis_rows)
        
        

In [None]:
# Preview the first rows of the per-pair percentile summary.
analysis_df.head()

In [None]:
# Pool the relative change of every pairwise distance (new vs. old calibration) into one flat array.
pct_diffs_all = []
for i, j in combinations(range(len(body_parts)), 2):
    distance_column = '{0}-{1}'.format(i, j)
    diffs = new_df[distance_column] - df[distance_column]
    pct_diffs = diffs / df[distance_column]
    pct_diffs_all += pct_diffs.tolist()

pct_diffs_all = np.array(pct_diffs_all)

In [None]:
# Percentile curve (0..99) of the pooled relative pairwise-distance changes.
percentiles = list(np.arange(0, 100, 1))
values = [np.percentile(pct_diffs_all, percentile) for percentile in percentiles]

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(percentiles, values)
ax.grid()
ax.set_ylim([0, 0.4])
ax.set_xlabel('Percentile')
ax.set_ylabel('Percent deviation in pairwise distance')
plt.show()
    
    

<h1> Figure out worst cases </h1>

In [None]:
# Copy of df extended, for every pairwise distance, with the new-calibration value,
# its absolute change and its change relative to the old value.
analysis_df = df.copy(deep=True)
for i, j in combinations(range(len(body_parts)), 2):
    distance_column = '{0}-{1}'.format(i, j)
    diff_column = '{0}-{1}_diff'.format(i, j)
    analysis_df['{0}-{1}_new'.format(i, j)] = new_df[distance_column]
    analysis_df[diff_column] = new_df[distance_column] - df[distance_column]
    analysis_df['{0}-{1}_pct_diff'.format(i, j)] = analysis_df[diff_column] / df[distance_column]

In [None]:
for i in range(len(body_parts)-1):
    for j in range(i+1, len(body_parts)):
        analysis_df[analysis_df['{}-{}_pct_diff'.format(i, j)] 