# GTSF : Ground Truth Weight Stats

In this notebook, we forecast fish weights by finding the closest Blender model.

### Look at the volumes created with blender

Load blender data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
import tempfile
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.automap import automap_base
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from scipy.stats import norm
import tqdm
import pickle
from itertools import combinations
from aquabyte.data_access_utils import DataAccessUtils

from PIL import Image, ImageDraw
from multiprocessing import Pool, Manager
import copy
import uuid
from sklearn.preprocessing import StandardScaler

# Let pandas render up to 500 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 500)

# Shared helper instance; later cells call its download_from_s3 method to fetch images.
data_access_utils = DataAccessUtils()



<h1> Get world keypoint coordinates from GTSF data </h1>

In [None]:
# Read the AWS key pair from the JSON file named by the AWS_CREDENTIALS
# environment variable. The context manager closes the file handle right away
# instead of leaving it open until garbage collection.
with open(os.environ["AWS_CREDENTIALS"]) as aws_credentials_file:
    aws_credentials = json.load(aws_credentials_file)
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")


# Same pattern for the Postgres connection settings (SQL_CREDENTIALS).
with open(os.environ["SQL_CREDENTIALS"]) as sql_credentials_file:
    sql_credentials = json.load(sql_credentials_file)
sql_engine = create_engine("postgresql://{}:{}@{}:{}/{}".format(sql_credentials["user"], sql_credentials["password"],
                           sql_credentials["host"], sql_credentials["port"],
                           sql_credentials["database"]))

Session = sessionmaker(bind=sql_engine)
session = Session()

# Reflect the existing schema and expose the tables used in this notebook
# as mapped classes.
Base = automap_base()
Base.prepare(sql_engine, reflect=True)
Enclosure = Base.classes.enclosures
Calibration = Base.classes.calibrations
GtsfDataCollection = Base.classes.gtsf_data_collections
StereoFramePair = Base.classes.stereo_frame_pairs


<h1> Train linear model with PCA + interaction features </h1>

In [None]:
session.rollback()
gtsf_data_collections_all = session.query(GtsfDataCollection).all()

# Collect one plain dict per retained fish and build the frame once at the end.
# Growing a DataFrame row by row copies it on every iteration, and
# DataFrame.append no longer exists in pandas >= 2.0.
records = []
for idx, row in enumerate(gtsf_data_collections_all):
    if idx % 10 == 0:
        print(idx)

    # this fish identifier is explicitly excluded
    if row.gtsf_fish_identifier == '190321010002':
        continue

    # Read the 'data' section defensively: a record without it is treated as
    # "not salmon" and skipped instead of raising KeyError.
    ground_truth_metadata = json.loads(row.ground_truth_metadata)
    ground_truth_data = ground_truth_metadata.get('data') or {}
    if ground_truth_data.get('species') != 'salmon':
        continue

    records.append({
        'gtsf_fish_identifier': row.gtsf_fish_identifier,
        'date': row.date,
        # stays None when the metadata carries no weight
        'weight': ground_truth_data.get('weight'),
    })

# Explicit columns keep the schema stable even when no record was retained.
df = pd.DataFrame(records, columns=['gtsf_fish_identifier', 'date', 'weight'])
            
    



<h1> Apply filters </h1>

In [None]:
# Keep an untouched snapshot so the unfiltered frame can be restored later,
# then drop every row that has at least one missing value.
df_cache = df.copy(deep=True)
df = df.dropna(axis=0, how='any')

In [None]:
# Histogram of the ground-truth weights, labelled so the figure stands on its own.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df.weight, bins=20)
ax.set_title('Ground truth weight distribution')
ax.set_xlabel('Ground truth weight')
ax.set_ylabel('Number of fish')
plt.show()

In [None]:
# (rows, columns) of the subset whose recorded weight exceeds 5000
df.loc[df['weight'] > 5000].shape

In [None]:
# Restore the snapshot taken in df_cache, discarding the dropna() applied to df since then.
df = df_cache.copy()

In [None]:
def coord2biomass_linear(world_keypoints, model):
    """Predict biomass from world keypoints with a PCA + linear-regression model.

    The feature vector is, in this order:
      1. the pairwise distances between the body parts, enumerated in the
         exact order given by ``model['body_parts']``;
      2. every product of two pairwise distances (with repetition);
      3. every product of three pairwise distances (with repetition).
    It is standardised with ``model['mean']`` / ``model['std']``, projected
    with ``model['PCA_components']`` and fed to the linear regression.

    Distances are computed with ``np.linalg.norm`` so the function does not
    depend on a distance helper that this notebook never defines.

    Args:
        world_keypoints: mapping from body-part name to its coordinates
            (any sequence convertible to a float array).
        model: dict with the keys 'mean', 'std', 'PCA_components',
            'reg_coef', 'reg_intercept' and 'body_parts'.

    Returns:
        The regression output ``X_pca . reg_coef + reg_intercept``.
    """
    body_parts = model['body_parts']

    # Pairwise distances; the ordering must match the one used at training
    # time, which is dictated by body_parts.
    pairwise_distances = []
    for i in range(len(body_parts) - 1):
        for j in range(i + 1, len(body_parts)):
            delta = (np.asarray(world_keypoints[body_parts[i]], dtype=float)
                     - np.asarray(world_keypoints[body_parts[j]], dtype=float))
            pairwise_distances.append(np.linalg.norm(delta))

    n_distances = len(pairwise_distances)

    # degree-2 interaction terms (i <= j)
    interaction_values_quadratic = [
        pairwise_distances[i] * pairwise_distances[j]
        for i in range(n_distances)
        for j in range(i, n_distances)
    ]

    # degree-3 interaction terms (i <= j <= k)
    interaction_values_cubic = [
        pairwise_distances[i] * pairwise_distances[j] * pairwise_distances[k]
        for i in range(n_distances)
        for j in range(i, n_distances)
        for k in range(j, n_distances)
    ]

    X = np.array(pairwise_distances + interaction_values_quadratic + interaction_values_cubic)

    X_normalized = (X - model['mean']) / model['std']
    X_transformed = np.dot(X_normalized, model['PCA_components'].T)
    prediction = np.dot(X_transformed, model['reg_coef']) + model['reg_intercept']
    return prediction

In [None]:
def apply_filters(left_keypoints, right_keypoints, world_keypoints, baseline_biomass_model,
                  max_pixel_y_deviation=25, max_world_y_deviation=0.25,
                  min_baseline_weight=0, max_baseline_weight=15000):
    """Decide whether a keypoint annotation should be filtered out.

    Three independent checks are applied; a failing check sets ``filter_out``
    and overwrites ``reason``, so when several checks fail the reason of the
    last failing one is reported.

    Args:
        left_keypoints: mapping body part -> coordinates in the left image.
        right_keypoints: mapping body part -> coordinates in the right image;
            must contain every body part of ``left_keypoints``.
        world_keypoints: mapping body part -> world coordinates.
        baseline_biomass_model: model dict passed to ``coord2biomass_linear``.
        max_pixel_y_deviation: largest accepted absolute difference between the
            index-1 coordinate of a body part in the left and right images.
        max_world_y_deviation: largest accepted spread (max - min) of the
            index-1 coordinate across the normalized world keypoints.
        min_baseline_weight: lower bound accepted for the baseline prediction.
        max_baseline_weight: upper bound accepted for the baseline prediction.

    Returns:
        Tuple ``(max_y_coordinate_deviation, filter_out, reason)``; ``reason``
        is None when no check failed.
    """
    filter_out, reason = False, None

    # apply y-coordinate deviation filter
    body_parts = sorted(left_keypoints)
    max_y_coordinate_deviation = max(abs(left_keypoints[bp][1] - right_keypoints[bp][1]) for bp in body_parts)
    if max_y_coordinate_deviation > max_pixel_y_deviation:
        filter_out = True
        reason = 'Y-coordinate deviation too high'

    # apply world y-coordinate deviation filter
    norm_wkps = normalize_world_keypoints(world_keypoints)
    y_world_coordinates = [norm_wkps[bp][1] for bp in body_parts]
    max_y_world_coordinate_deviation = max(y_world_coordinates) - min(y_world_coordinates)
    if max_y_world_coordinate_deviation > max_world_y_deviation:
        filter_out = True
        reason = 'World y-coordinate deviation too high'

    # apply baseline biomass model
    baseline_weight_prediction = coord2biomass_linear(world_keypoints, baseline_biomass_model)
    if (baseline_weight_prediction < min_baseline_weight) or (baseline_weight_prediction > max_baseline_weight):
        filter_out = True
        reason = 'Baseline prediction way too off'

    return max_y_coordinate_deviation, filter_out, reason


In [None]:
# Load the baseline model through a context manager so the file handle is closed.
# NOTE: unpickling executes arbitrary code -- only load model files from a trusted source.
with open('/root/data/alok/biomass_estimation/models/model_v2.pkl', 'rb') as model_file:
    baseline_biomass_model = pickle.load(model_file)

df['filter_out'] = False
df['reason'] = None
# Create the column up front: otherwise it only exists once some row has been
# filtered out, and any later cell that sorts on it fails with KeyError.
df['max_y_coordinate_deviation'] = np.nan

# NOTE(review): this loop reads row.left_keypoints / row.right_keypoints /
# row.world_keypoints; confirm df carries these columns before running the cell.
for idx, row in df.iterrows():
    max_y_coordinate_deviation, filter_out, reason = \
        apply_filters(row.left_keypoints, row.right_keypoints, row.world_keypoints, baseline_biomass_model)
    if filter_out:
        df.at[idx, 'max_y_coordinate_deviation'] = max_y_coordinate_deviation
        df.at[idx, 'filter_out'] = True
        df.at[idx, 'reason'] = reason

    

<h1> Visualize Individual Cases </h1>

In [None]:
# Every fish except '190607010041_bolaks-mjanes', largest stereo y-deviation first.
(
    df.loc[df['gtsf_fish_identifier'] != '190607010041_bolaks-mjanes']
    .sort_values(by='max_y_coordinate_deviation', ascending=False)
    [['gtsf_fish_identifier', 'max_y_coordinate_deviation']]
)



In [None]:
def convert_to_grayscale(image, gamma=2.2):
    """Collapse a 3-channel image into a single lightness-like channel.

    The image is divided by 255, each of the first three channels is raised
    to ``gamma`` and combined with the weights 0.2126 / 0.7152 / 0.0722 into
    a luminance Y; the result is ``116 * Y**(1/3) - 16``.

    Args:
        image: array indexed as [row, column, channel] with at least three
            channels. The division by 255 suggests 0-255 values -- TODO confirm.
        gamma: exponent applied to each scaled channel.

    Returns:
        2-D array with the same height and width as ``image``.
    """
    scaled = image / 255.0
    channel_0 = scaled[:, :, 0]
    channel_1 = scaled[:, :, 1]
    channel_2 = scaled[:, :, 2]
    luminance = 0.2126*channel_0**gamma + 0.7152*channel_1**gamma + 0.0722*channel_2**gamma
    return 116 * luminance**(1.0/3) - 16

In [None]:
def generate_lateral_keypoints(left_image, right_image, left_keypoints, right_keypoints, world_keypoints,
                               bp_1='UPPER_LIP', bp_2='TAIL_NOTCH', vertical_search_size=3, window_size=100,
                               min_breadth=0.04, max_breadth=0.2):
    """Add interpolated body keypoints along the bp_1 -> bp_2 segment.

    For each fraction in 0.1 .. 0.9 a point is interpolated between the two
    body parts in the left image. Its counterpart in the right image is found
    by block matching: a ``window_size`` square around the left point is
    compared (sum of absolute differences on the grayscale images) against
    windows in the right image, within a disparity range obtained from
    ``disp_from_depth`` and the ``min_breadth`` / ``max_breadth`` bounds.

    The results are stored under the key ``'BODY_<fraction>'`` in both
    keypoint dicts, which are modified in place and also returned.

    NOTE: a later cell of this notebook defines another function with the
    same name; once that cell has run, it replaces this definition.

    Args:
        left_image, right_image: images accepted by ``convert_to_grayscale``.
        left_keypoints, right_keypoints: mapping body part -> numpy array of
            image coordinates, indexed [0] horizontally and [1] vertically.
        world_keypoints: mapping body part -> world coordinates; index 1 is
            used as the depth of ``bp_1`` and ``bp_2``.
        bp_1, bp_2: names of the two body parts spanning the segment.
        vertical_search_size: height of the row band searched around the point.
        window_size: side length of the matching window.
        min_breadth, max_breadth: hard-coded bounds used to derive the depth
            range of the interpolated point.

    Returns:
        ``(left_keypoints, right_keypoints)`` with the new entries. When no
        candidate window could be compared, the right entry holds
        ``[None, None]``.
    """
    # Everything below is independent of the interpolation fraction, so it is
    # computed once instead of once per fraction.
    left_image_grayscale = convert_to_grayscale(left_image)
    right_image_grayscale = convert_to_grayscale(right_image)
    half_window = window_size // 2

    bp_1_depth = world_keypoints[bp_1][1]
    bp_2_depth = world_keypoints[bp_2][1]

    # need to determine lower and upper bounds here in a data driven fashion from GTSF data
    # hardcoded values used here
    extrap_kp_max_depth = (bp_1_depth + bp_2_depth) / 2.0 - min_breadth / 2.0
    extrap_kp_min_depth = (bp_1_depth + bp_2_depth) / 2.0 - max_breadth / 2.0

    # the larger depth bound maps to the smaller disparity bound and vice versa
    extrap_kp_min_disp = disp_from_depth(extrap_kp_max_depth)
    extrap_kp_max_disp = disp_from_depth(extrap_kp_min_depth)

    for fraction in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
        left_extrap_kp = (fraction * left_keypoints[bp_1] + (1 - fraction) * left_keypoints[bp_2]).astype('int64')

        left_box = left_image_grayscale[left_extrap_kp[1]-half_window:left_extrap_kp[1]+half_window,
                                        left_extrap_kp[0]-half_window:left_extrap_kp[0]+half_window]

        min_sad = np.inf
        row_match, col_match = None, None
        # The search indices get their own names: reusing the fraction's loop
        # variable here would overwrite it and corrupt the 'BODY_<fraction>' key.
        for row in range(left_extrap_kp[1]-vertical_search_size//2, left_extrap_kp[1]+vertical_search_size//2):
            for col in range(left_extrap_kp[0]-int(extrap_kp_max_disp), left_extrap_kp[0]-int(extrap_kp_min_disp)):
                right_box = right_image_grayscale[row-half_window:row+half_window, col-half_window:col+half_window]
                # windows clipped by the image border cannot be compared
                if right_box.shape != left_box.shape:
                    continue
                sad = np.abs(left_box - right_box).sum()
                if sad < min_sad:
                    row_match, col_match = row, col
                    min_sad = sad

        left_keypoints['BODY_{}'.format(fraction)] = left_extrap_kp
        right_keypoints['BODY_{}'.format(fraction)] = np.array([col_match, row_match])
    return left_keypoints, right_keypoints

            
            
    



In [None]:
def generate_lateral_keypoints(left_image, right_image, left_keypoints, right_keypoints, world_keypoints, 
                               bp='EYE', horizontal_search_space = 20, vertical_search_size=20, window_size=100,
                               min_breadth=0.04, max_breadth=0.2):
    """Re-locate one body part in the right image by block matching.

    A ``window_size`` square around ``left_keypoints[bp]`` in the grayscale
    left image is compared, by sum of absolute differences, with windows of
    the grayscale right image centred in a neighbourhood of
    ``right_keypoints[bp]``. The first window with the smallest difference wins.

    The left keypoint and the matched right position are stored under the key
    ``'BODY'`` in the two dicts, which are modified in place and returned.
    ``world_keypoints``, ``min_breadth`` and ``max_breadth`` are accepted but
    not used by this implementation.

    Returns:
        ``(left_keypoints, right_keypoints)`` including the new 'BODY' entries.
    """
    half_window = window_size // 2
    half_rows = vertical_search_size // 2

    anchor_left = left_keypoints[bp]
    anchor_right = right_keypoints[bp]

    gray_left = convert_to_grayscale(left_image)
    gray_right = convert_to_grayscale(right_image)

    # reference patch around the keypoint in the left image
    template = gray_left[anchor_left[1]-half_window:anchor_left[1]+half_window,
                         anchor_left[0]-half_window:anchor_left[0]+half_window]

    candidate_rows = range(anchor_right[1]-half_rows, anchor_right[1]+half_rows)
    candidate_cols = range(anchor_right[0]-horizontal_search_space, anchor_right[0]+horizontal_search_space)

    best_cost = np.inf
    best_row, best_col = None, None
    for row in candidate_rows:
        for col in candidate_cols:
            candidate = gray_right[row-half_window:row+half_window, col-half_window:col+half_window]
            cost = np.abs(template - candidate).sum()
            # strict comparison: ties keep the earliest candidate
            if cost < best_cost:
                best_cost = cost
                best_row, best_col = row, col

    left_keypoints['BODY'] = anchor_left
    right_keypoints['BODY'] = np.array([best_col, best_row])
    return left_keypoints, right_keypoints

            
            
    



In [None]:
def visualize_lateral_keypoint(gtsf_fish_identifier):
    """Show a fish's stereo pair with its keypoints, including generated ones.

    Looks up the first StereoFramePair for ``gtsf_fish_identifier``, downloads
    both images from S3, augments the stored keypoints through
    ``generate_lateral_keypoints`` and draws every keypoint as a small red dot
    on the left (top) and right (bottom) image.
    """
    matching_pairs = session.query(StereoFramePair).filter(StereoFramePair.gtsf_fish_identifier == gtsf_fish_identifier).all()
    frame_pair = matching_pairs[0]

    # fetch both images before decoding them
    bucket = frame_pair.image_s3_bucket
    image_keys = (frame_pair.left_image_s3_key, frame_pair.right_image_s3_key)
    downloaded = [data_access_utils.download_from_s3(bucket, key) for key in image_keys]
    left_image, right_image = [plt.imread(image_file) for image_file in downloaded]

    # parse the stored coordinates, then turn each coordinate list into a numpy array
    parsed = [json.loads(blob) for blob in (frame_pair.left_image_keypoint_coordinates,
                                            frame_pair.right_image_keypoint_coordinates,
                                            frame_pair.world_keypoint_coordinates)]
    left_keypoints, right_keypoints, world_keypoints = [
        {name: np.array(coords) for name, coords in keypoints.items()} for keypoints in parsed
    ]

    left_keypoints, right_keypoints = generate_lateral_keypoints(left_image, right_image, left_keypoints, right_keypoints, world_keypoints)

    fig, (left_ax, right_ax) = plt.subplots(2, 1, figsize=(20, 20))
    left_ax.imshow(left_image)
    right_ax.imshow(right_image)

    for ax, keypoints in ((left_ax, left_keypoints), (right_ax, right_keypoints)):
        for kp in keypoints.values():
            ax.scatter([kp[0]], [kp[1]], color='red', s=1)

    plt.show()

    
    
    

In [None]:
# Render the stereo pair and its keypoints for one fish identifier.
visualize_lateral_keypoint('190509010029')

In [None]:
def plot_gtsf_fish_id(gtsf_fish_identifier, left_keypoints=None, right_keypoints=None):
    """Plot a fish's stereo pair with labelled keypoints.

    Looks up the first StereoFramePair for ``gtsf_fish_identifier``, downloads
    both images from S3 and draws each keypoint as a red dot annotated with
    its body-part name (left image on top, right image below). Every keypoint
    is also printed.

    Args:
        gtsf_fish_identifier: identifier used to look up the stereo frame pair.
        left_keypoints: optional mapping body part -> coordinates to draw on
            the left image. Defaults to the coordinates stored with the frame pair.
        right_keypoints: same, for the right image.
    """
    sfp = session.query(StereoFramePair).filter(StereoFramePair.gtsf_fish_identifier == gtsf_fish_identifier).all()[0]

    # download left and right images
    left_image_s3_key, right_image_s3_key, s3_bucket = sfp.left_image_s3_key, sfp.right_image_s3_key, sfp.image_s3_bucket

    left_image_f = data_access_utils.download_from_s3(s3_bucket, left_image_s3_key)
    right_image_f = data_access_utils.download_from_s3(s3_bucket, right_image_s3_key)
    left_image = plt.imread(left_image_f)
    right_image = plt.imread(right_image_f)

    # Fall back to the stored annotations only when the caller gave none, so the
    # function can be called with just the identifier and explicit keypoints
    # are no longer silently discarded.
    if left_keypoints is None:
        left_keypoints = json.loads(sfp.left_image_keypoint_coordinates)
    if right_keypoints is None:
        right_keypoints = json.loads(sfp.right_image_keypoint_coordinates)

    fig, axes = plt.subplots(2, 1, figsize=(20, 20))
    axes[0].imshow(left_image)
    axes[1].imshow(right_image)

    for ax, keypoints in ((axes[0], left_keypoints), (axes[1], right_keypoints)):
        for bp, kp in keypoints.items():
            print(bp, kp)
            ax.scatter([kp[0]], [kp[1]], color='red', s=5)
            ax.annotate(bp, (kp[0], kp[1]), color='red')

    plt.show()


In [None]:
# Plot the annotated stereo pair for one fish identifier.
plot_gtsf_fish_id('190509010025')
