<h1> Kjeppevikholmen Optical Analysis -- June Growth Trend </h1>

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
# from aquabyte.visualize import Visualizer
from research.utils.data_access_utils import RDSAccessUtils
from research_lib.utils.data_access_utils import S3AccessUtils
# from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
# from aquabyte.optics import pixel2world, euclidean_distance
# from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model
import json, os
import cv2
import torch
import pytz
from PIL import Image
import datetime as dt
import dateutil
from collections import defaultdict
import boto3


In [None]:
s3_access_utils = S3AccessUtils('/root/data')

# Read the production SQL credentials inside a context manager so the file
# handle is closed as soon as the JSON is parsed (the previous
# json.load(open(...)) left the handle open until garbage collection).
with open(os.environ['PROD_SQL_CREDENTIALS']) as sql_credentials_file:
    sql_credentials = json.load(sql_credentials_file)
rds_access_utils = RDSAccessUtils(sql_credentials)

# Non-QA keypoint annotations for pen 64 captured between 2019-12-10 and
# 2019-12-12 that have both a left-crop and a right-crop annotation.
query = """
    select * from keypoint_annotations
    where pen_id=64
    and keypoints -> 'leftCrop' is not null
    and keypoints -> 'rightCrop' is not null
    and captured_at between '2019-12-10' and '2019-12-12'
    and is_qa=FALSE;
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
class AKPD(object):
    """Client for the ``auto-keypoints`` SageMaker inference endpoint."""

    def __init__(self, aws_credentials):
        """Create the SageMaker runtime client.

        Args:
            aws_credentials: mapping with the keys ``aws_access_key_id`` and
                ``aws_secret_access_key``.
        """
        self.client = boto3.client(
            "sagemaker-runtime",
            region_name="eu-west-1",
            aws_access_key_id=aws_credentials['aws_access_key_id'],
            aws_secret_access_key=aws_credentials['aws_secret_access_key']
        )

    def predict_keypoints(self, left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, camera_metadata):
        """Request keypoints for one stereo crop pair from the endpoint.

        Args:
            left_crop_url: sent as ``leftCropUrl``.
            right_crop_url: sent as ``rightCropUrl``.
            left_crop_metadata: sent as ``leftCropMetadata``.
            right_crop_metadata: sent as ``rightCropMetadata``.
            camera_metadata: sent as ``cameraMetadata``.

        Returns:
            The endpoint's response body decoded as UTF-8 and parsed as JSON.
            The request is a one-element list, and callers in this notebook
            index the result with ``[0]``.
        """
        body = [{
            'leftCropUrl': left_crop_url,
            'rightCropUrl': right_crop_url,
            'leftCropMetadata': left_crop_metadata,
            'rightCropMetadata': right_crop_metadata,
            'cameraMetadata': camera_metadata,
            'id': 1
        }]

        # json.dumps already emits valid, double-quoted JSON. The previous
        # .replace("'", '"') pass rewrote apostrophes *inside* string values
        # (URLs, metadata), producing a malformed or altered payload.
        body_str = json.dumps(body)

        resp = self.client.invoke_endpoint(EndpointName='auto-keypoints', ContentType='application/json', Body=body_str)
        akpd_keypoints_str = resp['Body'].read()
        return json.loads(akpd_keypoints_str.decode("utf-8"))

In [None]:
# NOTE(review): NormalizeCentered2D, NormalizedStabilityTransform and ToTensor
# only appear in a commented-out import in the import cell, so on a fresh
# kernel these three lines raise NameError until that import is restored.
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

# Close the credentials file deterministically instead of leaving the handle
# from json.load(open(...)) to the garbage collector.
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_file:
    aws_credentials = json.load(aws_credentials_file)
akpd = AKPD(aws_credentials)

# load neural network weights
akpd_scorer_network = load_model('/root/data/alok/biomass_estimation/playground/akpd_scorer_model_TF.h5') # make this better
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only point this at checkpoints from a trusted source.
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')

In [None]:
def _score_and_weigh(keypoints, cm, row_id):
    """Return ``(weight, score)`` for one set of stereo keypoints.

    Runs the AKPD scoring network and then the biomass pipeline
    (centered-2D normalization -> stability normalization -> tensor ->
    biomass network) on the same keypoints.
    """
    def build_sample():
        # A fresh dict for each consumer, as the original code did, in case
        # a consumer modifies its input.
        return {
            'keypoints': keypoints,
            'cm': cm,
            'stereo_pair_id': row_id,
            'single_point_inference': True
        }

    score = generate_confidence_score(build_sample(), akpd_scorer_network)

    normalized_centered_2D_kps = normalize_centered_2D_transform_biomass(build_sample())
    normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
    tensorized_kps = to_tensor_transform(normalized_stability_kps)
    # NOTE(review): the 1e4 factor presumably rescales the network output to
    # the units used by `ground_truth` -- confirm.
    weight = biomass_network(tensorized_kps['kp_input']).item() * 1e4
    return weight, score


def generate_weight(row_id, keypoints, cm, row=None):
    """Predict weight and keypoint score from manual and from AKPD keypoints.

    Previously this function read the caller's global loop variable ``row``
    (for ``row.id`` and the crop URLs/metadata), so it silently depended on
    which row the calling cell had last iterated over. It now uses only its
    arguments; the record needed for the AKPD request is either passed in or
    looked up in the module-level ``df`` by ``row_id``.

    Args:
        row_id: id of the annotation row; used as ``stereo_pair_id``.
        keypoints: manually annotated keypoints for the stereo pair.
        cm: camera metadata for the stereo pair.
        row: optional record exposing ``left_image_url``, ``right_image_url``,
            ``left_crop_metadata`` and ``right_crop_metadata``. When omitted,
            the first row of ``df`` whose ``id`` equals ``row_id`` is used
            (assumes ``id`` is unique in ``df`` -- TODO confirm).

    Returns:
        Tuple ``(manual_weight_prediction, manual_score,
        akpd_weight_prediction, akpd_score)``.

    Raises:
        KeyError: if ``row`` is omitted and ``df`` has no row with that id.
    """
    if row is None:
        matches = df[df.id == row_id]
        if matches.empty:
            raise KeyError('no row with id {} in df'.format(row_id))
        row = matches.iloc[0]

    # manual annotation: score + biomass estimate
    manual_weight_prediction, manual_score = _score_and_weigh(keypoints, cm, row_id)

    # AKPD: request automatic keypoints for the same crops; the endpoint is
    # sent a one-element batch, so take the first result
    akpd_keypoints = akpd.predict_keypoints(
        row.left_image_url,
        row.right_image_url,
        row.left_crop_metadata,
        row.right_crop_metadata,
        cm
    )[0]

    # AKPD keypoints: score + biomass estimate
    akpd_weight_prediction, akpd_score = _score_and_weigh(akpd_keypoints, cm, row_id)

    return manual_weight_prediction, manual_score, akpd_weight_prediction, akpd_score

In [None]:
# Run generate_weight on every annotation row and collect its four outputs
# into parallel lists (one entry per row of df, in df order).
manual_weights, manual_scores, akpd_weights, akpd_scores = [], [], [], []
result_lists = (manual_weights, manual_scores, akpd_weights, akpd_scores)

count = 0
# The loop variable stays named `row`: generate_weight reads it as a global.
for idx, row in df.iterrows():
    # progress indicator every 10 rows
    if not count % 10:
        print(count)
    count += 1
    outputs = generate_weight(row.id, row.keypoints, row.camera_metadata)
    for result_list, value in zip(result_lists, outputs):
        result_list.append(value)

In [None]:
# Attach the per-row predictions collected by the inference loop.
prediction_columns = {
    'manual_weight': manual_weights,
    'manual_score': manual_scores,
    'akpd_weight': akpd_weights,
    'akpd_score': akpd_scores,
}
for column_name, column_values in prediction_columns.items():
    df[column_name] = column_values


def _floy_tag(crop_metadata):
    """Return the 'floyTag' entry of a crop-metadata mapping (None if absent)."""
    return crop_metadata.get('floyTag')


df['left_floy_tag'] = df['left_crop_metadata'].apply(_floy_tag)
df['right_floy_tag'] = df['right_crop_metadata'].apply(_floy_tag)

In [None]:
# Reference weight per floy tag code; the error cells below look these up by
# a row's left_floy_tag.
# NOTE(review): units are presumably grams -- confirm.
ground_truth = dict([
    ('WWBW', 1085),
    ('WWBB', 730),
    ('BBBB', 2060),
    ('BBBW', 1590),
    ('WWWB', 1880),
    ('BBWB', 1500),
])

In [None]:
# Signed relative error of the manual-keypoint weight prediction against the
# reference weight, for rows whose manual keypoint score exceeds 0.9.
#
# The filter uses `manual_score`: this notebook only creates `manual_score`
# and `akpd_score`, and the parallel AKPD cell pairs `akpd_score` with
# `akpd_weight`. The previous `df.score` referred to a column the notebook
# never creates.
err_pcts = []
for idx, row in df[df.manual_score > 0.9].iterrows():
    # raises KeyError if the row's left tag has no reference weight
    gt_weight = ground_truth[row.left_floy_tag]
    err_pcts.append((row.manual_weight - gt_weight) / gt_weight)

# median / mean absolute error, then mean / median signed error
print(np.median(np.abs(err_pcts)))
print(np.mean(np.abs(err_pcts)))
print(np.mean(err_pcts))
print(np.median(err_pcts))

In [None]:
# Signed relative error of the AKPD-keypoint weight prediction against the
# reference weight, for rows whose AKPD keypoint score exceeds 0.9.
confident_akpd_rows = df[df.akpd_score > 0.9]
err_pcts = [
    (record.akpd_weight - ground_truth[record.left_floy_tag]) / ground_truth[record.left_floy_tag]
    for _, record in confident_akpd_rows.iterrows()
]

# median / mean absolute error, then mean / median signed error
abs_err_pcts = np.abs(err_pcts)
print(np.median(abs_err_pcts))
print(np.mean(abs_err_pcts))
print(np.mean(err_pcts))
print(np.median(err_pcts))

In [None]:
# Per-tag summary of the manual-keypoint weight predictions.
#
# Uses `manual_score` / `manual_weight`, the columns this notebook creates
# for the manual annotations; the previous `score` / `weight` names refer to
# columns the notebook never adds.
#
# NOTE(review): the 0.9 score threshold only selects WHICH tags are
# summarised; the per-tag statistics still include every row that passes
# `mask & tag_mask`, whatever its score. Left as-is -- confirm intent.
analysis_data = defaultdict(list)

# Rows whose left and right crops carry the same, non-null floy tag. This
# does not depend on the tag, so it is built once rather than per iteration.
mask = (df.left_floy_tag == df.right_floy_tag) & (df.left_floy_tag.notnull())

for tag in df[df.manual_score > 0.9].left_floy_tag.unique().tolist():
    # `mask` and `tag_mask` are deliberately left as globals: later cells
    # reuse their values from the last iteration.
    tag_mask = df.left_floy_tag == tag
    tag_weights = df[mask & tag_mask].manual_weight  # filter once per tag
    gt_weight = ground_truth[tag]  # KeyError if the tag has no reference weight
    err_pct = (tag_weights - gt_weight) / gt_weight

    analysis_data['tag'].append(tag)
    analysis_data['num_samples'].append(tag_weights.shape[0])
    analysis_data['ground_truth_weight'].append(gt_weight)
    analysis_data['mean_prediction'].append(tag_weights.mean())
    analysis_data['median_prediction'].append(tag_weights.median())
    analysis_data['mean_err_pct'].append(np.mean(err_pct))
    analysis_data['median_err_pct'].append(np.median(err_pct))
    analysis_data['mean_abs_err_pct'].append(np.mean(np.abs(err_pct)))
    analysis_data['median_abs_err_pct'].append(np.median(np.abs(err_pct)))

analysis_df = pd.DataFrame(analysis_data)
analysis_df


In [None]:
# Per-tag summary of the AKPD-keypoint weight predictions, for every tag seen
# on at least one row with akpd_score > 0.9.
analysis_data = defaultdict(list)
confident_tags = df[df.akpd_score > 0.9].left_floy_tag.unique().tolist()

for tag in confident_tags:
    # `mask` / `tag_mask` remain globals: later cells reuse the values left
    # by the final iteration of this loop.
    mask = (df.left_floy_tag == df.right_floy_tag) & (df.left_floy_tag.notnull())
    tag_mask = df.left_floy_tag == tag

    tag_weights = df[mask & tag_mask].akpd_weight
    reference_weight = ground_truth[tag]
    relative_error = (tag_weights - reference_weight) / reference_weight

    tag_summary = {
        'tag': tag,
        'num_samples': tag_weights.shape[0],
        'ground_truth_weight': reference_weight,
        'mean_prediction': tag_weights.mean(),
        'median_prediction': tag_weights.median(),
        'mean_err_pct': np.mean(relative_error),
        'median_err_pct': np.median(relative_error),
        'mean_abs_err_pct': np.mean(np.abs(relative_error)),
        'median_abs_err_pct': np.median(np.abs(relative_error)),
    }
    for column_name, value in tag_summary.items():
        analysis_data[column_name].append(value)

analysis_df = pd.DataFrame(analysis_data)
analysis_df



In [None]:
# Distribution of manual-keypoint weight predictions for the rows selected by
# `mask & tag_mask`, i.e. the tag left over from the last iteration of the
# per-tag summary loop. `manual_weight` replaces the stale `weight` column,
# which this notebook never creates.
plt.hist(df[mask & tag_mask].manual_weight)
plt.show()

In [None]:
# NOTE(review): `Visualizer` only appears in a commented-out import in the
# import cell, so this raises NameError on a fresh kernel until that import
# is restored.
v = Visualizer(s3_access_utils, rds_access_utils)

In [None]:
# Show the crops of the i-th row selected by `mask & tag_mask` together with
# its manual-keypoint weight prediction (`manual_weight` replaces the stale
# `weight` column, which this notebook never creates).
i = 15
selected_rows = df[mask & tag_mask]  # filter once instead of per lookup
kpid = selected_rows.id.iloc[i]
print(selected_rows.manual_weight.iloc[i])
v.load_data(int(kpid))
v.display_crops()
i += 1

In [None]:
# Show the crops of the i-th row selected by `mask & tag_mask` together with
# its manual-keypoint weight prediction (`manual_weight` replaces the stale
# `weight` column, which this notebook never creates).
i = 16
selected_rows = df[mask & tag_mask]  # filter once instead of per lookup
kpid = selected_rows.id.iloc[i]
print(selected_rows.manual_weight.iloc[i])
v.load_data(int(kpid))
v.display_crops()
i += 1

In [None]:
# For every row selected by `mask & tag_mask`, record the distance between
# each ordered pair of distinct world keypoints, plus the row's
# manual-keypoint weight prediction and id.
#
# Changes from the previous version:
#   * the frame is filtered once, not four times per row;
#   * `manual_weight` replaces the stale `weight` column, which this notebook
#     never creates.
# NOTE(review): `pixel2world` and `euclidean_distance` only appear in a
# commented-out import in the import cell.
analysis_data = defaultdict(list)
selected_rows = df[mask & tag_mask]

for kps, cm, weight, kpid in zip(
        selected_rows.keypoints,
        selected_rows.camera_metadata,
        selected_rows.manual_weight,
        selected_rows.id):
    left_kps, right_kps = kps['leftCrop'], kps['rightCrop']
    wkps = pixel2world(left_kps, right_kps, cm)
    body_parts = sorted(wkps.keys())
    for bp1 in body_parts:
        for bp2 in body_parts:
            if bp1 == bp2:
                continue
            analysis_data['{}-{}'.format(bp1, bp2)].append(euclidean_distance(wkps[bp1], wkps[bp2]))
    analysis_data['weight'].append(weight)
    analysis_data['kpid'].append(kpid)

analysis_df = pd.DataFrame(analysis_data)

In [None]:
# Display the pairwise keypoint-distance table (one row per selected annotation).
analysis_df

In [None]:
# Column-by-column difference between the distance-table rows of two
# annotation ids (first matching row of each).
kpid1 = 1043682
kpid2 = 1043530
filter1 = analysis_df[analysis_df.kpid == kpid1]
filter2 = analysis_df[analysis_df.kpid == kpid2]
first_row_1 = filter1.T[filter1.index[0]]
first_row_2 = filter2.T[filter2.index[0]]
first_row_1 - first_row_2

In [None]:
# Show the source row(s) in df for the first annotation id held in filter1.
first_kpid = filter1.kpid.iloc[0]
df[df.id == first_kpid]

In [None]:
# Load annotation kpid1 into the visualizer and show its crops.
v.load_data(kpid1)
v.display_crops()

In [None]:
# Load annotation kpid2 into the visualizer and show its crops.
v.load_data(kpid2)
v.display_crops()

In [None]:
def get_world_keypoints(row):
    """Convert a row's stereo keypoints to world coordinates.

    Args:
        row: record with a ``keypoints`` mapping and ``camera_metadata``.

    Returns:
        The result of ``pixel2world`` on the left/right crop keypoints, or
        ``None`` when either ``'leftCrop'`` or ``'rightCrop'`` is missing
        from ``row.keypoints``.
    """
    keypoints = row.keypoints
    if 'leftCrop' in keypoints and 'rightCrop' in keypoints:
        return pixel2world(keypoints['leftCrop'], keypoints['rightCrop'], row.camera_metadata)
    return None


def _median_depth(wkp):
    """Median of component 1 over all world keypoints; NaN when there are none.

    ``get_world_keypoints`` returns ``None`` for rows without both crops; the
    previous inline lambda called ``.values()`` on that ``None`` and raised
    AttributeError.
    """
    if wkp is None:
        return np.nan
    # NOTE(review): index 1 is presumably the depth axis of the world
    # coordinates -- confirm against pixel2world.
    return np.median([x[1] for x in wkp.values()])


df['world_keypoints'] = df.apply(get_world_keypoints, axis=1)

df['depth'] = df.world_keypoints.apply(_median_depth)

In [None]:
# Depth vs manual-keypoint weight prediction for the rows selected by
# `mask & tag_mask` (`manual_weight` replaces the stale `weight` column,
# which this notebook never creates).
selected_rows = df[mask & tag_mask]  # filter once for both axes
plt.scatter(selected_rows.depth, selected_rows.manual_weight)

In [None]:
# Dorsal-fin-to-pelvic-fin distance against predicted weight, one point per
# row of the distance table.
fig, ax = plt.subplots()
ax.scatter(analysis_df['DORSAL_FIN-PELVIC_FIN'], analysis_df.weight)
plt.show()

In [None]:
# Rows whose dorsal-fin-to-pelvic-fin distance is above 0.14.
dorsal_pelvic_distance = analysis_df['DORSAL_FIN-PELVIC_FIN']
analysis_df[dorsal_pelvic_distance > 0.14]

In [None]:
# Rows whose dorsal-fin-to-pelvic-fin distance is below 0.12.
dorsal_pelvic_distance = analysis_df['DORSAL_FIN-PELVIC_FIN']
analysis_df[dorsal_pelvic_distance < 0.12]