<h1> Kjeppevikholmen Optical Analysis -- June Growth Trend </h1>

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from aquabyte.visualize import Visualizer
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
import json, os
import cv2
import torch
import pytz
from PIL import Image
import datetime as dt
import dateutil
from collections import defaultdict


In [None]:
# Read the database credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file right away instead of leaking the handle for the kernel's lifetime.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Keypoint annotations for pen 64 that have both a left and a right crop and
# whose is_qa flag is false.
query = """
    select * from keypoint_annotations
    where pen_id=64
    and keypoints -> 'leftCrop' is not null
    and keypoints -> 'rightCrop' is not null
    and is_qa=FALSE;
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
# Deserialize the saved biomass network (file name suggests the epoch-798
# snapshot of the 2019-11-08 training run — TODO confirm).
biomass_network = torch.load(
    '/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb'
)

# Preprocessing callables used by generate_weight before the network is run.
to_tensor_transform = ToTensor()
normalized_stability_transform = NormalizedStabilityTransform()
normalize_centered_2D_transform_biomass = NormalizeCentered2D()

In [None]:
def generate_weight(row_id, keypoints, cm):
    """Predict the weight for one annotated stereo pair.

    The sample is passed through the module-level preprocessing transforms
    (centered 2D normalization, stability normalization, tensor conversion)
    and the resulting 'kp_input' entry is fed to ``biomass_network``.

    Args:
        row_id: Identifier stored in the sample under 'stereo_pair_id'.
        keypoints: Keypoint annotation stored in the sample under 'keypoints'.
        cm: Camera metadata stored in the sample under 'cm'.

    Returns:
        float: The scalar network output multiplied by 1e4.
    """
    # run biomass estimation
    input_sample = {
        'keypoints': keypoints,
        'cm': cm,
        'stereo_pair_id': row_id,
        'single_point_inference': True
    }

    # The transforms are callables; invoke them directly rather than through
    # an explicit __call__.
    normalized_centered_2D_kps = normalize_centered_2D_transform_biomass(input_sample)
    normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
    tensorized_kps = to_tensor_transform(normalized_stability_kps)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        raw_output = biomass_network(tensorized_kps['kp_input']).item()

    # The network output is rescaled by 1e4 (presumably into grams — TODO confirm).
    weight_prediction = raw_output * 1e4
    return weight_prediction

In [None]:
# Run the biomass model on every annotation row, printing the running row
# count every 1000 rows as a progress indicator.
weights = []
for count, (idx, row) in enumerate(df.iterrows()):
    if not count % 1000:
        print(count)
    weights.append(generate_weight(row.id, row.keypoints, row.camera_metadata))


In [None]:
# Attach the predictions to the frame. `weights` was filled by iterating over
# df's rows in order, so it lines up with df row by row.
df['weight'] = weights
# Chronological sort by capture time is currently disabled:
# df = df.sort_values('captured_at', ascending=True)
 

In [None]:
def _floy_tag(crop_metadata):
    """Return the 'floyTag' entry of a crop-metadata dict, or None if absent."""
    return crop_metadata.get('floyTag')


# One floy-tag column per side of the stereo pair.
df['left_floy_tag'] = df['left_crop_metadata'].apply(_floy_tag)
df['right_floy_tag'] = df['right_crop_metadata'].apply(_floy_tag)

In [None]:
# Keep rows whose left and right crops carry the same, non-null floy tag.
tags_agree = df.left_floy_tag.eq(df.right_floy_tag)
mask = tags_agree & df.left_floy_tag.notna()
# Select the rows tagged 'BWWW' and display their mean predicted weight.
tag_mask = df.left_floy_tag.eq('BWWW')
df.loc[mask & tag_mask, 'weight'].mean()

In [None]:
# Histogram of the predicted weights for the rows selected by mask & tag_mask.
selected_weights = df.loc[mask & tag_mask, 'weight']
plt.hist(selected_weights)
plt.show()

In [None]:
# Ground-truth weight for each floy tag, keyed by the four-letter tag code.
# Units are not stated here; presumably the same units as the model
# predictions they are compared against — TODO confirm.
# NOTE(review): a trailing, value-less 'W' key made this literal a
# SyntaxError and has been removed. Of the sixteen four-letter B/W codes only
# 'BWBW' and 'WBBW' have no entry; if either tag exists, add it here together
# with its measured weight.
tag_weight_gt = {
    'BWWB': 2000,
    'BBBW': 1750,
    'BWBB': 810,
    'BBWW': 1708,
    'WBWB': 1950,
    'BBBB': 1952,
    'WBBB': 2044,
    'WWWB': 1834,
    'WWWW': 564,
    'BWWW': 1778,
    'BBWB': 1638,
    'WWBB': 782,
    'WBWW': 2442,
    'WWBW': 1158,
}

In [None]:
# Compare, per floy tag, the median predicted weight against the ground truth.
# Only rows whose left and right crops agree on a non-null tag are considered.
mask = (df.left_floy_tag == df.right_floy_tag) & (df.left_floy_tag.notnull())
analysis_data = defaultdict(list)
tag_ids = []
for tag_id, gt_weight in tag_weight_gt.items():
    tag_mask = df.left_floy_tag == tag_id
    # The median is NaN when no row matches this tag; that NaN then carries
    # through to pct_difference for the tag.
    pred_weight = df.loc[mask & tag_mask, 'weight'].median()
    # Signed error as a fraction of the ground truth (positive when the
    # prediction is below the ground truth).
    pct_difference = (gt_weight - pred_weight) / gt_weight
    tag_ids.append(tag_id)
    analysis_data['pred_weight'].append(pred_weight)
    analysis_data['ground_truth_weight'].append(gt_weight)
    analysis_data['pct_difference'].append(pct_difference)
# Label each row with its tag so results can be traced back to a tag; the
# columns are the same three as before.
analysis_df = pd.DataFrame(analysis_data, index=pd.Index(tag_ids, name='tag_id'))

In [None]:
# Express the signed relative error as a percentage. The column is recomputed
# from the weight columns rather than scaled in place, so re-running this cell
# does not multiply the values by 100 a second time.
analysis_df['pct_difference'] = 100 * (
    (analysis_df.ground_truth_weight - analysis_df.pred_weight)
    / analysis_df.ground_truth_weight
)
analysis_df

In [None]:
# Histogram of the per-tag percentage differences.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(analysis_df['pct_difference'])
ax.set(
    title='Percentage Difference Distribution in Floy Tag Data',
    xlabel='Percentage Difference',
    ylabel='Frequency',
)
ax.grid()
plt.show()

In [None]:
# Capture timestamps in ascending order (sort_values sorts ascending by default).
df['captured_at'].sort_values()

In [None]:
# Median of the per-tag percentage differences.
analysis_df['pct_difference'].median()