In [None]:
import json
import os
from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import pixel2world
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Widen pandas' display limits so long frames and long cell values (e.g. URLs)
# are not truncated when rendered in the notebook.
pd.options.display.max_rows = 500
pd.options.display.max_colwidth = 500

In [None]:
# Read the warehouse credentials from the JSON file whose path is given by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle once the JSON is parsed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    sql_credentials = json.load(credentials_file)

rds = RDSAccessUtils(sql_credentials)

In [None]:
# Filters for the biomass pull, hoisted out of the SQL text so they can be
# tuned in one place. These are notebook constants, not user input, so plain
# string formatting is acceptable here.
PEN_ID = 88
CAPTURED_AT_START = '2020-02-20'
CAPTURED_AT_END = '2020-03-01'
MIN_AKPD_SCORE = 0.99

# The rendered text is identical to the previously hard-coded query.
query = """
    select *
    from prod.biomass_computations
    where pen_id={pen_id}
    and captured_at between '{captured_at_start}' and '{captured_at_end}'
    and akpd_score > {min_akpd_score};
""".format(
    pen_id=PEN_ID,
    captured_at_start=CAPTURED_AT_START,
    captured_at_end=CAPTURED_AT_END,
    min_akpd_score=MIN_AKPD_SCORE,
)

In [None]:
# Run the SQL held in `query` through the RDS helper and keep the result as
# `df`. Later cells call .iterrows() on it and assign new columns, so it is
# presumably a pandas DataFrame -- confirm against RDSAccessUtils.
df = rds.extract_from_database(query)

In [None]:
# Each measurement column is the Euclidean distance between a pair of named
# keypoints in the coordinates returned by pixel2world. Insertion order here
# is also the order in which the columns are added to `df`.
KEYPOINT_PAIRS = {
    'body_length': ('UPPER_LIP', 'TAIL_NOTCH'),
    'head_length': ('UPPER_LIP', 'PECTORAL_FIN'),
    'anal_height': ('ADIPOSE_FIN', 'ANAL_FIN'),
    'body_height': ('DORSAL_FIN', 'PELVIC_FIN'),
}

# Print a progress fraction once every this many rows.
PROGRESS_EVERY = 1000


def compute_keypoint_distances(ann, cm):
    """Return the keypoint-pair distances for one row.

    Args:
        ann: The row's annotation; expected to support ``.get`` and to carry
            'leftCrop' and 'rightCrop' entries (may be None or empty).
        cm: The row's camera metadata, passed through to ``pixel2world``.

    Returns:
        dict mapping each name in ``KEYPOINT_PAIRS`` to the norm of the
        difference between the two keypoints' world coordinates, or to NaN
        when the annotation or either crop is missing.

    Raises:
        KeyError: if ``pixel2world`` output lacks one of the required keypoints.
    """
    if not (ann and ann.get('leftCrop') and ann.get('rightCrop')):
        # NaN rather than None: keeps the resulting columns numeric even when
        # every row is skipped, so the ratio arithmetic downstream still works.
        return {name: np.nan for name in KEYPOINT_PAIRS}

    world_coordinates = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    return {
        name: np.linalg.norm(world_coordinates[start] - world_coordinates[end])
        for name, (start, end) in KEYPOINT_PAIRS.items()
    }


measurements = []
n_rows = df.shape[0]
for count, (idx, row) in enumerate(df.iterrows()):
    if count % PROGRESS_EVERY == 0:
        print(count / n_rows)
    measurements.append(compute_keypoint_distances(row.annotation, row.camera_metadata))

# Positional assignment (one value per row, in iteration order); dtype is
# forced to float so an empty or all-missing result is not object-typed.
for name in KEYPOINT_PAIRS:
    df[name] = np.asarray([m[name] for m in measurements], dtype=float)
    
    


In [None]:
# Shape ratios built from the per-row keypoint distances.
head_to_body = df['head_length'].div(df['body_length'])
anal_to_body_height = df['anal_height'].div(df['body_height'])

df['body_ratio'] = head_to_body
df['height_ratio'] = anal_to_body_height
# Plain average of the two ratios; NaN if either ratio is NaN.
df['mean_ratio'] = (df['body_ratio'] + df['height_ratio']) / 2

In [None]:
# Print the left-crop URLs of the 100 rows with the highest mean_ratio among
# those whose body_ratio is below 0.3.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0;
# for a boolean row mask plus a list of column labels the two are equivalent.
mask = df.body_ratio < 0.3
top_crops = (
    df.loc[mask, ['left_crop_url', 'mean_ratio']]
    .sort_values('mean_ratio', ascending=False)
    .head(100)
)
for left_crop_url in top_crops['left_crop_url']:
    print(left_crop_url)

In [None]:
# Histogram of body_ratio (head_length / body_length).
# Rows without keypoints yield NaN and a zero body_length would yield inf;
# both are dropped so the automatic bin range is always finite, whatever the
# installed matplotlib/numpy versions do with non-finite input.
finite_body_ratio = df.body_ratio[np.isfinite(df.body_ratio)]

fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(finite_body_ratio, bins=100)
ax.set_title('Distribution of body_ratio')
ax.set_xlabel('body_ratio (head_length / body_length)')
ax.set_ylabel('count')
ax.grid()
plt.show()