In [None]:
import ast
import json
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from research.weight_estimation.keypoint_utils.body_parts import BodyParts

<h1> Load Kjeppevikholmen 2019 AKPD Data </h1>

In [None]:
# Directory holding the AKPD biomass output CSVs for pen 5.
AKPD_OUTPUT_DIR = '/root/data/alok/biomass_estimation/playground/output-pen=5'

# (start, end) date strings exactly as they appear in the CSV file names.
AKPD_DATE_RANGES = [
    ('2019-06-05', '2019-06-12'),
    ('2019-06-12', '2019-06-19'),
    ('2019-06-19', '2019-06-26'),
    ('2019-06-26', '2019-07-03'),
    ('2019-07-03', '2019-07-04'),
]

# Stack all per-range CSVs into one frame. ignore_index=True gives every row a unique
# label; without it each file restarts at 0 and the concatenated index contains duplicates.
adf = pd.concat(
    [
        pd.read_csv(os.path.join(
            AKPD_OUTPUT_DIR,
            'biomass_output,pen=5,range=({},{}).csv'.format(start, end),
        ))
        for start, end in AKPD_DATE_RANGES
    ],
    ignore_index=True,
)

<h1> Load Corresponding Manual Data </h1>

In [None]:
def _load_json_file(path):
    """Read and parse a JSON file, closing the file handle as soon as parsing is done."""
    with open(path) as f:
        return json.load(f)


# Credential file locations come from the environment; a missing variable raises KeyError here.
s3_access_utils = S3AccessUtils('/root/data', _load_json_file(os.environ['AWS_CREDENTIALS']))
rds_access_utils = RDSAccessUtils(_load_json_file(os.environ['PROD_SQL_CREDENTIALS']))

# Non-QA keypoint annotations for pen 5 that have both a left and a right crop annotated.
# NOTE(review): the captured_at window ends at '2019-07-02', while the AKPD CSVs loaded
# above run through 2019-07-04 -- confirm the shorter window is intentional.
query = """
    SELECT * FROM keypoint_annotations
    WHERE pen_id=5
    AND captured_at BETWEEN '2019-06-05' AND '2019-07-02'
    AND keypoints is not null
    AND keypoints -> 'leftCrop' is not null
    AND keypoints -> 'rightCrop' is not null
    AND is_qa = FALSE;
"""

mdf = rds_access_utils.extract_from_database(query)

<h1> Join the Two Datasets Together </h1>

In [None]:
# Attach the manual annotation to each AKPD row by joining on the left-crop image URL.
# A key-based merge keeps every AKPD row paired with the annotation for *its own* URL;
# sorting both frames independently and copying values across only lines up when the URL
# is unique on both sides, and otherwise fails or silently misaligns rows.
manual_by_url = (
    mdf[['left_image_url', 'keypoints', 'camera_metadata']]
    # NOTE(review): if a URL was annotated more than once, the first row is kept --
    # confirm that is the annotation that should be used.
    .drop_duplicates(subset='left_image_url')
    .rename(columns={'left_image_url': 'left_crop_url', 'keypoints': 'manual_keypoints'})
)

# URLs present in both datasets (kept as a name for inspection / later cells).
url_intersection = sorted(set(manual_by_url.left_crop_url).intersection(adf.left_crop_url))

df = (
    # Drop any pre-existing columns of the same name so the merge does not suffix them.
    adf.drop(columns=['manual_keypoints', 'camera_metadata'], errors='ignore')
    .merge(manual_by_url, on='left_crop_url', how='inner')
    .sort_values('left_crop_url')
    .reset_index(drop=True)
)


<h1> Compute Precision of AKPD Scorer </h1>

In [None]:
# Keypoint types (as returned by BodyParts.get_core_body_parts()) that generate_ann_score
# uses as lookup keys into each annotation's keypointType entries.
BODY_PARTS = BodyParts().get_core_body_parts()


# NOTE: the commented-out function below is an earlier variant of generate_ann_score, kept
# for reference only. It returns the largest per-keypoint Manhattan distance between the
# manual and AKPD keypoints over both crops.
# # generate ground-truth score label given manual annotations for a fish and the corresponding
# # AKPD annotations
# def generate_ann_score(akpd_ann, manual_ann, threshold=20):
#     m_dists = []
#     for side in ['leftCrop', 'rightCrop']:
#         m_coords = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in manual_ann[side]}
#         a_coords = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in akpd_ann[side]}

#         # collect the manhattan distance of every body part; note that `threshold` is
#         # never applied here -- no 0/1 score is produced, only the raw distances
#         for bp in BODY_PARTS:
#             manhattan_dist = np.sum(np.abs(m_coords[bp] - a_coords[bp]))
#             m_dists.append(manhattan_dist)
        
#     return np.max(m_dists)

def _keypoint_coords(crop_ann):
    """Map each keypointType in one crop's annotation list to np.array([xFrame, yFrame])."""
    return {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in crop_ann}


def generate_ann_score(akpd_ann, manual_ann, threshold=20, body_parts=None):
    """Return the largest left/right displacement disagreement between AKPD and manual keypoints.

    For every body part, the absolute left-minus-right offset of the keypoint is computed
    separately for the manual annotation and for the AKPD annotation (as an [x, y] pair).
    The result is the maximum absolute difference between those two offsets, taken over
    all body parts and over both the x and y components. 0 means the two annotations imply
    identical left/right offsets; larger values mean a larger disagreement.

    Args:
        akpd_ann: dict with 'leftCrop' and 'rightCrop' lists of keypoint dicts, each
            holding 'keypointType', 'xFrame' and 'yFrame'.
        manual_ann: manual annotation with the same structure as ``akpd_ann``.
        threshold: unused; retained only so existing callers keep working.
        body_parts: iterable of keypoint types to compare. Defaults to the module-level
            ``BODY_PARTS``.

    Returns:
        The maximum disagreement as a numpy scalar.

    Raises:
        KeyError: if a requested body part is missing from any of the four crops.
        ValueError: if ``body_parts`` is empty.
    """
    if body_parts is None:
        body_parts = BODY_PARTS
    body_parts = list(body_parts)
    if not body_parts:
        # np.max on an empty list would also raise ValueError, just with an opaque message.
        raise ValueError('generate_ann_score requires at least one body part')

    m_l_coords = _keypoint_coords(manual_ann['leftCrop'])
    m_r_coords = _keypoint_coords(manual_ann['rightCrop'])
    a_l_coords = _keypoint_coords(akpd_ann['leftCrop'])
    a_r_coords = _keypoint_coords(akpd_ann['rightCrop'])

    disp_diffs = []
    for bp in body_parts:
        gt_disp = np.abs(m_l_coords[bp] - m_r_coords[bp])
        p_disp = np.abs(a_l_coords[bp] - a_r_coords[bp])
        disp_diffs.append(np.abs(gt_disp - p_disp))

    # Each entry is an [x, y] pair; the max runs over every component of every body part.
    return np.max(disp_diffs)
        


def _parse_annotation(raw):
    """Decode one AKPD annotation cell from the CSV into a dict.

    The cell may hold real JSON or the repr of a Python dict (single-quoted keys, possibly
    None/True/False). JSON is tried first; ast.literal_eval handles the repr form without
    the quote-swapping hack, which corrupts any value containing an apostrophe.
    """
    if isinstance(raw, dict):
        return raw
    try:
        return json.loads(raw)
    except ValueError:
        return ast.literal_eval(raw)


# get vector of AKPD scores as predictions
preds = df.akpd_score.values

# score every row: largest disagreement between the AKPD and manual annotations
labels = []
n_rows = df.shape[0]
for count, (_, row) in enumerate(df.iterrows()):
    akpd_ann = _parse_annotation(row.annotation)
    labels.append(generate_ann_score(akpd_ann, row.manual_keypoints))

    # progress report every 1000 rows
    if count % 1000 == 0:
        print('Percentage Complete: {}%'.format(round(100 * count / n_rows, 2)))

    



In [None]:
# Store the per-row scores from the loop above as a new `label` column. A list is assigned
# positionally, so `labels` must hold exactly one entry per row of df, in df's row order.
df['label'] = labels

In [None]:
# Histogram of `label` restricted to rows whose AKPD score exceeds 0.99.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df.loc[df.akpd_score > 0.99, 'label'], bins=100)
ax.grid()
plt.show()

In [None]:
# Fraction of rows with akpd_score > 0.99 whose label is below 60.
# Filtering once and taking the mean of the boolean mask avoids evaluating the score
# filter twice; an empty selection yields NaN instead of raising ZeroDivisionError.
high_conf_labels = df.loc[df.akpd_score > 0.99, 'label']
float((high_conf_labels < 60).mean()) if len(high_conf_labels) else float('nan')

In [None]:
# Distribution of log(1 - akpd_score), which spreads out scores that sit very close to 1.
with np.errstate(divide='ignore', invalid='ignore'):
    log_residual = np.log(1 - df.akpd_score)

# A score of exactly 1.0 maps to -inf, and a missing or >1 score to NaN; the histogram
# cannot derive bin edges from non-finite values, so drop them before plotting.
log_residual = log_residual[np.isfinite(log_residual)]

plt.figure(figsize=(20, 10))
plt.hist(log_residual)
plt.xlabel('log(1 - akpd_score)')
plt.ylabel('count')
plt.grid()
plt.show()