In [None]:
import json
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from weight_estimation.body_parts import core_body_parts
from weight_estimation.utils import convert_to_world_point_arr, get_left_right_keypoint_arrs, CameraMetadata

In [None]:
# Print the built-in help text for get_left_right_keypoint_arrs, which is
# called on the manual keypoints in the jitter computation further down.
# NOTE(review): this is an interactive aid only; nothing later depends on it.
help(get_left_right_keypoint_arrs)

<h1> Load Kjeppevikholmen AKPD annotations </h1>

In [None]:
# Biomass-output CSVs for pen 5, one file per consecutive date range.
weekly_csv_paths = [
    '/root/data/alok/biomass_estimation/playground/output-pen=5/biomass_output,pen=5,range=(2019-06-05,2019-06-12).csv',
    '/root/data/alok/biomass_estimation/playground/output-pen=5/biomass_output,pen=5,range=(2019-06-12,2019-06-19).csv',
    '/root/data/alok/biomass_estimation/playground/output-pen=5/biomass_output,pen=5,range=(2019-06-19,2019-06-26).csv',
    '/root/data/alok/biomass_estimation/playground/output-pen=5/biomass_output,pen=5,range=(2019-06-26,2019-07-03).csv',
    '/root/data/alok/biomass_estimation/playground/output-pen=5/biomass_output,pen=5,range=(2019-07-03,2019-07-04).csv',
]

# Stack the files, order rows by capture time and expose the `weight`
# column under the more explicit name `estimated_weight_g`.
df = (
    pd.concat([pd.read_csv(csv_path) for csv_path in weekly_csv_paths])
    .sort_values('captured_at')
    .assign(estimated_weight_g=lambda frame: frame.weight)
)

# Index by the parsed capture timestamp so the hour of day can be read
# straight off the index.
df.index = pd.to_datetime(df.captured_at)
df['hour'] = df.index.hour


<h1> Load Kjeppevikholmen Manual Annotations & Merge </h1>

In [None]:
# Load each credentials file through a context manager so the file handle is
# closed as soon as the JSON has been parsed (the bare open() calls leaked it).
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(aws_credentials_file))

with open(os.environ['PROD_SQL_CREDENTIALS']) as sql_credentials_file:
    rds = RDSAccessUtils(json.load(sql_credentials_file))

# Manual (non-QA) keypoint annotations for pen 5 that have both a left and a
# right crop annotated.
# NOTE(review): the upper bound here is 2019-07-02 while the AKPD CSVs loaded
# for this analysis run through 2019-07-04 -- confirm the narrower window is
# intentional.
query = """
SELECT * FROM keypoint_annotations
WHERE pen_id=5
AND captured_at BETWEEN '2019-06-05' AND '2019-07-02'
AND keypoints is not null
AND keypoints -> 'leftCrop' is not null
AND keypoints -> 'rightCrop' is not null
AND is_qa = FALSE;
"""

# Result is used below as a DataFrame with (at least) the columns
# left_image_url, keypoints and camera_metadata.
mdf = rds.extract_from_database(query)

In [None]:
# Keep a single manual annotation per image URL and key it by that URL.
# The previous version sorted the AKPD frame and the manual frame
# independently and copied `.values` across positionally; that only lines up
# when both sides contain exactly one row per URL, and otherwise either raises
# a length-mismatch error or silently pairs rows from different images.
manual_by_url = mdf.drop_duplicates(subset='left_image_url').set_index('left_image_url')

# URLs present in both the AKPD output and the manual annotations.
url_intersection = sorted(set(manual_by_url.index).intersection(df.left_crop_url))

# AKPD rows that have a manual counterpart, ordered by URL as before.
tdf = df[df.left_crop_url.isin(url_intersection)].sort_values('left_crop_url')

# Look every row's annotation up by its own URL, so duplicated AKPD rows each
# receive the annotation that belongs to their image.
tdf['manual_keypoints'] = manual_by_url['keypoints'].reindex(tdf.left_crop_url).values
tdf['camera_metadata'] = manual_by_url['camera_metadata'].reindex(tdf.left_crop_url).values


In [None]:
# Display the camera metadata attached to the first merged row; the jitter
# loop below reads keys such as 'baseline' and 'focalLengthPixel' from it.
tdf.camera_metadata.iloc[0]

<h1> Compute Jitter Column </h1>

In [None]:
def _pixel_coords_by_type(crop_keypoints):
    """Index one crop's keypoint list as {keypointType: [xFrame, yFrame]}."""
    return {kp['keypointType']: [kp['xFrame'], kp['yFrame']] for kp in crop_keypoints}


# Parallel accumulators: one entry is appended to each list for every
# (annotated fish, core body part) pair.
body_parts, depths = [], []
diffs_l_x, diffs_r_x, diffs_l_y, diffs_r_y = [], [], [], []

for row_label, fish in tdf.iterrows():

    # Rows whose AKPD score is below 0.01 are left out of the comparison.
    if fish.akpd_score < 0.01:
        continue

    manual_keypoints = fish.manual_keypoints
    akpd_keypoints = json.loads(fish.annotation)

    # Depth is derived from the manual keypoints only: triangulate them with
    # this row's camera parameters and take the median of column 1 of the
    # resulting world-point array.
    raw_metadata = fish.camera_metadata
    camera_metadata = CameraMetadata(
        baseline_m=raw_metadata['baseline'],
        focal_length=raw_metadata['focalLength'],
        focal_length_pixel=raw_metadata['focalLengthPixel'],
        pixel_count_width=raw_metadata['pixelCountWidth'],
        pixel_count_height=raw_metadata['pixelCountHeight'],
        image_sensor_width=raw_metadata['imageSensorWidth'],
        image_sensor_height=raw_metadata['imageSensorHeight'],
    )
    left_kps, right_kps = get_left_right_keypoint_arrs(manual_keypoints)
    world_kps = convert_to_world_point_arr(left_kps, right_kps, camera_metadata)
    depth = np.median(world_kps[:, 1])

    # Per-crop lookups of frame coordinates for both annotation sources.
    manual_left = _pixel_coords_by_type(manual_keypoints['leftCrop'])
    manual_right = _pixel_coords_by_type(manual_keypoints['rightCrop'])
    akpd_left = _pixel_coords_by_type(akpd_keypoints['leftCrop'])
    akpd_right = _pixel_coords_by_type(akpd_keypoints['rightCrop'])

    for body_part in core_body_parts:
        depths.append(depth)
        body_parts.append(body_part)

        (manual_lx, manual_ly), (akpd_lx, akpd_ly) = manual_left[body_part], akpd_left[body_part]
        (manual_rx, manual_ry), (akpd_rx, akpd_ry) = manual_right[body_part], akpd_right[body_part]

        # Every difference is "manual minus AKPD", in frame pixel coordinates.
        pixel_diffs = (
            manual_lx - akpd_lx,
            manual_rx - akpd_rx,
            manual_ly - akpd_ly,
            manual_ry - akpd_ry,
        )
        for accumulator, pixel_diff in zip((diffs_l_x, diffs_r_x, diffs_l_y, diffs_r_y), pixel_diffs):
            accumulator.append(pixel_diff)

In [None]:
# Collect the parallel accumulator lists into a single long-format frame:
# one row per (fish, body part) with its depth and the four pixel differences.
jitter_column_names = ['body_part', 'depth', 'diff_l_x', 'diff_r_x', 'diff_l_y', 'diff_r_y']
jitter_column_values = [body_parts, depths, diffs_l_x, diffs_r_x, diffs_l_y, diffs_r_y]
rdf = pd.DataFrame(dict(zip(jitter_column_names, jitter_column_values)))

<h1> Visualize x-axis diff standard deviation (i.e. jitter) broken down by depth bucket </h1>

In [None]:
from sklearn.linear_model import LinearRegression

def compute_regression_line(buckets, stds):
    """Fit a least-squares line through per-bucket standard deviations.

    The x-coordinate of each bucket is its position in ``buckets``
    (0, 1, 2, ...), i.e. the same positions used as x-ticks when the values
    are plotted, so the returned line can be drawn on the same axes.

    Args:
        buckets: sequence of bucket labels; only its length is used.
        stds: sequence with one standard deviation per bucket. Non-finite
            entries (for example the NaN that pandas returns for a bucket
            holding fewer than two samples) are left out of the fit instead
            of making it fail.

    Returns:
        Tuple ``(slope, intercept)`` of Python floats. With a single finite
        value the slope is 0.0 and the intercept is that value; with no
        finite value both are NaN.
    """
    x = np.arange(len(buckets), dtype=float)
    y = np.asarray(stds, dtype=float)

    # Drop buckets without a usable std but keep the surviving points at
    # their original bucket positions.
    usable = np.isfinite(y)
    x, y = x[usable], y[usable]

    if y.size == 0:
        return float('nan'), float('nan')
    if y.size == 1:
        # A single point cannot determine a slope; fall back to a flat line.
        return 0.0, float(y[0])

    # Degree-1 polyfit returns the coefficients highest power first.
    slope, intercept = np.polyfit(x, y, 1)
    return float(slope), float(intercept)

# Depth bucket edges (metres) are shared by every body part, so they are
# built once and paired up as consecutive (low, high) edges.
depth_buckets = np.arange(0.5, 2.2, 0.1)
bucket_bounds = list(zip(depth_buckets, depth_buckets[1:]))

# One panel per core body part, laid out on a 2 x 4 grid.
fig, axes = plt.subplots(2, 4, figsize=(20, 10))
for idx, body_part in enumerate(core_body_parts):
    buckets = ['{}-{}'.format(round(lower, 2), round(upper, 2)) for lower, upper in bucket_bounds]

    # Standard deviation of the left-image x difference per depth bucket,
    # ignoring differences of 50 px or more in absolute value.
    stds = []
    for lower, upper in bucket_bounds:
        within_depth = (rdf.depth >= lower) & (rdf.depth <= upper)
        selected = within_depth & (rdf.body_part == body_part) & (rdf.diff_l_x.abs() < 50)
        stds.append(rdf.loc[selected, 'diff_l_x'].std())

    # plot empirical standard deviations broken down by depth bucket
    ax = axes[idx // 4, idx % 4]
    tick_positions = range(len(buckets))
    ax.plot(stds, label='empirical jitter std values')
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(buckets, rotation=90)
    ax.grid()
    ax.set(title=body_part, xlabel='Depth bucket (m)', ylabel='Jitter standard deviation')

    # compute & plot regression line over the bucket positions
    slope, intercept = compute_regression_line(buckets, stds)
    line_x = np.arange(len(buckets))
    line_label = 'Regression line: std = {}x + {}'.format(round(slope, 2), round(intercept, 2))
    ax.plot(line_x, slope * line_x + intercept, linestyle='dashed', color='red', label=line_label)
    ax.legend()

fig.subplots_adjust(hspace=0.5)

plt.show()

In [None]:
# Normal QQ plots of the left-image x difference, one figure per body part
# and one panel per depth bucket. `stats` is scipy.stats, imported in the
# notebook's import cell (it was previously used here without being imported,
# which fails on a fresh kernel).

# Bucket edges do not depend on the body part, so compute them once.
depth_buckets = np.arange(0.5, 2.2, 0.1)
bucket_bounds = list(zip(depth_buckets, depth_buckets[1:]))

for body_part in core_body_parts:
    # NOTE(review): the 4 x 4 grid has room for at most 16 depth buckets;
    # widen it if the bucket range above is extended.
    fig, axes = plt.subplots(4, 4, figsize=(20, 10))

    print('QQ plots by depth bucket for body part: {}'.format(body_part))
    for idx, (low_depth, high_depth) in enumerate(bucket_bounds):
        # NOTE(review): unlike the standard-deviation plot, no |diff| < 50
        # outlier filter is applied here -- confirm that is intended.
        mask = (rdf.depth >= low_depth) & (rdf.depth <= high_depth) & (rdf.body_part == body_part)
        vals = rdf[mask].diff_l_x.values

        ax = axes[idx // 4, idx % 4]
        # A probability plot (and its fitted line) needs at least two
        # samples; sparser buckets keep an empty, titled panel.
        if len(vals) >= 2:
            stats.probplot(vals, dist='norm', plot=ax)
        ax.set_title('Depth bucket (m): {}-{}'.format(round(low_depth, 2), round(high_depth, 2)))

    fig.subplots_adjust(hspace=0.5)
    plt.show()
        