In [None]:
import json
import os
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from filter_optimization.filter_optimization_task import _add_date_hour_columns
import matplotlib.mlab as mlab
from scipy.stats import norm

<h1> Load Data </h1>

In [None]:
# Build the data-warehouse client from the JSON credentials file whose path is
# given by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The file is
# opened in a `with` block so the handle is closed once the JSON is parsed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Select every column of prod.biomass_computations for pen 56, captured
# between '2020-08-21' and '2020-08-30', keeping only rows whose akpd_score is
# at least 0.9.
# NOTE(review): the bounds are bare dates; how much of 2020-08-30 is included
# depends on the type of captured_at — confirm.
query = """
    select * from prod.biomass_computations
    where pen_id=56
    and captured_at between '2020-08-21' and '2020-08-30'
    and akpd_score >= 0.9
"""

# Presumably returns a pandas DataFrame (it is used as one in the cells below).
df = rds.extract_from_database(query)

In [None]:
# Run the shared date/hour helper on the frame, then build the boolean row
# filters that the following cells combine.
df = _add_date_hour_columns(df)
hour_mask = df.hour.between(7, 15)          # 7 <= hour <= 15, both ends inclusive
akpd_mask = df.akpd_score.gt(0.99)          # strictly greater than 0.99
kf_mask = df.estimated_k_factor.gt(1.135)   # strictly greater than 1.135

In [None]:
# Mean estimated weight over rows that pass both the hour and AKPD-score filters.
df.estimated_weight_g[hour_mask & akpd_mask].mean()

In [None]:
# Mean estimated weight over rows that pass the hour filter only.
df.estimated_weight_g[hour_mask].mean()

In [None]:
# Mean estimated weight over rows that pass both the k-factor and AKPD-score filters.
df.estimated_weight_g[kf_mask & akpd_mask].mean()

In [None]:
# Mean estimated weight over rows that pass the AKPD-score filter only.
df.estimated_weight_g[akpd_mask].mean()

<h1> Histogram of weights below minimum acceptable weight </h1>

In [None]:
# Overlay the weight histogram of all rows with the histogram of the rows that
# fail `kf_mask` (red); the dashed vertical line marks 1120.
#
# Both histograms use one shared set of bin edges. Calling `plt.hist` twice
# with `bins=100` makes each series compute its own edges over its own range,
# so the overlay would have a different bin width and its bar heights could
# not be compared with the base histogram.

# Not used by this cell; re-assigned here as in the original so cells run
# afterwards see the same mask.
akpd_mask = (df.akpd_score > 0.99)

# NaNs are dropped only for the edge computation (histogram_bin_edges needs a
# finite range); the plotted series are passed unchanged.
weight_bin_edges = np.histogram_bin_edges(df.estimated_weight_g.dropna(), bins=100)

plt.figure(figsize=(20, 10))
plt.hist(df.estimated_weight_g, bins=weight_bin_edges, label='all rows')
plt.hist(df[~kf_mask].estimated_weight_g, bins=weight_bin_edges, color='red',
         label='rows failing kf_mask')
plt.axvline(1120, color='red', linestyle='--', label='1120')
plt.xlabel('estimated_weight_g')
plt.ylabel('count')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Weight histogram (100 bins) restricted to the rows selected by `hour_mask`.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df.estimated_weight_g[hour_mask], bins=100)
ax.grid()
plt.show()

<h1> Visualize AKPD predictions </h1>

In [None]:
# Build the S3 client from the JSON credentials file whose path is given by the
# AWS_CREDENTIALS environment variable. The file is opened in a `with` block so
# the handle is closed once the JSON is parsed.
# NOTE(review): '/root/data' is a hard-coded absolute path (presumably the
# local download directory — confirm); consider making it configurable.
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(aws_credentials_file))

In [None]:
def convert_ann_to_dict_form(ann):
    """Split a stereo annotation into per-side keypoint lookups.

    Args:
        ann: mapping with 'leftCrop' and 'rightCrop' entries, each an iterable
            of items carrying 'keypointType', 'xCrop' and 'yCrop'.

    Returns:
        Tuple ``(left_keypoints, right_keypoints)``. Each element is a dict
        mapping a keypoint type to ``np.array([xCrop, yCrop])``.
    """
    def _index_by_type(crop_items):
        # A later item with the same keypointType overwrites an earlier one.
        points = {}
        for entry in crop_items:
            points[entry['keypointType']] = np.array([entry['xCrop'], entry['yCrop']])
        return points

    return _index_by_type(ann['leftCrop']), _index_by_type(ann['rightCrop'])
    
def display_crops(left_image, right_image, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crops stacked vertically, optionally with keypoints.

    Args:
        left_image: image drawn in the top panel.
        right_image: image drawn in the bottom panel.
        ann: annotation accepted by ``convert_ann_to_dict_form``.
        overlay_keypoints: when true, mark every keypoint with a small red dot.
        show_labels: when true (and keypoints are overlaid), also write each
            keypoint's type next to its dot.
    """
    fig, axes = plt.subplots(2, 1, figsize=(20, 20))
    for panel, image in zip(axes, (left_image, right_image)):
        panel.imshow(image)

    # (left, right) keypoint dicts, in the same order as the panels.
    keypoints_per_side = convert_ann_to_dict_form(ann)
    if overlay_keypoints:
        for panel, keypoints in zip(axes, keypoints_per_side):
            for body_part, point in keypoints.items():
                x, y = point[0], point[1]
                panel.scatter([x], [y], color='red', s=5)
                if show_labels:
                    panel.annotate(body_part, (x, y), color='red')
    plt.show()

In [None]:
# Display the crop pairs of the rows with the smallest estimated weight among
# those below 1120, lightest first, up to `num_to_display` rows.
mask = df.estimated_weight_g < 1120
num_to_display = 20
lightest_rows = df[mask].sort_values('estimated_weight_g', ascending=True).head(num_to_display)
for row_label, row in lightest_rows.iterrows():
    crop_urls = (row.left_crop_url, row.right_crop_url)
    ann = row.annotation
    # Fetch both crops first; only the first element of each result is used
    # and is handed to Image.open.
    crop_files = []
    for crop_url in crop_urls:
        crop_file, _, _ = s3.download_from_url(crop_url)
        crop_files.append(crop_file)
    left_crop, right_crop = [Image.open(crop_file) for crop_file in crop_files]
    display_crops(left_crop, right_crop, ann)
    

<h1> Gaussian Curve Fit </h1>

In [None]:
# NOTE(review): this repeats the `_add_date_hour_columns(df)` call made right
# after loading the data; it is only harmless if that helper can be safely
# applied twice to the same frame — confirm, or drop this cell.
df = _add_date_hour_columns(df)

In [None]:
# Fit a normal distribution to the estimated weights of rows captured in hours
# 8..15 (inclusive) whose estimated k-factor exceeds 1.135, then draw the
# fitted density over the normalised histogram of all weights in that window.
#
# NOTE: this overwrites `hour_mask` with an 8..15 window, which differs from
# the 7..15 window assigned earlier in the notebook.
hour_mask = (df.hour >= 8) & (df.hour <= 15)
vals = df[hour_mask].estimated_weight_g.values
kfs = df[hour_mask].estimated_k_factor.values

mu, sigma = norm.fit(vals[kfs > 1.135])

plt.figure(figsize=(20, 10))
# `density=True` replaces the `normed` keyword, which current matplotlib
# releases no longer accept.
n, bins, patches = plt.hist(vals, 60, density=True, facecolor='green', alpha=0.75)
# scipy's norm.pdf replaces matplotlib.mlab.normpdf, which has been removed
# from matplotlib; `norm` is already imported at the top of the notebook.
y = norm.pdf(bins, loc=mu, scale=sigma)
plt.plot(bins, y, 'r--', linewidth=2)
plt.show()


In [None]:
# Mean and standard deviation (pandas' default `.std()`) of the estimated
# weight over the rows currently selected by `hour_mask`.
mean = df.estimated_weight_g[hour_mask].mean()
std = df.estimated_weight_g[hour_mask].std()


In [None]:
from research.weight_estimation.keypoint_utils.optics import pixel2world

def _add_depth(df):
    """Attach a ``depth`` column to ``df`` and return the same frame.

    For each row, the left/right crop annotations and the camera metadata are
    passed to ``pixel2world``; the row's value is the median of element 1 of
    every returned world keypoint (presumably the depth axis — confirm against
    ``pixel2world``). Rows for which that computation raises ``TypeError`` get
    ``None``.

    Note: ``df`` is modified in place; the returned object is the input frame.
    """
    def _row_depth(annotation, camera_metadata):
        try:
            keypoints_3d = pixel2world(annotation['leftCrop'], annotation['rightCrop'], camera_metadata)
            return np.median([coords[1] for coords in keypoints_3d.values()])
        except TypeError:
            return None

    df['depth'] = [
        _row_depth(row.annotation, row.camera_metadata)
        for _, row in df.iterrows()
    ]
    return df

In [None]:
# Add the per-row `depth` column; `_add_depth` writes into the frame it is
# given and returns that same frame.
df = _add_depth(df)

In [None]:
# For each AKPD-score cutoff, record the mean depth and mean estimated weight
# of all rows scoring at or above it. The selection is cumulative (lower bound
# only), and the last value of `cutoffs` is not used as a lower bound.
# NOTE(review): the load query already keeps only akpd_score >= 0.9, so
# cutoffs below 0.9 presumably all select the same rows — confirm.
cutoffs = np.arange(0.8, 1.01, 0.01)
weights, depths = [], []
for lo in cutoffs[:-1]:
    rows_at_or_above = df[df.akpd_score >= lo]
    depths.append(rows_at_or_above.depth.mean())
    weights.append(rows_at_or_above.estimated_weight_g.mean())

In [None]:
# Mean depth per AKPD-score cutoff, one entry per lower bound in the sweep above.
depths

In [None]:
# Mean estimated weight per AKPD-score cutoff, one entry per lower bound in the sweep above.
weights

In [None]:
# Histogram of the AKPD scores of all loaded rows, in 20 bins.
fig, ax = plt.subplots()
ax.hist(df.akpd_score, bins=20)
ax.grid()
plt.show()