In [None]:
import json
import os
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
from PIL import Image
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import pixel2world
import scipy.stats as stats
import pylab

<h1> Load Data </h1>

In [None]:
# Load the warehouse credentials from the JSON file whose path is given by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The `with` block closes
# the file handle as soon as the JSON has been parsed instead of leaking it.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    sql_credentials = json.load(credentials_file)

rds = RDSAccessUtils(sql_credentials)

In [None]:
# Pull every column of prod.biomass_computations for pen 4, restricted to
# captures between 2020-08-15 and 2020-08-25 whose akpd_score is at least 0.99.
# NOTE(review): pen id, date window and score threshold are hard-coded here;
# consider lifting them into named constants if this notebook is reused.
query = """
    select * from prod.biomass_computations
    where pen_id=4
    and captured_at between '2020-08-15' and '2020-08-25'
    and akpd_score >= 0.99
"""

# Run the query through the RDS helper; the result is used as a DataFrame below.
df = rds.extract_from_database(query)

<h1> Get depth values </h1>

In [None]:
def compute_depth(ann, cm):
    """Return the median of the index-1 world coordinate over all keypoints.

    Args:
        ann: annotation mapping with 'leftCrop' and 'rightCrop' entries.
        cm: camera metadata forwarded unchanged to ``pixel2world``. Inside this
            function the name shadows the module-level ``matplotlib.cm`` import.

    Returns:
        The ``np.median`` of element ``[1]`` of every value in the mapping
        returned by ``pixel2world``.
        NOTE(review): presumably index 1 is the distance-from-camera axis -
        confirm against ``pixel2world``.
    """
    world_keypoints = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    second_axis_values = [coords[1] for coords in world_keypoints.values()]
    return np.median(second_axis_values)

In [None]:
# Compute one depth per row of df, in iteration order, so the resulting list
# can be attached to df as a column.
total_rows = df.shape[0]
depths = []
failed_rows = 0
for row_number, (_, row) in enumerate(df.iterrows()):
    # Report progress every 1000 rows.
    if row_number % 1000 == 0:
        print('Percentage completion: {}%'.format(round(100 * float(row_number) / total_rows, 2)))
    try:
        depth = compute_depth(row.annotation, row.camera_metadata)
    except TypeError:
        # Best-effort: keep going when a row cannot be processed (presumably a
        # missing annotation or camera metadata - confirm). NaN rather than None
        # keeps the depth column numeric even if every row fails.
        depth = np.nan
        failed_rows += 1
    depths.append(depth)

# Surface how many rows were skipped instead of swallowing the errors silently.
print('Rows without a depth value: {} of {}'.format(failed_rows, total_rows))
    
    
    

In [None]:
# Attach the per-row depths as a new column. `depths` was built by iterating
# df row by row, so positions line up; rows where compute_depth raised a
# TypeError carry a missing value.
df['depth'] = depths

<h1> Basic Exploration </h1>

In [None]:
# Histogram of the computed depth column, 20 bins.
fig, ax = plt.subplots()
ax.hist(df['depth'], bins=20)
ax.set_xlabel('Depth (m)')
ax.grid()
plt.show()

In [None]:
# Histogram of the estimated weights, 20 bins. The x-axis label previously
# read 'Depth (m)', copied from the depth histogram; it now names the plotted
# column and its unit (grams, per the column name).
plt.hist(df.estimated_weight_g, bins=20)
plt.xlabel('Estimated weight (g)')
plt.grid()
plt.show()

In [None]:
# Summary statistics of the computed depth column.
df['depth'].describe()

In [None]:
# Mean estimated weight inside consecutive depth bins (d_low, d_high].
# Bin edges are rounded to one decimal because np.arange with a float step
# produces values such as 0.7999999999999999 rather than 0.8.
depth_cutoffs = [round(x, 1) for x in np.arange(0.5, 1.8, 0.1)]
depth_bins = list(zip(depth_cutoffs, depth_cutoffs[1:]))

mean_weights = []
for d_low, d_high in depth_bins:
    mask = (df.depth > d_low) & (df.depth <= d_high)
    # A bin with no rows yields NaN for its mean.
    mean_weights.append(df.loc[mask, 'estimated_weight_g'].mean())

# Label each bar with its full depth range and name both axes so the figure
# can be read on its own.
bar_positions = range(len(mean_weights))
bin_labels = ['{}-{}'.format(d_low, d_high) for d_low, d_high in depth_bins]
plt.bar(bar_positions, mean_weights)
plt.xticks(bar_positions, bin_labels, rotation=45)
plt.xlabel('Depth (m)')
plt.ylabel('Mean estimated weight (g)')
plt.show()

In [None]:
# Mean estimated weight over rows whose depth is below 1.0.
df.loc[df['depth'] < 1.0, 'estimated_weight_g'].mean()

In [None]:
# Histogram of the estimated k-factor for rows whose depth is below 1.0.
# Labelled, gridded and shown like the other histogram cells, which also stops
# the cell from echoing the raw (counts, edges, patches) tuple.
plt.hist(df[df.depth < 1.0].estimated_k_factor, bins=10)
plt.xlabel('Estimated k-factor (depth < 1.0 m)')
plt.grid()
plt.show()

In [None]:
# Mean estimated weight over rows whose depth is below 1.0.
# NOTE(review): this repeats an earlier cell verbatim; possibly a comparison
# against depth >= 1.0 was intended - confirm with the author.
df.loc[df['depth'] < 1.0, 'estimated_weight_g'].mean()

In [None]:
# Mean estimated weight over all rows, for comparison with the depth-filtered means.
df['estimated_weight_g'].mean()

In [None]:
# Probability plot of the estimated weights against a normal distribution,
# drawn through the pylab interface.
weight_values = df.estimated_weight_g.values
stats.probplot(weight_values, plot=pylab, dist='norm')