In [None]:
%load_ext autoreload
%autoreload 2

import json
import pandas as pd
import numpy as np
from keras.models import load_model
from research_lib.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from dataset import prepare_gtsf_data, compute_akpd_score
from augment import augment
from weight_estimation.weight_estimator import WeightEstimator
from weight_estimation.utils import CameraMetadata

from matplotlib import pyplot as plt


<h1> Prepare Augmented GTSF Dataset </h1>

<h2> Load raw data </h2>

In [None]:
# Set up the S3 helper, fetch the AKPD scorer model file, and build the first
# GTSF frame.
# NOTE(review): '/root/data' is presumably the helper's local download/cache
# root -- confirm against S3AccessUtils.
s3 = S3AccessUtils('/root/data')
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'
# download_from_url is unpacked into three values; only the first one is used
# (it is handed to prepare_gtsf_data as the scorer argument).
akpd_scorer_f, _, _ = s3.download_from_url(akpd_scorer_url)
# NOTE(review): the first two arguments look like a start/end date range, and
# the trailing 0.5 / 1.0 are the same values used for the second
# prepare_gtsf_data call in this notebook; their meaning is defined in
# dataset.prepare_gtsf_data and is not visible here -- confirm.
df1 = prepare_gtsf_data('2019-03-01', '2019-09-20', akpd_scorer_f, 0.5, 1.0)

In [None]:
# Build the second GTSF frame with the same scorer file and the same trailing
# 0.5 / 1.0 arguments as the first call, for a different pair of date strings.
# NOTE(review): presumably a start/end date range -- confirm against
# dataset.prepare_gtsf_data.
df2 = prepare_gtsf_data('2020-06-01', '2020-08-20', akpd_scorer_f, 0.5, 1.0)

In [None]:
# Stack the two GTSF frames row-wise into one frame.
# NOTE(review): pd.concat keeps each input's index labels by default, so `df`
# can carry duplicate labels if df1 and df2 are both default-indexed. Confirm
# that augment() does not depend on unique labels, or pass ignore_index=True.
df = pd.concat([df1, df2])

<h2> Augment the data </h2>

In [None]:
# Settings handed to augment() together with the combined GTSF frame.
# NOTE(review): the meaning and units of each key are defined by
# augment.augment and are not visible in this notebook -- confirm there.
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 10,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

# Produce the augmented frame used by every later cell.
augmented_df = augment(df, augmentation_config)


<h1> Generate weight estimates with production model </h1>

In [None]:
def _to_camera_metadata(raw):
    """Build a CameraMetadata from one row's camera-metadata mapping.

    Args:
        raw: mapping read with the camelCase keys 'focalLength',
            'focalLengthPixel', 'baseline', 'pixelCountWidth',
            'pixelCountHeight', 'imageSensorWidth' and 'imageSensorHeight'.

    Returns:
        A CameraMetadata populated from those seven entries.

    Raises:
        KeyError: if any of the seven keys is missing from ``raw``.
    """
    return CameraMetadata(
        focal_length=raw['focalLength'],
        focal_length_pixel=raw['focalLengthPixel'],
        baseline_m=raw['baseline'],
        pixel_count_width=raw['pixelCountWidth'],
        pixel_count_height=raw['pixelCountHeight'],
        image_sensor_width=raw['imageSensorWidth'],
        image_sensor_height=raw['imageSensorHeight']
    )


# Fetch the weight model and the k-factor model files and wrap both in a
# single estimator. Each download is unpacked into three values; only the
# first one is passed on.
weight_model_f, _, _ = s3.download_from_url('https://aquabyte-models.s3-us-west-1.amazonaws.com/biomass/trained_models/2020-11-27T00-00-00/weight_model_synthetic_data.pb')
kf_model_f, _, _ = s3.download_from_url('https://aquabyte-models.s3-us-west-1.amazonaws.com/k-factor/trained_models/2020-08-08T000000/kf_predictor_v2.pb')
weight_estimator = WeightEstimator(weight_model_f, kf_model_f)

# One predicted weight per row of augmented_df, in row order.
pred_weights = []

# Only two columns are needed, so walk them directly instead of materialising
# a Series for every row with iterrows(); enumerate() replaces the manual
# counter that was used for progress reporting.
for count, (ann, camera_metadata) in enumerate(
    zip(augmented_df['annotation'], augmented_df['camera_metadata'])
):
    # predict() is unpacked into three values; only the first (the weight)
    # is kept.
    weight, _, _ = weight_estimator.predict(ann, _to_camera_metadata(camera_metadata))
    pred_weights.append(weight)

    # Lightweight progress indicator: one line every 1000 rows.
    if count % 1000 == 0:
        print(count)
    

In [None]:
# Attach the model predictions as a new column. pred_weights was filled by
# the prediction loop in augmented_df row order, and a plain list is assigned
# positionally. Note that this adds the column to augmented_df in place.
augmented_df['pred_weight'] = pred_weights

<h1> K-factor Bias Analysis </h1>

In [None]:
# Bucket edges 0.0, 0.1, ..., 2.4. np.arange with a fractional step yields
# drifted values such as 0.30000000000000004, so the edges are rounded to one
# decimal; the numeric boundaries then match the bucket labels exactly and a
# k-factor of exactly 0.3 lands in the bucket labelled "0.3-0.4".
kf_cutoffs = np.round(np.arange(0, 2.5, 0.1), 1)

buckets = []              # label per bucket, e.g. '0.9-1.0'
avg_weight_err_pcts = []  # relative error of the bucket means, in percent

for low_kf, high_kf in zip(kf_cutoffs[:-1], kf_cutoffs[1:]):
    buckets.append('{}-{}'.format(low_kf, high_kf))

    # Half-open interval [low_kf, high_kf); select the rows once and reuse
    # the subset for both means.
    in_bucket = augmented_df[(augmented_df.kf >= low_kf) & (augmented_df.kf < high_kf)]
    avg_pred_weight = in_bucket.pred_weight.mean()
    avg_ground_truth_weight = in_bucket.weight.mean()

    # Positive values mean the average prediction is above the average
    # ground-truth weight. A bucket with no rows has NaN means and therefore
    # a NaN entry here.
    avg_weight_err_pcts.append(
        100 * (avg_pred_weight - avg_ground_truth_weight) / avg_ground_truth_weight
    )
    


In [None]:
# Bar chart of the average weight error percentage for each k-factor bucket,
# drawn through an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(buckets, avg_weight_err_pcts)
ax.set_xlabel('Ground Truth K-factor bucket')
ax.set_ylabel('Average weight error percentage (%)')
ax.grid()
plt.show()
