In [None]:
from collections import defaultdict
import random
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import torch
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from research.gtsf_data.gtsf_dataset import GTSFDataset
from research.gtsf_data.gtsf_augmentation import GTSFAugmentation
from research.weight_estimation.weight_estimator import WeightEstimator
from research.weight_estimation.weight_estimator_old import * 
from research.gtsf_data.body_parts import BodyParts

# Show up to 500 rows of a DataFrame before pandas truncates the display.
pd.options.display.max_rows = 500

In [None]:
# URL of the AKPD scorer model that is handed to the dataset loader.
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'

# The two date strings are passed positionally to GTSFDataset
# (presumably the start / end of the collection window -- confirm against the loader).
date_window = ('2019-02-01', '2019-09-20')
gtsf_dataset = GTSFDataset(*date_window, akpd_scorer_url)
df = gtsf_dataset.get_prepared_dataset()

In [None]:
# Keep only rows whose AKPD score is above 0.9, then give the augmentation
# helper its own deep copy of that subset.
high_score_rows = df[df.akpd_score > 0.9]
gtsf_augmentation = GTSFAugmentation(high_score_rows.copy(deep=True))

In [None]:
# Augmentation settings, passed positionally to generate_augmented_dataset.
y_bounds = (0.5, 2.5)
jitter = 0
trials = 1
augmented_df = gtsf_augmentation.generate_augmented_dataset(y_bounds, jitter, trials)

In [None]:
# Pre-processing transforms, applied in this order to every sample before it
# reaches the network.
normalize_centered_2D_transform = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

# Get neural network weights from sample training
s3_access_utils = S3AccessUtils('/root/data')
model_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/biomass/trained_models/2019-11-08T00-13-09/nn_epoch_798.pb'
model_f, _, _ = s3_access_utils.download_from_url(model_url)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only point this at model artifacts you trust.
network = torch.load(model_f)
# Inference only: switch the module to eval mode so layers such as dropout and
# batch-norm use their inference behaviour instead of their training behaviour.
network.eval()

weight_predictions = []
# Gradients are not needed for prediction, so skip autograd bookkeeping.
with torch.no_grad():
    for idx, row in augmented_df.iterrows():
        input_sample = {
            'keypoints': row.ann,
            'cm': row.cm,
            'stereo_pair_id': 0,
            'single_point_inference': True
        }
        # Each transform consumes the previous transform's output.
        normalized_centered_2D_kps = normalize_centered_2D_transform(input_sample)
        normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
        tensorized_kps = to_tensor_transform(normalized_stability_kps)
        # The scalar network output is multiplied by 1e4 (presumably undoing a
        # scaling applied to the training target -- TODO confirm).
        weight_prediction = network(tensorized_kps['kp_input']).item() * 1e4
        weight_predictions.append(weight_prediction)


In [None]:
# Build a WeightEstimator from a locally stored model file and run it on
# every augmented sample.
model_f = '/root/data/alok/biomass_estimation/playground/nn_epoch_253.pb'
weight_estimator = WeightEstimator(model_f)

# One prediction per row, in the frame's iteration order.
preds = [
    weight_estimator.predict(sample.ann, sample.cm)
    for _, sample in augmented_df.iterrows()
]
    

In [None]:
# Attach both sets of predictions to the augmented frame as new columns.
for column_name, column_values in (
    ('old_prediction', weight_predictions),
    ('new_prediction', preds),
):
    augmented_df[column_name] = column_values

In [None]:
def generate_error_breakdown(df, vals, field, pred_field, gt_field):
    """Print the relative error of the mean prediction for each bin of ``field``.

    Consecutive entries of ``vals`` define half-open bins ``[lo, hi)``. For each
    bin, the rows of ``df`` whose ``field`` value falls inside it are selected and
    the signed error ``(mean(pred) - mean(gt)) / mean(gt)`` is printed as a
    percentage rounded to two decimals.

    Args:
        df: pandas DataFrame containing the ``field``, ``pred_field`` and
            ``gt_field`` columns.
        vals: ordered sequence of bin edges; ``len(vals) - 1`` bins are reported.
        field: name of the column used for binning.
        pred_field: name of the column holding predicted values.
        gt_field: name of the column holding ground-truth values.

    Returns:
        None. One line per bin is written to stdout; a bin that contains no
        rows is reported as ``nan``.
    """
    for lower, upper in zip(vals[:-1], vals[1:]):
        # Lower edge is inclusive so a value sitting exactly on a bin edge is
        # counted in exactly one bin instead of being dropped from all of them.
        mask = (df[field] >= lower) & (df[field] < upper)
        pred_mean = df.loc[mask, pred_field].mean()
        gt_mean = df.loc[mask, gt_field].mean()
        error_pct = (pred_mean - gt_mean) / gt_mean
        # The percent sign belongs to the error value, not to the bin range.
        print('Error percentage for {} in range {} <-> {}: {}%'.format(
            field,
            round(lower, 2),
            round(upper, 2),
            round(100 * error_pct, 2))
        )




In [None]:
# Error breakdown of 'new_prediction' against 'weight' over 0.1-wide 'mean_y' bins.
# np.linspace includes the 2.5 endpoint, so the final 2.4-2.5 bin is reported;
# np.arange(0.5, 2.5, 0.1) excludes its stop value and left that bin out.
generate_error_breakdown(augmented_df, np.linspace(0.5, 2.5, 21), 'mean_y', 'new_prediction', 'weight')

In [None]:
# Error breakdown of 'old_prediction' against 'weight' over 0.1-wide 'mean_y' bins.
# np.linspace includes the 2.5 endpoint, so the final 2.4-2.5 bin is reported;
# np.arange(0.5, 2.5, 0.1) excludes its stop value and left that bin out.
generate_error_breakdown(augmented_df, np.linspace(0.5, 2.5, 21), 'mean_y', 'old_prediction', 'weight')