In [20]:
import json, os
import cv2
import torch
from multiprocessing import Pool, Manager
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.akpd import AKPD
from aquabyte.template_matching import find_matches_and_homography
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model
import boto3
import pandas as pd

import numpy as np
from matplotlib import pyplot as plt
import time

In [21]:
s3_access_utils = S3AccessUtils('/root/data')
rds_access_utils = RDSAccessUtils(json.load(open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS'])))

df = pd.read_csv('/root/data/alok/biomass_estimation/playground/biomass.csv-61-00-from-2019-09-13-to-2019-09-23.csv')

aws_credentials = json.load(open(os.environ['AWS_CREDENTIALS']))
akpd = AKPD(aws_credentials)

to_tensor_transform = ToTensor()

# initialize data transforms so that we can run inference with biomass neural network
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()

# load neural network weights
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')
akpd_scorer_network = load_model('/root/data/alok/biomass_estimation/playground/akpd_scorer_model_TF.h5') # make this better

In [22]:
df.index = pd.to_datetime(df.captured_at)

In [4]:
{"baseline": 0.101865756211485, "focalLength": 0.01382369154207327, "pixelCountWidth": 4096, "focalLengthPixel": 4006.8671136444264, "imageSensorWidth": 0.01412, "pixelCountHeight": 3000, "imageSensorHeight": 0.01035, "stereoParametersUrl": "http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40013180_R40029775/2020-01-14T00:00:00Z_L40013180_R40029775_stereo-parameters.json"}

{'baseline': 0.101865756211485,
 'focalLength': 0.01382369154207327,
 'pixelCountWidth': 4096,
 'focalLengthPixel': 4006.8671136444264,
 'imageSensorWidth': 0.01412,
 'pixelCountHeight': 3000,
 'imageSensorHeight': 0.01035,
 'stereoParametersUrl': 'http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40013180_R40029775/2020-01-14T00:00:00Z_L40013180_R40029775_stereo-parameters.json'}

In [5]:
{"baseline": 0.10175483968715139, "focalLength": 0.013902841180637544, "pixelCountWidth": 4096, "focalLengthPixel": 4029.809037865955, "imageSensorWidth": 0.01412, "pixelCountHeight": 3000, "imageSensorHeight": 0.01035, "stereoParametersUrl": "http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40029792_R40029775/2019-08-14T00:00:00Z_L40029792_R40029775_stereo-parameters.json"}

{'baseline': 0.10175483968715139,
 'focalLength': 0.013902841180637544,
 'pixelCountWidth': 4096,
 'focalLengthPixel': 4029.809037865955,
 'imageSensorWidth': 0.01412,
 'pixelCountHeight': 3000,
 'imageSensorHeight': 0.01035,
 'stereoParametersUrl': 'http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40029792_R40029775/2019-08-14T00:00:00Z_L40029792_R40029775_stereo-parameters.json'}

In [24]:
df2.camera_metadata.iloc[0]

{'baseline': 0.101865756211485,
 'focalLength': 0.01382369154207327,
 'pixelCountWidth': 4096,
 'focalLengthPixel': 4006.8671136444264,
 'imageSensorWidth': 0.01412,
 'pixelCountHeight': 3000,
 'imageSensorHeight': 0.01035,
 'stereoParametersUrl': 'http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40013180_R40029775/2020-01-14T00:00:00Z_L40013180_R40029775_stereo-parameters.json'}

<h1> Generate Other DF </h1>

In [23]:
s3_access_utils = S3AccessUtils('/root/data')
rds_access_utils = RDSAccessUtils(json.load(open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS'])))

query = """
    SELECT * FROM
    prod.crop_annotation cas
    INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
    WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
    AND cas.annotation is not null
    AND cas.pen_id=61
    AND cas.group_id='staging-61'
    AND cas.captured_at between '2019-09-13' and '2019-09-21';
"""

df2 = rds_access_utils.extract_from_database(query)
aws_credentials = json.load(open(os.environ['AWS_CREDENTIALS']))
akpd = AKPD(aws_credentials)

to_tensor_transform = ToTensor()

# initialize data transforms so that we can run inference with biomass neural network
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()

# load neural network weights
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')
akpd_scorer_network = load_model('/root/data/alok/biomass_estimation/playground/akpd_scorer_model_TF.h5') # make this better

<h1> Function to generate weight prediction and confidence score </h1>

In [7]:
def generate_weight_score(row_id, left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, akpd_keypoints, cm):
    
    # run AKPD scoring network
    input_sample = {
        'keypoints': akpd_keypoints,
        'cm': row.camera_metadata,
        'stereo_pair_id': row.id,
        'single_point_inference': True
    }
    akpd_score = generate_confidence_score(input_sample, akpd_scorer_network)

    # run biomass estimation
    input_sample = {
        'keypoints': akpd_keypoints,
        'cm': row.camera_metadata,
        'stereo_pair_id': row.id,
        'single_point_inference': True
    }
    nomralized_centered_2D_kps = \
        normalize_centered_2D_transform_biomass.__call__(input_sample)

    normalized_stability_kps = normalized_stability_transform.__call__(nomralized_centered_2D_kps)
    tensorized_kps = to_tensor_transform.__call__(normalized_stability_kps)
    akpd_weight_prediction = biomass_network(tensorized_kps['kp_input']).item() * 1e4
    
    
    return akpd_score, akpd_weight_prediction


In [8]:
weight_score_dict = {}

args = []
count = 0
for idx, row in df2.iterrows():
    left_crop_url, right_crop_url = row.left_crop_url, row.right_crop_url
    left_crop_metadata, right_crop_metadata = row.left_crop_metadata, row.right_crop_metadata,
    cm = row.camera_metadata
    akpd_keypoints = row.annotation
    row_id = idx
    akpd_score, akpd_weight_prediction = generate_weight_score(row_id, left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, akpd_keypoints, cm)
    weight_score_dict[row_id] = {
        'akpd_score': akpd_score,
        'akpd_weight_prediction': akpd_weight_prediction,
    }
    
    if count % 100 == 0:
        print(count)
    count += 1

0
100


  depth = focal_length_pixel * baseline / np.array(disp)
  stabilized_kp_info = [0.5 * wkp[0]/wkp[1], 0.5 * wkp[2]/wkp[1], 0.5 * 0.1/wkp[1]]
  theta = np.arctan(upper_lip_y / upper_lip_x)


200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700


In [19]:
df2.camera_metadata.iloc[0]

{'baseline': 0.101865756211485,
 'focalLength': 0.01382369154207327,
 'pixelCountWidth': 4096,
 'focalLengthPixel': 4006.8671136444264,
 'imageSensorWidth': 0.01412,
 'pixelCountHeight': 3000,
 'imageSensorHeight': 0.01035,
 'stereoParametersUrl': 'http://aquabyte-stereo-parameters.s3.eu-west-1.amazonaws.com/L40013180_R40029775/2020-01-14T00:00:00Z_L40013180_R40029775_stereo-parameters.json'}

In [9]:
df2['akpd_weight'], df2['akpd_score'] = np.nan, np.nan
for idx, row in df.iterrows():
    if idx in weight_score_dict.keys():
        df2.at[idx, 'akpd_weight'] = weight_score_dict[idx]['akpd_weight_prediction']
        df2.at[idx, 'akpd_score'] = weight_score_dict[idx]['akpd_score']

df2.index = pd.to_datetime(df2.captured_at)

In [10]:
tdf1 = df[(df.captured_at > '2019-09-13') & (df.captured_at < '2019-09-21')].copy(deep=True)
tdf2 = df2.copy(deep=True)
tdf1['adj_left_crop_url'] = tdf1.left_crop_url.apply(lambda x: x.replace('dev2', 'staging'))
common_urls = list(set(tdf1.adj_left_crop_url).intersection(set(tdf2.left_crop_url)))
tdf1 = tdf1[tdf1.adj_left_crop_url.isin(common_urls)].copy(deep=True)
tdf2 = tdf2[tdf2.left_crop_url.isin(common_urls)].copy(deep=True)


In [15]:
mask = (tdf1.akpd_score > 0.9) & (tdf2.akpd_score > 0.9)
tdf1[mask].weight - tdf2[mask].akpd_weight

  


Series([], dtype: float64)

In [14]:
tdf1[tdf1.akpd_score > 0.9].weight.mean()


3046.0537992878362

In [None]:
plt.figure(figsize=(20, 10))
plt.hist(tdf1[mask].weight - tdf2[mask].akpd_weight, bins=100)
plt.grid()
plt.show()

In [None]:
diffs = tdf1[mask].weight - tdf2[mask].akpd_weight
pct_diffs = diffs / tdf2[mask].akpd_weight

In [None]:
pct_diffs.abs().mean()

In [None]:
tdf1[tdf1.akpd_score > 0.9].head(1000).weight.mean()

In [None]:
tdf2[tdf2.akpd_score > 0.9].head(1000).akpd_weight.mean()

In [None]:
cols = ['site_id', 'pen_id', 'left_crop_url', 'right_crop_url', 'captured_at']
tdf1.ix[tdf1.akpd_score > 0.9, cols].head(1000).to_csv('/root/data/alok/biomass_estimation/playground/calibration_input_data.csv')

In [None]:
kdf = pd.read_csv('/root/data/alok/biomass_estimation/playground/calibration_input_data.csv')

In [None]:
tdf2.columns

In [None]:
cols = ['pen_id', 'annotation_state_id', 'base_key', 'url_key',
       'right_crop_url', 'left_crop_metadata', 'right_crop_metadata',
       'camera_metadata', 'captured_at', 'ts_source_start',
       'ts_source_end', 'ts', 'annotator_email', 'skip_reasons', 'group_id',
       'left_crop_url', 'filter_reasons', 'id', 'state', 'description',
       'raw_left_crop_url', 'raw_right_crop_url']
tdf2.loc[tdf2.akpd_score > 0.9, cols].head(1000).to_csv('/root/data/alok/biomass_estimation/playground/calibration_input_data.csv')

In [None]:
tdf2.right_crop_url.iloc[0]

In [None]:
tdf2.base_key.iloc[0]

In [None]:
tdf2['raw_left_crop_url'] = tdf2.left_crop_url.apply(lambda x: x.replace('aquabyte-crops', 'aquabyte-frames-resized-inbound'))
tdf2['raw_right_crop_url'] = tdf2.right_crop_url.apply(lambda x: x.replace('aquabyte-crops', 'aquabyte-frames-resized-inbound'))

In [None]:
tdf2.raw_left_crop_url.iloc[-1]