In [None]:
import json, os
import cv2
import torch
from multiprocessing import Pool, Manager
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.akpd import AKPD
from aquabyte.template_matching import find_matches_and_homography
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model
import boto3
import pandas as pd

import numpy as np
from matplotlib import pyplot as plt
import time

In [None]:
class AKPD(object):
    """Minimal client for the 'auto-keypoints' SageMaker runtime endpoint.

    NOTE(review): this class has the same name as the ``AKPD`` imported from
    ``aquabyte.akpd`` in the imports cell, so it replaces that import once this
    cell runs.
    """

    # Endpoint and region used for every request.
    ENDPOINT_NAME = 'auto-keypoints'
    REGION_NAME = 'eu-west-1'

    def __init__(self, aws_credentials):
        """Create the SageMaker runtime client.

        Args:
            aws_credentials: mapping with the keys 'aws_access_key_id' and
                'aws_secret_access_key'.
        """
        self.client = boto3.client(
            "sagemaker-runtime",
            region_name=self.REGION_NAME,
            aws_access_key_id=aws_credentials['aws_access_key_id'],
            aws_secret_access_key=aws_credentials['aws_secret_access_key']
        )

    def predict_keypoints(self, left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, camera_metadata):
        """Invoke the endpoint for one stereo crop pair and return its decoded JSON response.

        The request body is a one-element JSON list holding the crop URLs, the
        crop metadata, the camera metadata and a fixed 'id' of 1.
        """
        body = [{
            'leftCropUrl': left_crop_url,
            'rightCropUrl': right_crop_url,
            'leftCropMetadata': left_crop_metadata,
            'rightCropMetadata': right_crop_metadata,
            'cameraMetadata': camera_metadata,
            'id': 1
        }]

        # json.dumps already emits valid, double-quoted JSON. Blindly replacing
        # every "'" with '"' afterwards broke any value containing an apostrophe
        # (e.g. a URL or metadata string), so the payload is sent unmodified.
        body_str = json.dumps(body)

        resp = self.client.invoke_endpoint(
            EndpointName=self.ENDPOINT_NAME,
            ContentType='application/json',
            Body=body_str
        )
        akpd_keypoints_str = resp['Body'].read()
        return json.loads(akpd_keypoints_str.decode("utf-8"))

In [None]:
# --- data access -----------------------------------------------------------
s3_access_utils = S3AccessUtils('/root/data')

# Read the credential files inside `with` blocks so the handles are closed
# deterministically instead of being left to the garbage collector.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as sql_credentials_f:
    rds_access_utils = RDSAccessUtils(json.load(sql_credentials_f))

# Annotated crops of the 'BATI' service for pen 60 / group '60',
# captured between 2020-01-02 and 2020-01-30.
query = """
    SELECT * FROM
    prod.crop_annotation cas
    INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
    WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
    AND cas.annotation is not null
    AND cas.pen_id=60
    AND cas.group_id='60'
    AND cas.captured_at between '2020-01-02' and '2020-01-30';
"""

df = rds_access_utils.extract_from_database(query)

with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_f:
    aws_credentials = json.load(aws_credentials_f)
akpd = AKPD(aws_credentials)

# --- inference transforms --------------------------------------------------
to_tensor_transform = ToTensor()

# initialize data transforms so that we can run inference with biomass neural network
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()

# --- model weights ---------------------------------------------------------
# NOTE(review): torch.load unpickles the file, so only trusted checkpoints
# should ever be loaded from this path.
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')
akpd_scorer_network = load_model('/root/data/alok/biomass_estimation/playground/akpd_scorer_model_TF.h5') # make this better

<h1> Function to generate weight prediction and confidence score </h1>

In [None]:
def generate_weight_score(row_id, left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, akpd_keypoints, cm):
    """Return ``(akpd_score, akpd_weight_prediction)`` for one stereo pair.

    The function now uses its own ``row_id`` and ``cm`` arguments. It previously
    read ``row.id`` and ``row.camera_metadata`` from a module-level ``row``
    variable, which only worked while a surrounding loop happened to define it.

    Args:
        row_id: identifier passed to the transforms as 'stereo_pair_id'.
        left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata:
            accepted for interface compatibility; not used by the computation.
        akpd_keypoints: keypoints passed to the scorer and the biomass network.
        cm: camera metadata passed to the transforms as 'cm'.

    Returns:
        Tuple of the confidence score returned by ``generate_confidence_score``
        and the biomass network output multiplied by 1e4
        (presumably grams; TODO confirm the unit).
    """

    def _build_input_sample():
        # A fresh dict per consumer, as in the original code, so neither the
        # scorer nor the transforms can see the other's modifications.
        return {
            'keypoints': akpd_keypoints,
            'cm': cm,
            'stereo_pair_id': row_id,
            'single_point_inference': True
        }

    # run AKPD scoring network
    akpd_score = generate_confidence_score(_build_input_sample(), akpd_scorer_network)

    # run biomass estimation
    normalized_centered_2D_kps = normalize_centered_2D_transform_biomass(_build_input_sample())
    normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
    tensorized_kps = to_tensor_transform(normalized_stability_kps)
    akpd_weight_prediction = biomass_network(tensorized_kps['kp_input']).item() * 1e4

    return akpd_score, akpd_weight_prediction


In [None]:
# Score every annotated stereo pair; results are keyed by the DataFrame index label.
weight_score_dict = {}

for count, (idx, row) in enumerate(df.iterrows()):
    akpd_score, akpd_weight_prediction = generate_weight_score(
        idx,
        row.left_crop_url,
        row.right_crop_url,
        row.left_crop_metadata,
        row.right_crop_metadata,
        row.annotation,
        row.camera_metadata,
    )
    weight_score_dict[idx] = {
        'akpd_score': akpd_score,
        'akpd_weight_prediction': akpd_weight_prediction,
    }

    # progress indicator: print the number of rows processed so far, every 100 rows
    if count % 100 == 0:
        print(count)

In [None]:
# Attach the predictions to df as float columns; index labels without an entry
# in weight_score_dict are filled with NaN.
_no_prediction = {'akpd_weight_prediction': np.nan, 'akpd_score': np.nan}
df['akpd_weight'] = np.array(
    [weight_score_dict.get(label, _no_prediction)['akpd_weight_prediction'] for label in df.index],
    dtype=float,
)
df['akpd_score'] = np.array(
    [weight_score_dict.get(label, _no_prediction)['akpd_score'] for label in df.index],
    dtype=float,
)


In [None]:
# Re-index the frame by capture timestamp and expose the hour of day as a column.
capture_times = pd.to_datetime(df['captured_at'])
df.index = capture_times
df['hour'] = df.index.hour

In [None]:
# One panel per capture date: hourly sample count (blue) and hourly mean
# predicted weight (red).
cols = 3
dates = sorted(set(df.index.strftime('%Y-%m-%d')))

# Size the grid from the number of dates. The previous fixed 9x3 grid (27 axes)
# raised IndexError as soon as the data covered more than 27 days.
rows = max(1, -(-len(dates) // cols))  # ceiling division, at least one row
fig, axes = plt.subplots(rows, cols, figsize=(30, rows * 60 / 9), squeeze=False)

for panel_idx, date in enumerate(dates):
    panel = axes[panel_idx // cols, panel_idx % cols]
    # .loc selects the rows of that day; plain df[date] row selection by
    # date string is no longer supported by current pandas.
    hourly_weights = df.loc[date].groupby('hour')['akpd_weight']
    panel.plot(hourly_weights.size(), color='blue')
    panel.plot(hourly_weights.mean(), color='red')
    panel.set_title(date)

# hide the panels of the last grid row that have no date
for unused_panel in axes.flat[len(dates):]:
    unused_panel.set_visible(False)
plt.show()

In [None]:
# Daily mean predicted weight, restricted to rows with akpd_score above 0.9.
daily_fig, daily_ax = plt.subplots(figsize=(20, 10))
confident_weights = df[df.akpd_score > 0.9].akpd_weight
daily_ax.plot(confident_weights.resample('D').mean())
daily_ax.grid()
plt.show()

In [None]:
# Histogram of predicted weights (akpd_score > 0.9) for every capture date.
for date in dates:
    # .loc replaces the df[date] row lookup, which current pandas no longer supports
    day_df = df.loc[date]
    plt.hist(day_df[day_df.akpd_score > 0.9].akpd_weight)
    plt.title(date)
    plt.show()

In [None]:
# Hour-by-hour weight histograms for a single day; each title is the mean
# predicted weight of that hour.
date = '2020-01-12'
hours = list(range(8, 15))

# .loc replaces the df[date] row lookup, which current pandas no longer supports
day_df = df.loc[date]
for hour in hours:
    hourly_weights = day_df[(day_df.akpd_score > 0.9) & (day_df.hour == hour)].akpd_weight
    plt.hist(hourly_weights)
    plt.title(hourly_weights.mean())
    plt.show()


In [None]:
# Records for generate_biomass_estimate: one {'date', 'estimatedWeightG'} dict
# per row with akpd_score above 0.9. The per-row print of the date was removed
# because it emitted one output line per record.
confident_rows = df[(df.akpd_score > 0.9)]
elements = [
    {'date': date_str, 'estimatedWeightG': float(weight)}
    for date_str, weight in zip(confident_rows.index.strftime('%Y-%m-%d'), confident_rows.akpd_weight)
]
    

In [None]:
from collections import defaultdict
import datetime as dt
import json
import numpy as np
from sklearn.linear_model import LinearRegression

def generate_biomass_estimate(date, records_json, min_lookback=3, max_lookback=5, min_sample_size=3000, bucket_size=0.1):
    """Generate a biomass estimate for `date` given a historical record of weight estimates.

    Assumptions:
        - records_json is JSON blob in string form as specified in instructions
        - date is of the format 'YYYY-mm-dd'
        - records_json contains up to 30 days of data. The latest date in records_json equals 'date' input

    Args:
        date: target date string; must be one of the dates present in the records.
        records_json: JSON list of objects with the keys 'date' and 'estimatedWeightG'.
        min_lookback: minimum number of dates to pool before the sample-size
            criterion can stop the lookback.
        max_lookback: maximum number of dates to pool.
        min_sample_size: number of samples that, together with `min_lookback`,
            stops the lookback.
        bucket_size: width of the distribution buckets (the code multiplies
            bucket bounds by 1e3 before comparing them with the weights).

    Returns:
        dict with the keys 'weightMovingAvg', 'weightMovingDist',
        'numMovingAvgBatiFish', 'numMovingAvgLookbackDays' and 'dailyGrowthRate'.

    Raises:
        ValueError: if `date` has no records.
    """
    records = json.loads(records_json)

    # get list of weights by date
    weights_by_date = defaultdict(list)
    for r in records:
        weights_by_date[r['date']].append(r['estimatedWeightG'])

    historical_dates = sorted(weights_by_date)
    if date not in weights_by_date:
        raise ValueError('No records found for date {}'.format(date))

    # calculate daily growth rate (if more than one day is provided)
    if len(historical_dates) > 1:
        # Day offsets are measured from the first date *in the records*. The
        # previous code used an unrelated module-level `dates` list here, which
        # raised NameError whenever that global was not defined.
        first_day = dt.datetime.strptime(historical_dates[0], '%Y-%m-%d')
        day_offsets = [
            (dt.datetime.strptime(h_date, '%Y-%m-%d') - first_day).days
            for h_date in historical_dates
        ]
        log_avg_weights = np.log([np.mean(weights_by_date[h_date]) for h_date in historical_dates])
        # slope of the ordinary least-squares line through (day offset, log mean weight)
        growth_rate = np.polyfit(day_offsets, log_avg_weights, 1)[0]
    else:
        growth_rate = 0.0

    # calculate moving average weight: walk backwards from `date`, scaling older
    # weights forward by the growth rate
    date_idx = historical_dates.index(date)
    lookback, sample_size, adj_chunks = 0, 0, []
    for lookback, hist_idx in enumerate(range(date_idx, -1, -1), start=1):
        day_weights = np.asarray(weights_by_date[historical_dates[hist_idx]], dtype=float)
        sample_size += day_weights.size
        # NOTE(review): the exponent is the number of *dates* stepped back, not
        # the number of calendar days; the two differ when dates are missing.
        adj_chunks.append(np.exp(growth_rate * (lookback - 1)) * day_weights)
        if (lookback >= min_lookback and sample_size >= min_sample_size) or lookback >= max_lookback:
            break

    adj_weights = np.concatenate(adj_chunks)
    ma_weight = adj_weights.sum() / sample_size

    # fraction of the pooled samples falling into each bucket, keyed by the
    # bucket's lower bound
    # NOTE(review): keys are rounded to one decimal, which only suits bucket
    # sizes that are multiples of 0.1; a maximum weight lying exactly on a
    # bucket boundary falls into no bucket.
    distribution = {}
    for b in (round(x, 1) for x in np.arange(0.0, 1e-3 * adj_weights.max(), bucket_size)):
        low, high = 1e3 * b, 1e3 * (b + bucket_size)
        in_bucket = np.count_nonzero((adj_weights >= low) & (adj_weights < high))
        distribution[b] = in_bucket / sample_size

    return {
        'weightMovingAvg': ma_weight,
        'weightMovingDist': distribution,
        'numMovingAvgBatiFish': sample_size,
        'numMovingAvgLookbackDays': lookback,
        'dailyGrowthRate': growth_rate
    }
    
    
    

In [None]:
# Growth-rate-adjusted moving-average weight for every date present in `elements`.
dates = sorted({e['date'] for e in elements})

# Serialize the records once; the payload is identical for every date.
records_json = json.dumps(elements)
weights = [
    generate_biomass_estimate(date, records_json, min_sample_size=3000)['weightMovingAvg']
    for date in dates
]
    

In [None]:
# Top panel: raw daily averages vs. the growth-rate-adjusted moving average.
# Bottom panel: number of samples (akpd_score > 0.9) per day.
fig, ax = plt.subplots(2, 1, figsize=(15, 10))

# resample(..., how=...) is no longer accepted by pandas; aggregate on the
# resampler instead, as the daily-mean plot earlier in this notebook does.
confident_weights = df[(df.akpd_score > 0.9)].akpd_weight
tdf = confident_weights.resample('D').mean()
rdf = confident_weights.resample('D').size()

# NOTE(review): assumes `dates` and `weights` hold one entry per day that has
# samples, so they line up with tdf.dropna() -- confirm.
ax[0].plot(dates, tdf.dropna().values, label='daily averages', color='blue')
ax[0].plot(dates, weights, label='growth-rate-adjusted moving average', color='red')
ax[0].grid()
ax[0].legend()
ax[0].set_title('Vikane Pen ID 60 Biomass Progression')
ax[1].plot(rdf.index, rdf.values)
ax[1].grid()
ax[1].set_title('Vikane Pen ID 60 Sample Sizes')
plt.show()

In [None]:
# Histogram of predicted weights for rows whose score exceeds 0.8.
# NOTE(review): this cell reads df.score / df.weight, while the columns created
# in this notebook are akpd_score / akpd_weight -- confirm these columns exist.
hist_fig, hist_ax = plt.subplots(figsize=(20, 10))
mask = df.score > 0.8
hist_ax.hist(df[mask].weight, bins=10)
hist_ax.set_xlabel('Predicted weight (grams)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Predicted weight distribution for IMR - 11/09-11/13')
hist_ax.grid()
plt.show()

In [None]:
def display_crops(left_image_f, right_image_f, left_keypoints, right_keypoints, side='both', overlay_keypoints=True, show_labels=False):
    """Show the left crop, the right crop, or both, optionally with keypoints drawn on top.

    Args:
        left_image_f, right_image_f: image files read with ``plt.imread``.
        left_keypoints, right_keypoints: dicts mapping a label to a sequence
            whose first two entries are used as the x and y plot coordinates.
        side: 'left', 'right' or 'both' (stacked vertically).
        overlay_keypoints: draw a translucent red dot per keypoint.
        show_labels: additionally annotate every dot with its label.

    Raises:
        AssertionError: if `side` is not one of the three accepted values.
    """
    assert side in ('left', 'right', 'both'), \
        'Invalid side value: {}'.format(side)

    def _draw_keypoints(target_ax, keypoints):
        # one dot per keypoint, plus its label when requested
        for label, point in keypoints.items():
            x, y = point[0], point[1]
            target_ax.scatter([x], [y], color='red', s=200, alpha=0.3)
            if show_labels:
                target_ax.annotate(label, (x, y), color='red')

    if side == 'both':
        fig, axes = plt.subplots(2, 1, figsize=(20, 20))
        left_image = plt.imread(left_image_f)
        right_image = plt.imread(right_image_f)
        axes[0].imshow(left_image)
        axes[1].imshow(right_image)
        if overlay_keypoints:
            _draw_keypoints(axes[0], left_keypoints)
            _draw_keypoints(axes[1], right_keypoints)
    else:
        fig, ax = plt.subplots(figsize=(20, 10))
        if side == 'left':
            image_f, keypoints = left_image_f, left_keypoints
        else:
            image_f, keypoints = right_image_f, right_keypoints
        ax.imshow(plt.imread(image_f))
        if overlay_keypoints:
            _draw_keypoints(ax, keypoints)
    plt.show()

In [None]:
# Inspect one stereo pair: the idx-th heaviest row among those selected by `mask`.
idx = 24
row = df[mask].sort_values('weight', ascending=False).iloc[idx]

left_crop_url, right_crop_url = row.left_crop_url, row.right_crop_url
left_image_f, _, _ = s3_access_utils.download_from_url(left_crop_url)
right_image_f, _, _ = s3_access_utils.download_from_url(right_crop_url)
left_crop_metadata, right_crop_metadata = row.left_crop_metadata, row.right_crop_metadata
cm = row.camera_metadata
row_id = idx

# run AKPD -- predict_keypoints requires the camera metadata as its fifth
# argument; it was missing from this call, which raised TypeError
akpd_keypoints = akpd.predict_keypoints(left_crop_url, right_crop_url, left_crop_metadata, right_crop_metadata, cm)
left_keypoints = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in akpd_keypoints[0]['leftCrop']}
right_keypoints = {item['keypointType']: [item['xCrop'], item['yCrop']] for item in akpd_keypoints[0]['rightCrop']}


# run AKPD scoring network
input_sample = {
    'keypoints': akpd_keypoints[0],
    'cm': cm,
    'stereo_pair_id': row_id,
    'single_point_inference': True
}
# Score through generate_confidence_score, as generate_weight_score does. The
# previous code used two transform objects that are never defined in this
# notebook and called the Keras scorer like a torch module.
# NOTE(review): assumes akpd_keypoints[0] has the format the scorer expects -- confirm.
score = generate_confidence_score(input_sample, akpd_scorer_network)
display_crops(left_image_f, right_image_f, left_keypoints, right_keypoints, show_labels=True)


In [None]:
# Echo the current value of left_crop_url as the cell output.
left_crop_url

In [None]:
# Solve 3250 = 2070 * (1 + r)**60 for the per-period growth rate r.
# (The equation was previously written as an assignment to a literal, which is
# a SyntaxError.)
r = (3250 / 2070) ** (1 / 60) - 1
r

In [None]:
# Per-period growth rate r that satisfies 2070 * (1 + r)**63 == 3250.
((3250/2070)**(1/63.0)) - 1

In [None]:
# 2070 compounded at a rate of 0.718% per period over 51 periods.
2070*(1.00718)**(51)