In [None]:
import json, os
import cv2
import torch
from multiprocessing import Pool, Manager
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.akpd import AKPD
from aquabyte.template_matching import find_matches_and_homography
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.data_loader import KeypointsDataset, NormalizeCentered2D, ToTensor, BODY_PARTS
from torch.utils.data import Dataset, DataLoader
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from PIL import Image

from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model
import boto3
import pandas as pd
import numpy as np
import plotly.express as px
import time
from matplotlib import pyplot as plt

from collections import defaultdict
import datetime as dt
import json
import numpy as np
from sklearn.linear_model import LinearRegression
from collections import defaultdict



In [None]:
from collections import defaultdict
import json
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

def compute_growth_rate(tdf, rdf, start_date, end_date):
    """Estimate the daily growth rate by fitting an exponential curve.

    A weighted linear regression of ``log(tdf)`` on "days elapsed since
    ``start_date``" is fitted, so the slope is the exponential daily growth
    rate. Each day is weighted by the matching entry of ``rdf``.

    Args:
        tdf: Series with a DatetimeIndex holding one value per day
            (compute_metrics passes the daily mean weight).
        rdf: Series aligned with ``tdf`` holding the per-day regression
            weights (compute_metrics passes the daily sample size).
        start_date: 'YYYY-MM-DD' string used as day zero of the regression.
        end_date: accepted for interface symmetry; not used by the fit.

    Returns:
        (growth_rate, trend_score): the fitted slope and the weighted R^2
        reported by ``LinearRegression.score`` on the same data.
    """
    date_format = '%Y-%m-%d'
    day_zero = dt.datetime.strptime(start_date, date_format)

    days_elapsed = []
    for day in tdf.index.date.astype(str):
        days_elapsed.append((dt.datetime.strptime(day, date_format) - day_zero).days)

    features = np.array(days_elapsed).reshape(-1, 1)
    log_values = np.log(tdf.values)
    weights = rdf.values

    model = LinearRegression().fit(features, log_values, sample_weight=weights)
    return model.coef_[0], model.score(features, log_values, sample_weight=weights)


def compute_distribution_confidence(df, start_date, end_date, date):
    """Score how closely one day's weight distribution matches its window.

    The weights in ``[start_date, end_date]`` are shifted so that their mean
    equals the mean of the weights on ``date``; integer percentiles 0..99 of
    both samples are then compared (a QQ comparison). The result is the RMS
    of the relative deviations over percentiles 1..98, so 0 means the two
    quantile curves coincide and larger values mean a worse match.

    Note that the window sample is taken over the whole
    ``[start_date, end_date]`` slice, which includes ``date`` itself whenever
    ``date`` lies inside the window.

    Args:
        df: DataFrame indexed by timestamp with an ``estimated_weight_g`` column.
        start_date, end_date: 'YYYY-MM-DD' bounds of the comparison window.
        date: 'YYYY-MM-DD' day whose distribution is being assessed.

    Returns:
        The RMS relative quantile deviation as a float.
    """
    percentiles = list(range(100))
    window_weights = df[start_date:end_date].estimated_weight_g
    day_weights = df[date:date].estimated_weight_g

    # align the window's mean with the day's mean so only the shape is compared
    mean_shift = day_weights.mean() - window_weights.mean()
    window_quantiles = np.percentile(window_weights + mean_shift, percentiles)
    day_quantiles = np.percentile(day_weights, percentiles)

    # the extreme percentiles (0 and 99) are left out of the comparison
    relative_deviation = (window_quantiles[1:99] - day_quantiles[1:99]) / day_quantiles[1:99]
    return np.mean(np.square(relative_deviation)) ** 0.5


def compute_minimum_sample_size(distribution_confidence, trend_score):
    """Return the minimum number of samples the smart average should pool.

    NOTE: we need to think more carefully about this to understand how
    distribution confidence and trend score affect the minimum sample size
    we want. Hardcoded for now, so both arguments are currently ignored.
    """
    return 5000


def compute_smart_average(df, tdf, rdf, date, distribution_confidence, growth_rate,
                          trend_score, window_size_d, bucket_size=0.1):
    """Compute the growth-adjusted "smart" average weight and its distribution.

    Smart average is defined as a lookback to a maximum of ``window_size_d``
    days, or until the minimum sample size is achieved. Days are added in
    order of their absolute distance (in days) from ``date``; every sample of
    a selected day is scaled by ``exp(days_from_date * growth_rate)`` so that
    it is expressed as of ``date``.

    Args:
        df: DataFrame indexed by timestamp with an ``estimated_weight_g`` column.
        tdf: Series of daily mean weights (DatetimeIndex, one row per day).
        rdf: Series of daily sample sizes aligned with ``tdf``. It is NOT
            modified by this function.
        date: 'YYYY-MM-DD' day the average is computed for.
        distribution_confidence, trend_score: forwarded to
            ``compute_minimum_sample_size``.
        growth_rate: daily exponential growth rate; treated as 0.0 when it is
            ``None`` or when only a single day of data is available.
        window_size_d: maximum lookback distance in days.
        bucket_size: histogram bucket width in kilograms. Bucket keys are
            rounded to one decimal, so use multiples of 0.1.

    Returns:
        dict with keys ``weightMovingAvg``, ``weightMovingDist`` (bucket lower
        bound in kg -> fraction of samples), ``numMovingAvgBatiFish``,
        ``numMovingAvgLookbackDays`` and ``dailyGrowthRate``.

    Raises:
        ValueError: if no samples fall inside the selected lookback window.
    """
    date_format = '%Y-%m-%d'

    # keep the dates in tdf order so they stay aligned with tdf/rdf values
    dates = [str(d) for d in tdf.index.date]
    if growth_rate is None or len(dates) == 1:
        growth_rate = 0.0
    minimum_sample_size = compute_minimum_sample_size(distribution_confidence, trend_score)

    reference_day = dt.datetime.strptime(date, date_format)
    day_offsets = np.array(
        [(reference_day - dt.datetime.strptime(d, date_format)).days for d in dates]
    )
    daily_means = np.asarray(tdf.values, dtype=float)
    # work on a copy: zeroing rdf.values directly would corrupt the caller's
    # series (and fails outright when pandas hands out a read-only array)
    daily_counts = np.array(rdf.values, copy=True)

    # widen the lookback one day at a time until enough samples are pooled
    lookback = 0
    for lookback in range(window_size_d):
        if daily_counts[np.abs(day_offsets) <= lookback].sum() >= minimum_sample_size:
            break
    daily_counts[np.abs(day_offsets) > lookback] = 0

    sample_size = float(daily_counts.sum())
    if sample_size == 0:
        raise ValueError('no samples available within the lookback window for %s' % date)

    growth_factors = np.exp(day_offsets * growth_rate)
    smart_average = float(np.sum(growth_factors * daily_means * daily_counts) / sample_size)

    day_is_used = daily_counts > 0
    total_days = int(day_is_used.sum())
    adj_weights = np.concatenate([
        factor * df.loc[d:d].estimated_weight_g.values
        for factor, d, used in zip(growth_factors, dates, day_is_used) if used
    ])

    # Histogram of growth-adjusted weights. Each weight is assigned to the
    # bucket floor(weight / width) so that the heaviest fish is always counted,
    # even when its weight lies exactly on a bucket boundary.
    distribution = {}
    bucket_width_g = 1e3 * bucket_size
    if adj_weights.max() >= 0:
        bucket_index = np.floor(adj_weights / bucket_width_g).astype(int)
        counts = np.bincount(bucket_index[bucket_index >= 0])
        for i, count in enumerate(counts):
            distribution[round(i * bucket_size, 1)] = count / sample_size

    output = {
        'weightMovingAvg': smart_average,
        'weightMovingDist': distribution,
        'numMovingAvgBatiFish': sample_size,
        'numMovingAvgLookbackDays': total_days,
        'dailyGrowthRate': growth_rate
    }

    return output


def compute_date_range(historical_dates, date, window_size_d):
    """Pick the ``window_size_d``-day window around ``date`` with the most data.

    Candidate windows start as the pure lookback window ending on ``date``
    and are shifted forward one day at a time, up to ``window_size_d // 2``
    days. The candidate covering the most entries of ``historical_dates`` is
    returned; ties go to the later (more forward-shifted) candidate, so when
    future data is available, windows in which ``date`` is centered are
    preferred.

    Args:
        historical_dates: list of 'YYYY-MM-DD' strings for which data exists.
        date: 'YYYY-MM-DD' string of the current day.
        window_size_d: window length in days.

    Returns:
        (start_date, end_date) as 'YYYY-MM-DD' strings, both inclusive.
    """
    date_format = '%Y-%m-%d'
    anchor = dt.datetime.strptime(date, date_format)
    known_dates = np.array(historical_dates)

    best_window = (None, None)
    best_coverage = 0
    for shift in range(window_size_d // 2 + 1):
        window_start = (anchor - dt.timedelta(days=window_size_d - 1)
                        + dt.timedelta(days=shift)).strftime(date_format)
        window_end = (anchor + dt.timedelta(days=shift)).strftime(date_format)

        # ISO dates compare correctly as strings
        inside = (known_dates >= window_start) & (known_dates <= window_end)
        coverage = inside.sum()
        if coverage >= best_coverage:
            best_window = (window_start, window_end)
            best_coverage = coverage

    return best_window


def compute_metrics(date, records_json, window_size_d=7):
    """Compute raw and smart-average weight metrics for a single day.

    Args:
        date: 'YYYY-MM-DD' day to compute metrics for.
        records_json: JSON object mapping date strings to lists of weight
            estimates in grams.
        window_size_d: size in days of the window used for the smart average.

    Returns:
        (raw_avg_weight, raw_sample_size, smart_average, metadata) where
        ``smart_average`` is the dict produced by ``compute_smart_average``
        and ``metadata`` holds ``trend_score`` and ``distribution_confidence``
        (both ``None`` when the window is a single day).
    """
    records = json.loads(records_json)

    dts = [iter_date for iter_date, day_vals in records.items() for _ in day_vals]
    vals = [val for day_vals in records.values() for val in day_vals]

    df = pd.DataFrame(vals, index=pd.to_datetime(dts), columns=['estimated_weight_g'])
    # The JSON keys are not guaranteed to be in chronological order, and label
    # slicing on a DatetimeIndex is only reliable when the index is monotonic.
    df = df.sort_index(kind='mergesort')

    # get raw statistics
    day_weights = df.loc[date:date].estimated_weight_g
    raw_avg_weight = day_weights.mean()
    raw_sample_size = day_weights.shape[0]

    # compute relevant date range
    historical_dates = sorted(set(df.index.date.astype(str)))
    start_date, end_date = compute_date_range(historical_dates, date, window_size_d)

    # daily sample size (rdf) and daily mean weight (tdf); days without data are dropped
    daily = df.loc[start_date:end_date].estimated_weight_g.resample('D')
    rdf = daily.size()
    tdf = daily.mean()
    has_data = rdf > 0
    tdf = tdf[has_data].copy(deep=True)
    rdf = rdf[has_data].copy(deep=True)

    # a trend can only be fitted when the window spans more than one day
    growth_rate, trend_score, distribution_confidence = None, None, None
    if start_date < end_date:
        growth_rate, trend_score = compute_growth_rate(tdf, rdf, start_date, end_date)
        distribution_confidence = compute_distribution_confidence(df, start_date, end_date, date)
    smart_average = compute_smart_average(df, tdf, rdf, date,
                                          distribution_confidence, growth_rate,
                                          trend_score, window_size_d)
    metadata = {
        'trend_score': trend_score,
        'distribution_confidence': distribution_confidence
    }

    return raw_avg_weight, raw_sample_size, smart_average, metadata

In [None]:
# extract dataframe
s3_access_utils = S3AccessUtils('/root/data')
# read the credentials inside a context manager so the file handle is closed
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_f:
    rds_access_utils = RDSAccessUtils(json.load(credentials_f))

# query parameters; every filter in the query below is driven by these values
pen_id, group_id = 88, '88'
captured_from, captured_to = '2020-02-10', '2020-02-29'
min_akpd_score = 0.9

# BATI crop annotations (annotation_state_id = 3) for the pen, right-joined
# onto the biomass computations captured in the date range
query = """
    SELECT * FROM (
      (SELECT * FROM prod.crop_annotation cas
      INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
      WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
      AND cas.annotation_state_id = 3
      AND cas.pen_id={pen_id}) a
    RIGHT JOIN 
      (SELECT left_crop_url, estimated_weight_g, akpd_score FROM prod.biomass_computations
      WHERE prod.biomass_computations.captured_at between '{captured_from}' and '{captured_to}'
      AND prod.biomass_computations.akpd_score > {min_akpd_score}) bc 
    ON 
      (a.left_crop_url=bc.left_crop_url)
    ) x
    WHERE x.captured_at between '{captured_from}' and '{captured_to}'
    AND x.pen_id = {pen_id}
    AND x.group_id = '{group_id}';
""".format(pen_id=pen_id, group_id=group_id, captured_from=captured_from,
           captured_to=captured_to, min_akpd_score=min_akpd_score)

df = rds_access_utils.extract_from_database(query)
df = df.sort_values('captured_at')
df = df[df.akpd_score > min_akpd_score].copy(deep=True)
# index by capture time so the frame can be resampled by day later on
df.index = pd.to_datetime(df.captured_at)
df['hour'] = df.index.hour


In [None]:
# Keep only the first column for every repeated column name.
# NOTE(review): presumably needed because the `SELECT *` over joined tables in
# the query returns some column names more than once — confirm.
df = df.loc[:,~df.columns.duplicated()]

In [None]:
# For every annotated stereo pair, average (over all body parts) the difference
# between the left and right keypoint y-coordinates in frame pixels.
diffs = []
for annotation in df.annotation:
    left_kps = {kp['keypointType']: (kp['xFrame'], kp['yFrame']) for kp in annotation['leftCrop']}
    right_kps = {kp['keypointType']: (kp['xFrame'], kp['yFrame']) for kp in annotation['rightCrop']}
    y_offsets = [left_kps[part][1] - right_kps[part][1] for part in BODY_PARTS]
    diffs.append(np.mean(y_offsets))


In [None]:
df['y_coordinate_diff'] = diffs

# Daily aggregates of the left/right y-coordinate deviation. The `how=` argument
# of resample() no longer exists in pandas; aggregate on the resampler instead.
daily_y_diff = df.y_coordinate_diff.resample('D')

fig, axes = plt.subplots(2, 1, figsize=(10, 10))

# top panel: daily mean deviation, with a reference line at zero
axes[0].plot(daily_y_diff.mean())
axes[0].axhline(0, color='red', linestyle='--')
axes[0].grid()
axes[0].set_title('Daily avg y-coordinate deviation')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Y coordinate deviation (pixels)')

# bottom panel: number of samples behind each daily mean
axes[1].plot(daily_y_diff.size())
axes[1].set_ylim(bottom=0)
axes[1].grid()
axes[1].set_title('Daily Sample Size')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Sample Size')
plt.show()

<h1> Y-Coordinate Deviation Diagnosis </h1>

In [None]:
def load_params(params):
    """Build undistort/rectify maps for both cameras from stereo parameters.

    Args:
        params: dict with ``CameraParameters1`` / ``CameraParameters2`` (each
            holding ``IntrinsicMatrix``, ``RadialDistortion`` and
            ``TangentialDistortion``) plus ``RotationOfCamera2`` and
            ``TranslationOfCamera2``.

    Returns:
        (left_maps, right_maps): the outputs of ``cv2.initUndistortRectifyMap``
        for camera 1 and camera 2, computed for a 4096x3000 image.
    """
    print("Loading params...")

    def distortion_coefficients(camera_params):
        # first two radial terms, then the tangential terms, then the third
        # radial term — presumably OpenCV's (k1, k2, p1, p2, k3) ordering
        radial = camera_params['RadialDistortion']
        tangential = camera_params['TangentialDistortion']
        return np.array(radial[0:2] + tangential + [radial[2]])

    camera_1 = params['CameraParameters1']
    camera_2 = params['CameraParameters2']

    # the stored matrices are transposed before being handed to OpenCV
    cameraMatrix1 = np.array(camera_1['IntrinsicMatrix']).transpose()
    cameraMatrix2 = np.array(camera_2['IntrinsicMatrix']).transpose()
    distCoeffs1 = distortion_coefficients(camera_1)
    distCoeffs2 = distortion_coefficients(camera_2)

    R = np.array(params['RotationOfCamera2']).transpose()
    T = np.array(params['TranslationOfCamera2']).transpose()

    imageSize = (4096, 3000)

    # perform rectification
    rectification = cv2.stereoRectify(
        cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2, imageSize, R, T,
        None, None, None, None, None, cv2.CALIB_ZERO_DISPARITY, 0)
    R1, R2, P1, P2 = rectification[0], rectification[1], rectification[2], rectification[3]

    left_maps = cv2.initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, imageSize, cv2.CV_16SC2)
    right_maps = cv2.initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, imageSize, cv2.CV_16SC2)

    print("Params loaded.")
    return left_maps, right_maps

def rectify_crop(crop, maps, crop_metadata):
    """Rectify a crop by remapping it inside a blank full-size frame.

    The crop is pasted into a black 3000x4096 frame at the position given by
    ``crop_metadata``, the frame is remapped with ``maps``, and the bounding
    box of the non-zero pixels of the result is cut out again.

    Args:
        crop: image convertible with ``np.array`` whose size matches
            ``crop_metadata['height']`` x ``crop_metadata['width']``.
        maps: pair of maps passed to ``cv2.remap`` (see ``load_params``).
        crop_metadata: dict with ``x_coord``, ``y_coord``, ``width``, ``height``.

    Returns:
        (rectified_crop, rectified_crop_metadata, remap): the cut-out crop, a
        copy of the metadata updated with the new bounding box, and the full
        remapped frame.
    """
    print("Rectifying...")
    top = crop_metadata['y_coord']
    bottom = crop_metadata['y_coord'] + crop_metadata['height']
    left = crop_metadata['x_coord']
    right = crop_metadata['x_coord'] + crop_metadata['width']

    # paste the crop at its original location in an otherwise black frame
    full_frame = np.zeros([3000, 4096, 3]).astype('uint8')
    full_frame[top:bottom, left:right, :] = np.array(crop)
    remap = cv2.remap(full_frame, maps[0], maps[1], cv2.INTER_LANCZOS4)

    # bounding box of everything that is not pure black after remapping
    nonzero = np.where(remap > 0)
    rows, cols = nonzero[0], nonzero[1]
    y_min, y_max = rows.min(), rows.max()
    x_min, x_max = cols.min(), cols.max()
    rectified_crop = remap[y_min:y_max, x_min:x_max, :].copy()

    # construct rectified crop metadata
    rectified_crop_metadata = crop_metadata.copy()
    rectified_crop_metadata.update({
        'x_coord': int(x_min),
        'y_coord': int(y_min),
        'width': int(x_max - x_min),
        'height': int(y_max - y_min),
    })

    print("Rectification done")
    return rectified_crop, rectified_crop_metadata, remap


def create_crop_metadata(raw_crop_f):
    """Derive crop metadata from the coordinates encoded in a crop file name.

    The last four underscore-separated fields of the file name (extension
    removed) are read as ``x_min, y_min, x_max, y_max``.

    Args:
        raw_crop_f: path of the crop image, e.g.
            ``.../left_frame_crop_10_20_110_220.jpg``. Any file extension is
            accepted.

    Returns:
        dict with ``x_coord``, ``y_coord``, ``width`` and ``height``.

    Raises:
        ValueError: if the name does not end in four integer fields.
    """
    stem, _ = os.path.splitext(os.path.basename(raw_crop_f))
    fields = stem.split('_')[-4:]
    if len(fields) != 4:
        raise ValueError('cannot parse crop coordinates from file name: %s' % raw_crop_f)
    x_min, y_min, x_max, y_max = (int(field) for field in fields)

    crop_metadata = {
        'x_coord': x_min,
        'y_coord': y_min,
        'width': x_max - x_min,
        'height': y_max - y_min,
    }
    return crop_metadata

def display_crops(left_image, right_image, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crops stacked vertically, optionally with keypoints.

    Args:
        left_image, right_image: images accepted by ``Axes.imshow``.
        ann: annotation dict whose ``leftCrop`` / ``rightCrop`` entries are
            lists of keypoints carrying ``keypointType``, ``xCrop``, ``yCrop``.
        overlay_keypoints: draw each keypoint as a red dot on its crop.
        show_labels: additionally write the keypoint type next to each dot.
    """
    fig, axes = plt.subplots(2, 1, figsize=(20, 20))
    axes[0].imshow(left_image)
    axes[1].imshow(right_image)

    # keypoints in crop coordinates, one dict per panel (left first, then right)
    keypoints_per_panel = [
        {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann[side]}
        for side in ('leftCrop', 'rightCrop')
    ]

    if overlay_keypoints:
        for ax, keypoints in zip(axes, keypoints_per_panel):
            for bp, (kp_x, kp_y) in keypoints.items():
                ax.scatter([kp_x], [kp_y], color='red', s=1)
                if show_labels:
                    ax.annotate(bp, (kp_x, kp_y), color='red')
    plt.show()

In [None]:
# load crops and metadata
i = 0  # position in df of the sample to diagnose

# load maps
matlab_stereo_parameters_url = df.camera_metadata.iloc[i]['stereoParametersUrl']
matlab_stereo_params_f, _, _ = s3_access_utils.download_from_url(matlab_stereo_parameters_url)
# read the parameters inside a context manager so the file handle is closed
with open(matlab_stereo_params_f) as stereo_params_file:
    matlab_stereo_params = json.load(stereo_params_file)
left_maps, right_maps = load_params(matlab_stereo_params)

left_crop_url = df.left_crop_url.iloc[i]
right_crop_url = df.right_crop_url.iloc[i]
ann = df.annotation.iloc[i]

# download_from_url is used to obtain the S3 key of each crop; the crop itself
# is then fetched from the resized-frames bucket under that same key.
# NOTE(review): the bucket returned by download_from_url is ignored in favour
# of the hard-coded bucket name — confirm this is intended.
_, bucket, left_raw_crop_key = s3_access_utils.download_from_url(left_crop_url)
left_raw_crop_f = s3_access_utils.download_from_s3('aquabyte-frames-resized-inbound', left_raw_crop_key)
left_raw_crop = Image.open(left_raw_crop_f)

_, bucket, right_raw_crop_key = s3_access_utils.download_from_url(right_crop_url)
right_raw_crop_f = s3_access_utils.download_from_s3('aquabyte-frames-resized-inbound', right_raw_crop_key)
right_raw_crop = Image.open(right_raw_crop_f)

# crops.json lives next to the left crop; it is downloaded here but not read in this cell
crops_json_key_base = os.path.dirname(left_raw_crop_key)
crops_json_key = os.path.join(crops_json_key_base, 'crops.json')
crops_json_f = s3_access_utils.download_from_s3('aquabyte-frames-resized-inbound', crops_json_key)


# crop position/size comes from the coordinates encoded in each crop's file name
left_crop_metadata = create_crop_metadata(left_raw_crop_f)
right_crop_metadata = create_crop_metadata(right_raw_crop_f)

rectified_left_crop, rectified_left_crop_metadata, padded_left_image = rectify_crop(left_raw_crop, left_maps, left_crop_metadata)
rectified_right_crop, rectified_right_crop_metadata, padded_right_image = rectify_crop(right_raw_crop, right_maps, right_crop_metadata)

In [None]:
# Inspect the array shape of the rectified left crop.
rectified_left_crop.shape

In [None]:
# Inspect the bounding box (x_coord, y_coord, width, height) computed by rectify_crop for the left crop.
rectified_left_crop_metadata

In [None]:
# Inspect the array shape of the rectified right crop.
rectified_right_crop.shape

In [None]:
# Inspect the bounding box (x_coord, y_coord, width, height) computed by rectify_crop for the right crop.
rectified_right_crop_metadata

In [None]:
# Left crop metadata stored in the first row of df, for comparison with the locally computed values.
# NOTE(review): uses a literal 0 rather than the sample position `i` set earlier — same row only while i == 0.
df.left_crop_metadata.iloc[0]

In [None]:
# Right crop metadata stored in the first row of df, for comparison with the locally computed values.
# NOTE(review): uses a literal 0 rather than the sample position `i` set earlier — same row only while i == 0.
df.right_crop_metadata.iloc[0]

In [None]:
# Show the locally rectified crops with the annotation's crop-space keypoints drawn on top.
display_crops(rectified_left_crop, rectified_right_crop, ann)

In [None]:
# Save the full remapped left frame (third value returned by rectify_crop) for offline inspection.
# NOTE(review): hard-coded absolute output path.
Image.fromarray(padded_left_image).save('/root/data/alok/biomass_estimation/playground/padded_left_image.png')

In [None]:
# Save the full remapped right frame (third value returned by rectify_crop) for offline inspection.
# NOTE(review): hard-coded absolute output path.
Image.fromarray(padded_right_image).save('/root/data/alok/biomass_estimation/playground/padded_right_image.png')

In [None]:
# Download the 512x512 resized full frames stored next to each crop in S3.
left_frame_resize_key = os.path.join(os.path.dirname(left_raw_crop_key), 'left_frame.resize_512_512.jpg')
left_frame_resize_f = s3_access_utils.download_from_s3('aquabyte-frames-resized-inbound', left_frame_resize_key)

right_frame_resize_key = os.path.join(os.path.dirname(right_raw_crop_key), 'right_frame.resize_512_512.jpg')
right_frame_resize_f = s3_access_utils.download_from_s3('aquabyte-frames-resized-inbound', right_frame_resize_key)

In [None]:
# Open the downloaded resized left frame as a PIL image.
left_frame_resize = Image.open(left_frame_resize_f)

In [None]:
# Open the downloaded resized right frame as a PIL image.
right_frame_resize = Image.open(right_frame_resize_f)

In [None]:
# Scale the resized frames back up to 4096x3000, apply the rectification maps
# to the whole frame, and save the results for visual inspection.
full_frame_size = (4096, 3000)
left_frame_resize_unresize = left_frame_resize.resize(full_frame_size)
right_frame_resize_unresize = right_frame_resize.resize(full_frame_size)

left_full_frame = np.array(left_frame_resize_unresize)
left_frame_resize_unresize_remap = cv2.remap(left_full_frame, left_maps[0], left_maps[1], cv2.INTER_LANCZOS4)
right_full_frame = np.array(right_frame_resize_unresize)
right_frame_resize_unresize_remap = cv2.remap(right_full_frame, right_maps[0], right_maps[1], cv2.INTER_LANCZOS4)

left_remap_image = Image.fromarray(left_frame_resize_unresize_remap)
left_remap_image.save('/root/data/alok/biomass_estimation/playground/left_frame_resize_unresize_remap.png')
right_remap_image = Image.fromarray(right_frame_resize_unresize_remap)
right_remap_image.save('/root/data/alok/biomass_estimation/playground/right_frame_resize_unresize_remap.png')