In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker, relationship, join
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Table, Column, Integer, ForeignKey
from sqlalchemy.orm import relationship
from aquabyte.optics import convert_to_world_point, depth_from_disp, pixel2world, euclidean_distance
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from statsmodels.regression.quantile_regression import QuantReg
import mpld3
from PIL import Image
from copy import copy
from aquabyte.visualize import _normalize_world_keypoints

from scipy.stats import gaussian_kde
from mpl_toolkits.mplot3d import Axes3D
import pickle
from PIL import Image, ImageDraw
from multiprocessing import Pool, Manager

import sys
sys.path.append('/root/alok/repos/cv_research/alok/biomass_estimation/production_data_analysis_v3')
from template_matching import enhance, find_matches_and_homography, adjust_keypoints


# Widen pandas' DataFrame display. Fully-qualified option names are used
# because the bare 'max_columns' pattern is ambiguous in newer pandas releases
# (it also matches 'styler.render.max_columns') and raises OptionError there.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 50)

<h1> Download Data </h1>

In [None]:
# AWS credentials
# The path of the credentials JSON is read from the AWS_CREDENTIALS
# environment variable; the file is closed as soon as it has been parsed.
with open(os.environ["AWS_CREDENTIALS"]) as credentials_file:
    aws_credentials = json.load(credentials_file)
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")
s3_access_utils = S3AccessUtils('/root/data')


# prod SQL credentials
# Same pattern: path comes from PROD_RESEARCH_SQL_CREDENTIALS.
with open(os.environ["PROD_RESEARCH_SQL_CREDENTIALS"]) as credentials_file:
    sql_credentials = json.load(credentials_file)
rds_access_utils = RDSAccessUtils(sql_credentials)

# All keypoint annotations of pen 7 that actually carry keypoints.
sql_query = '''
select * from keypoint_annotations
where pen_id = 7
and keypoints is not NULL;
'''

original_df = rds_access_utils.extract_from_database(sql_query)

In [None]:
def get_world_keypoints(row):
    """Convert one annotation row's stereo keypoints to world coordinates.

    Returns the result of ``pixel2world`` for the row's ``leftCrop`` and
    ``rightCrop`` keypoints together with ``row.camera_metadata``, or ``None``
    when either crop is absent from ``row.keypoints``.
    """
    annotation = row.keypoints
    if not ('leftCrop' in annotation and 'rightCrop' in annotation):
        return None
    return pixel2world(annotation['leftCrop'], annotation['rightCrop'], row.camera_metadata)
    
# Row-wise conversion of every annotation into world coordinates
# (None for rows lacking one of the two crops).
original_df['world_keypoints'] = original_df.apply(get_world_keypoints, axis=1)

In [None]:
# Alternative results file (kept for reference, currently disabled):
# f = '/root/data/temp/results_557ec1732d8bc8bc66951d2ea4e69b935d69b111_model_lateral_only_original_bremnes_data.h5'
f = '/root/data/temp/results_f5cfd03d4622c24879cfa9d5f6427bffc4668205_unweighted_model_3800_vikingfjord_experiment_id_3.h5'
# NOTE(review): this rebinds `original_df`, discarding the frame queried from
# the database (and its computed `world_keypoints` column). Later cells read
# `world_keypoints` and `estimated_biomass_g` from this table -- presumably the
# HDF table already contains both; confirm.
original_df = pd.read_hdf(f, 'table')

In [None]:
# Display the rows ordered from the largest estimated biomass downwards.
original_df.sort_values('estimated_biomass_g', ascending=False)

In [None]:
# Row position (used with .iloc) of the single sample inspected in the following cells.
idx = 5618
# download_from_url returns a 3-tuple; only its first element is kept and
# passed to cv2.imread below as the image file.
left_image_f, _, _ = s3_access_utils.download_from_url(original_df.left_image_url.iloc[idx])
right_image_f, _, _ = s3_access_utils.download_from_url(original_df.right_image_url.iloc[idx])
keypoints = original_df.keypoints.iloc[idx]
# Load the left/right images of the stereo pair.
imageL = cv2.imread(left_image_f)
imageR = cv2.imread(right_image_f)


In [None]:
# Pre-process both images with enhance() before matching.
imgL = enhance(imageL)
imgR = enhance(imageR)
# H is the third value returned by find_matches_and_homography; it is the
# only one used below, to adjust the annotated keypoints.
good, matchesMask, H = find_matches_and_homography(imgL, imgR)
adjusted_keypoints = adjust_keypoints(keypoints, H)



In [None]:
# Display the adjusted keypoints computed for the inspected sample.
adjusted_keypoints

In [None]:
def plot_world_keypoints_3D(wkps):
    """Scatter-plot world keypoints on a new 3D matplotlib figure.

    The keypoints are first passed through ``_normalize_world_keypoints``;
    every resulting entry except ``'BODY'`` is drawn as a blue point. The
    x axis spans ``0 .. max(x)`` and the y/z axes are fixed to ``[-0.3, 0.3]``.
    """
    normalized = _normalize_world_keypoints(wkps)
    points = [normalized[part] for part in normalized.keys() if part != 'BODY']
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    zs = [point[2] for point in points]

    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    axes.set_xlim3d(0, max(xs))
    axes.set_ylim3d(-0.3, 0.3)
    axes.set_zlim3d(-0.3, 0.3)
    axes.scatter(xs, ys, zs, color='blue')

In [None]:
%matplotlib notebook

# Plot the stored world keypoints of the sample at row position `idx`.
wkps = original_df.world_keypoints.iloc[idx]
plot_world_keypoints_3D(wkps)

In [None]:
%matplotlib notebook

# Recompute world keypoints from the adjusted left/right keypoints using the
# sample's camera metadata, then plot them for comparison with the originals.
cm = original_df.camera_metadata.iloc[idx]
adjusted_wkps = pixel2world(adjusted_keypoints['leftCrop'], adjusted_keypoints['rightCrop'], cm)
plot_world_keypoints_3D(adjusted_wkps)

In [None]:
def coord2biomass(world_keypoints, model):
    """Predict biomass from a mapping of body part -> world coordinate.

    The feature vector is built from, in this order:
      1. the pairwise distances between the body parts, following the exact
         ordering of ``model['body_parts']``;
      2. every product of two of those distances (with repetition);
      3. every product of three of those distances (with repetition).

    It is then standardized with ``model['mean']`` / ``model['std']``,
    projected with ``model['PCA_components']`` and passed through the linear
    regression given by ``model['reg_coef']`` and ``model['reg_intercept']``.
    Returns the regression output.
    """
    mean = model['mean']
    std = model['std']
    pca_components = model['PCA_components']
    reg_coef = model['reg_coef']
    reg_intercept = model['reg_intercept']
    body_parts = model['body_parts']

    # Distances for every unordered pair of distinct body parts.
    n_parts = len(body_parts)
    distances = [
        euclidean_distance(world_keypoints[body_parts[a]], world_keypoints[body_parts[b]])
        for a in range(n_parts - 1)
        for b in range(a + 1, n_parts)
    ]

    # Degree-2 and degree-3 interaction terms over non-decreasing index tuples.
    n_dist = len(distances)
    quadratic_terms = [
        distances[a] * distances[b]
        for a in range(n_dist)
        for b in range(a, n_dist)
    ]
    cubic_terms = [
        distances[a] * distances[b] * distances[c]
        for a in range(n_dist)
        for b in range(a, n_dist)
        for c in range(b, n_dist)
    ]

    features = np.array(distances + quadratic_terms + cubic_terms)
    standardized = (features - mean) / std
    projected = np.dot(standardized, pca_components.T)
    return np.dot(projected, reg_coef) + reg_intercept

In [None]:
model_f = '/root/alok/repos/cv_algorithms/biomass-production/src/model.pkl'
# NOTE: pickle.load can execute arbitrary code -- only load model files from a
# trusted location. The context manager closes the file once it is parsed.
with open(model_f, 'rb') as model_file:
    model = pickle.load(model_file)


In [None]:
def generate_adjusted_weight(left_image_url, right_image_url, keypoints, cm, kpid, weight_dict):
    """Re-estimate one annotation's weight from homography-adjusted keypoints.

    Downloads both images, computes the homography between them, adjusts the
    keypoints with it, converts them to world coordinates using ``cm`` and
    stores the ``coord2biomass`` prediction in ``weight_dict[kpid]``.

    Any exception is printed and swallowed, in which case no entry is written
    for ``kpid``. The current number of entries in ``weight_dict`` is printed
    after every call. Relies on the notebook-level ``model`` and
    ``s3_access_utils``.

    NOTE(review): unlike the single-sample cell, the images are not passed
    through ``enhance()`` before matching -- confirm this is intended.
    """
    try:
        local_paths = []
        for image_url in (left_image_url, right_image_url):
            local_path, _, _ = s3_access_utils.download_from_url(image_url)
            local_paths.append(local_path)
        left_frame, right_frame = (cv2.imread(path) for path in local_paths)
        _, _, homography = find_matches_and_homography(left_frame, right_frame)
        shifted_keypoints = adjust_keypoints(keypoints, homography)
        shifted_wkps = pixel2world(shifted_keypoints['leftCrop'],
                                   shifted_keypoints['rightCrop'],
                                   cm)
        weight_dict[kpid] = coord2biomass(shifted_wkps, model)
    except Exception as err:
        print('Error: {}'.format(err))

    print(len(weight_dict.keys()))



In [None]:
# Shared, process-safe dict that the workers fill with {annotation id: weight}.
manager = Manager()
weight_dict = manager.dict()

# One argument tuple per annotation row. The comprehension keeps its loop
# variables local, so the notebook-level `idx` (the inspected sample) is no
# longer overwritten by this cell.
args = [
    (row.left_image_url, row.right_image_url, row.keypoints,
     row.camera_metadata, row.id, weight_dict)
    for _, row in original_df.iterrows()
]

# starmap blocks until every task has finished; the context manager then
# tears the 20 worker processes down instead of leaving them running.
with Pool(processes=20) as pool:
    pool.starmap(generate_adjusted_weight, args)


In [None]:
# Compare every re-computed weight with the stored estimate for the same annotation id.
for k in weight_dict.keys():
    # The first row whose id equals k supplies the original estimate.
    original_weight = original_df[original_df.id == k].estimated_biomass_g.iloc[0]
    new_weight = weight_dict[k]
    # Change relative to the original estimate (a fraction, not multiplied by 100).
    pct_difference = (new_weight - original_weight) / original_weight
    print('Original weight: {}, New weight: {}, Pct. Difference: {}'.format(original_weight, new_weight, pct_difference))

In [None]:
%matplotlib inline
plt.figure(figsize=(20, 10))
# Materialize the values as a list first: np.array() over a plain dict's
# values() view yields a 0-d object array rather than a numeric vector, so
# the explicit list() keeps this cell working for any mapping type.
weights = np.array(list(weight_dict.values()))
# Keep only strictly positive weights below 20000.
mask = (weights > 0) & (weights < 20000)
plt.hist(weights[mask], bins=20)
plt.grid()
plt.show()

In [None]:
# Mean of the weights kept by the mask. `weights` and `mask` are assigned in
# more than one cell, so this reports whichever pair was assigned most recently.
print(np.mean(weights[mask]))

In [None]:
%matplotlib inline
plt.figure(figsize=(20, 10))
# Histogram of the stored estimated_biomass_g values, for comparison.
# NOTE: this rebinds the notebook-level `weights` and `mask`.
weights = original_df.estimated_biomass_g.values
# Keep only strictly positive weights below 20000.
mask = (weights > 0) & (weights < 20000)
plt.hist(weights[mask], bins=20)
plt.grid()
plt.show()

In [None]:
# Mean of the weights kept by the mask. `weights` and `mask` are assigned in
# more than one cell, so this reports whichever pair was assigned most recently.
print(np.mean(weights[mask]))

In [None]:
# Prediction for the inspected sample from its stored world keypoints.
coord2biomass(wkps, model)

In [None]:
# Prediction for the same sample from its homography-adjusted world keypoints.
coord2biomass(adjusted_wkps, model)