In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer, _normalize_world_keypoints
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool
import datetime as dt
import pytz
import matplotlib.dates as mdates
myFmt = mdates.DateFormatter('%d')
from mpl_toolkits.mplot3d import Axes3D
import cv2

import matplotlib
# Global matplotlib font settings (applied below via matplotlib.rc).
# 'normal' is a valid font *weight*/*style* but not a font family; matplotlib
# emits "findfont: Font family ['normal'] not found" warnings and falls back
# to its default. Use the generic 'sans-serif' family explicitly instead.
font = {'family' : 'sans-serif',
        'weight' : 'bold',
        'size'   : 22}

matplotlib.rc('font', **font)

import matplotlib.cm as cm
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)

<h1> Predicted Unconditional Weight Histogram for experiment ID #1</h1>

In [None]:
# Results table to analyse. The experiment-01 path is kept (commented out) for quick switching.
# f = '/root/data/temp/results_557ec1732d8bc8bc66951d2ea4e69b935d69b111_model_lateral_only_horizontal_vertical_constraints_research-exp-id-01-vikingfjord-20190628-20190630.h5'
f = '/root/data/temp/results_557ec1732d8bc8bc66951d2ea4e69b935d69b111_model_lateral_only_horizontal_vertical_constraints_research-exp-id-03-vikingfjord-20190709-20190710.h5'
df = pd.read_hdf(f, key='table')

# Keep only predictions strictly inside the (-2000 g, 20000 g) window.
mask = (df['estimated_biomass_g'] > -2000) & (df['estimated_biomass_g'] < 20000)

# Histogram of the retained predictions with a reference line at 6440 g.
# NOTE(review): the title says "Experiment ID #1" while the active path above is exp-id-03 — confirm.
plt.figure(figsize=(20, 10))
plt.hist(
    df.loc[mask, 'estimated_biomass_g'],
    bins=20,
    color='blue',
    label='4 eigenvectors',
    alpha=1.0,
)
plt.axvline(x=6440, color='red')
plt.title('Predicted biomass distribution for Waiting Pen Experiment ID #1')
plt.xlabel('Predicted weight (g)')
plt.ylabel('Frequency')
plt.legend()
plt.grid()
plt.show()

<h1> Create features for lateral filter </h1>

In [None]:
def _plane_fit_features(world_keypoints):
    """Fit a plane to four reference keypoints and score it against all keypoints.

    The plane expresses coordinate 1 of each keypoint as a linear function of
    coordinates 0 and 2, fitted on UPPER_LIP, HYPURAL_PLATE, ADIPOSE_FIN and
    ANAL_FIN only.

    Args:
        world_keypoints: mapping of body-part name -> indexable point with at
            least three components.

    Returns:
        Tuple ``(coef, vertical_angle, horizontal_angle, rms_error_m)`` where
        the angles are the arctangents (in degrees) of the coefficients on
        coordinate 0 and coordinate 2 respectively.

    Raises:
        Any exception raised by the lookup or the fit (e.g. a missing body
        part, or ``world_keypoints`` not being a mapping); the caller handles it.
    """
    # fit plane based on well-behaved points
    fit_points = [
        world_keypoints[body_part]
        for body_part in ['UPPER_LIP', 'HYPURAL_PLATE', 'ADIPOSE_FIN', 'ANAL_FIN']
    ]
    X_fit = np.array([[wkp[0], wkp[2]] for wkp in fit_points])
    y_fit = np.array([wkp[1] for wkp in fit_points])
    reg = LinearRegression().fit(X_fit, y_fit)
    vertical_angle = np.arctan(reg.coef_[0]) * 180.0 / np.pi
    horizontal_angle = np.arctan(reg.coef_[1]) * 180.0 / np.pi

    # test plane against every keypoint of the fish
    all_points = list(world_keypoints.values())
    X_all = np.array([[wkp[0], wkp[2]] for wkp in all_points])
    y_all = np.array([wkp[1] for wkp in all_points])
    # NOTE(review): this is ||residual|| / n, not sqrt(mean(residual**2)); it is
    # left unchanged because the 0.1 threshold used for good_annotation_mask was
    # chosen against this definition.
    rms_error_m = np.linalg.norm(reg.predict(X_all) - y_all) / y_all.shape[0]
    return reg.coef_, vertical_angle, horizontal_angle, rms_error_m


rms_error_ms, coeffs = [], []
horizontal_angles, vertical_angles = [], []
for idx, row in df.iterrows():
    try:
        coef, vertical_angle, horizontal_angle, rms_error_m = _plane_fit_features(row.world_keypoints)
    except Exception as e:
        # Best-effort: report the problem and record missing values for this row.
        print(e)
        coef, vertical_angle, horizontal_angle, rms_error_m = None, None, None, None

    # Append exactly once per row so every list stays aligned with df, even
    # when the failure happens after the plane has already been fitted.
    coeffs.append(coef)
    vertical_angles.append(vertical_angle)
    horizontal_angles.append(horizontal_angle)
    rms_error_ms.append(rms_error_m)

df['rms_error_m'] = rms_error_ms
df['horizontal_angle'] = horizontal_angles
df['vertical_angle'] = vertical_angles


<h1> Predicted Unconditional / Conditional Histograms Overlaid </h1>

In [None]:
%matplotlib inline
# Row filters: implausible weights, plane-fit error below 0.1, and both fitted angles within bounds.
hard_outlier_mask = (df['estimated_biomass_g'] < 0) | (df['estimated_biomass_g'] > 20000)
good_annotation_mask = df['rms_error_m'] < 0.1
lateral_mask = (df['horizontal_angle'].abs() < 40) & (df['vertical_angle'].abs() < 20)

# plot results
plt.figure(figsize=(20, 10))
# plt.hist(df[~hard_outlier_mask].estimated_biomass_g, bins=20, color='blue', label='unconditional', alpha=0.5)
plt.hist(
    df.loc[~hard_outlier_mask & good_annotation_mask & lateral_mask, 'estimated_biomass_g'],
    bins=20,
    color='red',
    label='conditional',
    alpha=0.5,
)
plt.axvline(x=5710, color='red')
plt.title('Predicted biomass distribution for Waiting Pen Experiment ID #1')
plt.xlabel('Predicted weight (g)')
plt.ylabel('Frequency')
plt.legend()
plt.grid()
plt.show()

In [None]:
# (rows, columns) of the frame after applying all three filters.
df.loc[~hard_outlier_mask & good_annotation_mask & lateral_mask].shape

In [None]:
# Mean predicted biomass over the rows that pass all three filters.
df.loc[~hard_outlier_mask & good_annotation_mask & lateral_mask, 'estimated_biomass_g'].mean()

In [None]:
%matplotlib inline
# Distribution of the fitted horizontal angle over all rows (default binning).
plt.figure(figsize=(20, 10))
plt.hist(df['horizontal_angle'])
plt.grid()
plt.show()

In [None]:
# The 20 rows with the largest plane-fit error, worst first.
df.sort_values(by='rms_error_m', ascending=False).iloc[:20]

<h1> Visualize Individual Cases </h1>

In [None]:
# Helpers used by the Visualizer: S3 access rooted at /root/data and RDS access
# configured from the JSON file named by the PROD_SQL_CREDENTIALS env variable.
s3_access_utils = S3AccessUtils('/root/data')
# Open the credentials file in a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))
v = Visualizer(rds_access_utils=rds_access_utils, s3_access_utils=s3_access_utils)

In [None]:
%matplotlib inline
# Inspect up to 100 randomly chosen rows, worst plane-fit error first.
# - n is capped at len(df): DataFrame.sample raises when asked for more rows
#   than exist (sampling is without replacement).
# - random_state is fixed so the same rows are shown after a kernel restart.
inspection_sample = df.sample(n=min(100, len(df)), random_state=0)
for idx, row in inspection_sample.sort_values('rms_error_m', ascending=False).iterrows():
    print(row.id, row.rms_error_m)
    v.load_data(row.id)
    v.display_crops(overlay_keypoints=True, show_labels=False)

In [None]:
# Load one hard-coded record (v.load_data is given df's `id` values in the loop
# above, so 554319 is presumably such an id) and show its crops; per the
# argument names, keypoints are overlaid and labels are hidden.
v.load_data(554319)
v.display_crops(overlay_keypoints=True, show_labels=False)

In [None]:
%matplotlib notebook
# Load hard-coded record 558443 and render it with display_3d_keypoints().
# This cell switches to the `notebook` matplotlib backend first (magic above).
v.load_data(558443)
v.display_3d_keypoints()

In [None]:
%matplotlib inline
# Distribution of the plane-fit error over all rows, 100 bins.
plt.figure(figsize=(20, 10))
plt.hist(df['rms_error_m'], bins=100)
plt.grid()
plt.show()

<h1> Optical Sampling Bias </h1>

In [None]:
def centroid_depth(wkps):
    """Return the mean of the index-1 coordinate across all keypoints.

    Args:
        wkps: mapping of keypoint name -> indexable point; may be empty or None.

    Returns:
        The mean of ``point[1]`` over the mapping's values, or ``None`` when
        ``wkps`` is falsy (empty mapping or None).
    """
    if not wkps:
        return None
    second_coords = [point[1] for point in wkps.values()]
    return np.mean(np.array(second_coords))

# One centroid depth per row, computed from that row's world_keypoints mapping.
df['centroid_depth'] = df['world_keypoints'].apply(centroid_depth)

In [None]:
%matplotlib inline
# Distribution of centroid depth over all rows (default binning).
plt.figure(figsize=(20, 10))
plt.hist(df['centroid_depth'])
plt.grid()
plt.show()

In [None]:
%matplotlib inline
# Bin edges from 0.8 up to (but excluding) 2.0 in steps of 0.1.
depths = list(np.arange(0.8, 2.0, 0.1))

# Mean plane-fit error of the rows whose centroid depth lies strictly between
# each pair of consecutive edges.
mean_rms_values = []
for lower_edge, upper_edge in zip(depths[:-1], depths[1:]):
    mask = (df.centroid_depth > lower_edge) & (df.centroid_depth < upper_edge)
    mean_rms_values.append(df[mask].rms_error_m.mean())

# One bar per bin, labelled with the bin's lower edge.
plt.figure(figsize=(20, 10))
x = np.arange(len(depths) - 1)
plt.bar(x, mean_rms_values)
plt.xticks(x, [round(edge, 2) for edge in depths[:-1]])
plt.show()
    

In [None]:
# Display the per-bin mean errors computed in the depth-binning cell above.
mean_rms_values

In [None]:
# Display the bin edges produced by np.arange(0.8, 2.0, 0.1) above.
depths

In [None]:
%matplotlib inline
# NOTE: this rebinds hard_outlier_mask with a wider window (-10000 g, 40000 g)
# than the one defined for the conditional histogram; later cells see this one.
hard_outlier_mask = (df['estimated_biomass_g'] < -10000) | (df['estimated_biomass_g'] > 40000)

# Predicted biomass for rows with centroid depth above 1.5.
plt.figure(figsize=(20, 10))
plt.hist(
    df.loc[~hard_outlier_mask & (df['centroid_depth'] > 1.5), 'estimated_biomass_g'],
    bins=100,
)
plt.grid()
plt.show()

In [None]:
%matplotlib inline
# Predicted biomass for rows whose centroid depth is strictly between 1.6 and 1.8.
plt.figure(figsize=(20, 10))
plt.hist(
    df.loc[
        ~hard_outlier_mask & (df['centroid_depth'] > 1.6) & (df['centroid_depth'] < 1.8),
        'estimated_biomass_g',
    ],
    bins=20,
)
plt.grid()
plt.show()

In [None]:
# Mean predicted biomass of non-outlier rows with centroid depth below 1.0.
df.loc[~hard_outlier_mask & (df['centroid_depth'] < 1.0), 'estimated_biomass_g'].mean()

In [None]:
def triangulation_accuracy(depth, baseline, theta_fov):
    """Ratio of the baseline to the field width at the given depth.

    Args:
        depth: distance at which the field width is evaluated.
        baseline: baseline length, in the same unit as ``depth``.
        theta_fov: full field-of-view angle in degrees.

    Returns:
        ``baseline / (2 * depth * tan(theta_fov / 2))``.
    """
    half_fov_rad = (theta_fov / 2.0) * (np.pi / 180.0)
    field_width = 2 * depth * np.tan(half_fov_rad)
    return baseline / field_width

def overlapping_field_size(depth, baseline, theta_fov):
    """Field width at the given depth minus the baseline.

    Args:
        depth: distance at which the field width is evaluated.
        baseline: baseline length, in the same unit as ``depth``.
        theta_fov: full field-of-view angle in degrees.

    Returns:
        ``2 * depth * tan(theta_fov / 2) - baseline``.
    """
    half_fov_rad = (theta_fov / 2.0) * (np.pi / 180.0)
    field_width = 2 * depth * np.tan(half_fov_rad)
    return field_width - baseline

In [None]:
# Example: depth=0.8, baseline=0.2, theta_fov=70.0 (degrees; the function converts with pi/180).
triangulation_accuracy(0.8, 0.2, 70.0)

In [None]:
# Example: same depth as above with a shorter baseline (0.1) and a narrower field of view (55.0 degrees).
triangulation_accuracy(0.8, 0.1, 55.0)

In [None]:
# Example: depth=1.0, baseline=0.3, theta_fov=80.0 degrees.
triangulation_accuracy(1.0, 0.3, 80.0)

In [None]:
# Example: field width minus baseline at depth=0.6 with baseline=0.25 and theta_fov=85.0 degrees.
overlapping_field_size(0.6, 0.25, 85.0)

In [None]:
# Display the right_image_url value of the first row (positional index 0).
df.right_image_url.iloc[0]

In [None]:
# Re-creates the S3 helper with the same '/root/data' argument used in the
# "Visualize Individual Cases" section — presumably so the cells below can run
# without executing that section first.
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# Fetch one left/right crop pair from S3 and decode both with OpenCV.
bucket = 'aquabyte-crops'
left_key = 'environment=production/site-id=29/pen-id=17/date=2019-07-09/hour=13/at=2019-07-09T13:12:42.376387000Z/left_frame_crop_1006_1004_3354_1643.jpg'
right_key = 'environment=production/site-id=29/pen-id=17/date=2019-07-09/hour=13/at=2019-07-09T13:12:42.376387000Z/right_frame_crop_782_1033_3010_1673.jpg'
left_image_f = s3_access_utils.download_from_s3(bucket, left_key)
right_image_f = s3_access_utils.download_from_s3(bucket, right_key)
imageL = cv2.imread(left_image_f)
imageR = cv2.imread(right_image_f)

# cv2.imread reports failure by returning None instead of raising; fail here
# with a clear message rather than deep inside the feature-matching cell.
for image_path, image in ((left_image_f, imageL), (right_image_f, imageR)):
    if image is None:
        raise IOError('Could not read image: {}'.format(image_path))


In [None]:
# Detect KAZE features on the contrast-enhanced crops, match them with FLANN,
# keep matches that pass the ratio test, and estimate a homography with RANSAC.
MIN_MATCH_COUNT = 10   # minimum number of ratio-test survivors needed for the homography
GOOD_PERC = 0.7        # ratio-test threshold: best distance must be < GOOD_PERC * second best

# NOTE: despite its name, `sift` holds a KAZE detector.
sift = cv2.KAZE_create()
# NOTE: enhance() is defined in the last cell of this notebook; on a fresh
# kernel that cell has to be executed before this one.
img1 = enhance(imageL)
img2 = enhance(imageR)
kp1, des1 = sift.detectAndCompute(img1,None)
kp2, des2 = sift.detectAndCompute(img2,None)

FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)


flann = cv2.FlannBasedMatcher(index_params, search_params)
# detectAndCompute yields no descriptors when nothing is detected, and a k=2
# search needs at least two candidates on the train side.
if des1 is None or des2 is None or len(des2) < 2:
    matches = []
else:
    matches = flann.knnMatch(des1,des2,k=2)

good = []
for pair in matches:
    # knnMatch may return fewer than k neighbours for a query descriptor.
    if len(pair) < 2:
        continue
    m, n = pair
    # Fixed: the original referenced the undefined name GOOD_PERCBGBBH here.
    if m.distance < GOOD_PERC*n.distance:
        good.append(m)

M, mask, matchesMask = None, None, None
if len(good)>=MIN_MATCH_COUNT:
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
    if mask is not None:
        matchesMask = mask.ravel().tolist()
    else:
        # findHomography can fail to produce a model; leave matchesMask as None.
        print("Homography estimation failed for %d matches" % len(good))
else:
    print("Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT))

In [None]:

import mpld3
# mpld3.disable_notebook()
# mpld3.enable_notebook()

def draw_matches(img1, kp1, img2, kp2, matches, matchesMask, color=None, drawFeatures=True):
    """Place two images side by side and optionally draw the matches between them.

    Args:
        img1: left image, a 2-D (grayscale) or 3-D (multi-channel) array.
        img2: right image; pasted to the right of ``img1``.
        kp1, kp2: keypoint sequences for ``img1`` / ``img2``; each item exposes ``.pt``.
        matches: match objects exposing ``queryIdx`` (into ``kp1``) and
            ``trainIdx`` (into ``kp2``).
        matchesMask: per-match flags; a match is skipped when its flag is 0.
            ``None`` means "no mask": every match is drawn.
        color: colour passed to the cv2 drawing calls. ``None`` picks a random
            colour per match (a 3-tuple for 3-D images, a scalar for 2-D ones).
        drawFeatures: when false, return the side-by-side canvas without drawing.

    Returns:
        A new array of height ``max(h1, h2)`` and width ``w1 + w2`` with the
        dtype of ``img1``; areas not covered by either image are zero.

    Raises:
        ValueError: if ``img1`` is neither 2-D nor 3-D.
    """
    canvas_height = max(img1.shape[0], img2.shape[0])
    canvas_width = img1.shape[1] + img2.shape[1]
    if img1.ndim == 3:
        new_shape = (canvas_height, canvas_width, img1.shape[2])
    elif img1.ndim == 2:
        new_shape = (canvas_height, canvas_width)
    else:
        raise ValueError('img1 must be a 2-D or 3-D array, got %d dimensions' % img1.ndim)
    new_img = np.zeros(new_shape, img1.dtype)
    # Place images onto the new image.
    new_img[0:img1.shape[0], 0:img1.shape[1]] = img1
    new_img[0:img2.shape[0], img1.shape[1]:canvas_width] = img2

    if not drawFeatures:
        return new_img

    # Draw lines between matches.  Make sure to offset kp coords in second image appropriately.
    r = 15
    thickness = 3
    x_offset = img1.shape[1]
    is_multichannel = img1.ndim == 3
    for i, m in enumerate(matches):
        # Skip matches rejected by the mask; a missing mask rejects nothing.
        if matchesMask is not None and not matchesMask[i]:
            continue
        if color is None:
            # Random colour: one value per channel for multi-channel images,
            # a single intensity for grayscale ones.
            if is_multichannel:
                c = tuple(int(x) for x in np.random.randint(0, 256, 3))
            else:
                c = int(np.random.randint(0, 256))
        else:
            c = color
        # Use plain Python ints: some OpenCV builds reject numpy integer
        # scalars inside point tuples.
        x1, y1 = kp1[m.queryIdx].pt
        x2, y2 = kp2[m.trainIdx].pt
        end1 = (int(round(x1)), int(round(y1)))
        end2 = (int(round(x2)) + x_offset, int(round(y2)))
        cv2.line(new_img, end1, end2, c, thickness)
        cv2.circle(new_img, end1, r, c, thickness)
        cv2.circle(new_img, end2, r, c, thickness)
    return new_img

# draw_params = dict(matchColor = (0,0,255), # draw matches in white color
#                    singlePointColor = None,
#                    matchesMask = matchesMask, # draw only inliers
#                    flags = 4)
# # print(draw_params)
# img3 = cv2.drawMatches(img1,kp1,img2,kp2,good,None,**draw_params)

# `matchColor` was used below without ever being assigned in this notebook.
# The value is taken from the commented-out draw_params above; with OpenCV's
# BGR channel order (0, 0, 255) is red.
matchColor = (0, 0, 255)

# Blend the annotated canvas over the plain one so the match lines appear
# semi-transparent.
img3 = draw_matches(img1,kp1,img2,kp2,good,matchesMask,matchColor,False)
img3o = draw_matches(img1,kp1,img2,kp2,good,matchesMask,matchColor,True)
alpha = 0.3  # Transparency factor.
img3 = cv2.addWeighted(img3o, alpha, img3, 1 - alpha, 0)

# The crops were decoded by cv2.imread (BGR) while matplotlib expects RGB, so
# swap the channel order for display. The figure is bound to `fig` rather than
# `f` so the results-file path stored in `f` by the loading cell is not clobbered.
fig, ax = plt.subplots(1, figsize=(20, 10))
ax.imshow(cv2.cvtColor(img3, cv2.COLOR_BGR2RGB))
ax.axis("off")
plt.show()
# mpld3.display(fig)


In [None]:
def enhance(image, clip_limit=5):
    """Boost local contrast by running CLAHE on the lightness channel.

    The image is converted from BGR to LAB, CLAHE (8x8 tile grid) is applied to
    the L channel only, and the result is converted back to BGR.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.
        clip_limit: CLAHE clip limit (default 5).

    Returns:
        The contrast-enhanced image in BGR channel order.
    """
    # Work in LAB so that only lightness is equalised; A and B stay untouched.
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    lightness, a_chan, b_chan = cv2.split(lab)

    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
    lab_equalized = cv2.merge((equalizer.apply(lightness), a_chan, b_chan))

    # Back to BGR (not RGB) so the result matches OpenCV's channel order.
    return cv2.cvtColor(lab_equalized, cv2.COLOR_LAB2BGR)