<h1> Kjeppevikholmen Optical Sampling Bias Study </h1>

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from aquabyte.visualize import Visualizer
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import json, os
import cv2
import torch
import pytz
from PIL import Image
import datetime as dt
import dateutil
from urllib.parse import urlparse
from collections import defaultdict
from multiprocessing import Manager, Pool


<h1> Optical Sampling Bias Study </h1>

<h2> Simulate FOVs of all sizes, and for each one get the average crop width / crop height / crop area. For FOVs smaller than the current one, get the average biomass as well </h2>

In [None]:
# Read the production DB credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The `with` block closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Non-QA keypoint annotations for pen 5 captured between 2019-06-26 and 2019-06-30.
query = """
    select * from keypoint_annotations
    where pen_id=5
    and is_qa=FALSE
    and captured_at between '2019-06-26' and '2019-06-30';
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
# S3 helper constructed with '/root/data' (presumably the local directory that
# downloaded objects are written to — confirm against S3AccessUtils).
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# Date window of interest and the bucket that holds the resized inbound frames.
start_date, end_date = '2019-06-26', '2019-06-27'
inbound_bucket = 'aquabyte-frames-resized-inbound'

# Split the first annotation's left-image URL path into bucket and object key.
s3_path_components = urlparse(df.left_image_url.iloc[0]).path.lstrip('/').split('/')
bucket = s3_path_components[0]
key = os.path.join(*s3_path_components[1:])

# Keep the key prefix up to and including the start date; it is used as the
# folder to list. `str.index` raises ValueError if the date is not in the key.
prefix_end = key.index(start_date) + len(start_date)
s3_folder = key[:prefix_end]

In [None]:
# Keys in the inbound bucket under `s3_folder`, filtered by the 'capture.json'
# suffix; the result is consumed by iterating over it in the next cell.
generator = s3_access_utils.get_matching_s3_keys(inbound_bucket, s3_folder, suffixes=['capture.json'])

In [None]:
# Drain the key iterator into a list, printing the running count every 1000 keys
# (the count is printed before the current key is stored, so the first line is 0).
keys = []
for n_seen, key in enumerate(generator):
    if n_seen % 1000 == 0:
        print(n_seen)
    keys.append(key)

In [None]:
# Unique parent "directories" of the collected keys, in sorted order.
s3_key_dirs = sorted({os.path.dirname(capture_key) for capture_key in keys})

In [None]:
def process_s3_key_dir(s3_key_dir):
    """Append the padded, rescaled crop boxes of one capture directory to `shared_list`.

    Downloads `<s3_key_dir>/crops.json` from `inbound_bucket`, and for every entry
    in its 'annotations' list rescales the bounding box, pads it by 50 px on each
    side, clamps it to [0, 4096] x [0, 3000] and appends
    `[x_lower, y_lower, x_upper, y_upper]` to the module-level `shared_list`.

    Args:
        s3_key_dir: S3 key prefix of the capture directory holding `crops.json`.

    Returns:
        None. Results are communicated through `shared_list`.
    """
    global shared_list
    crop_metadata_f = s3_access_utils.download_from_s3(
        inbound_bucket, os.path.join(s3_key_dir, 'crops.json'))
    # Close the downloaded file as soon as it has been parsed; this function runs
    # once per directory, so unclosed handles would otherwise pile up.
    with open(crop_metadata_f) as crop_metadata_file:
        crop_metadata = json.load(crop_metadata_file)

    for ann in crop_metadata['annotations']:
        bbox = ann['bbox']
        # The box is read as [y_lower, x_lower, y_upper, x_upper].
        x_lower, y_lower, x_upper, y_upper = bbox[1], bbox[0], bbox[3], bbox[2]
        # Rescale by 4096/512 horizontally and 3000/512 vertically, pad by 50 px
        # and clamp to the frame. Presumably the boxes come from a 512x512 resized
        # frame and 4096x3000 is the full frame size — TODO confirm.
        x_lower = round(max(x_lower * (4096 / 512) - 50, 0))
        y_lower = round(max(y_lower * (3000 / 512) - 50, 0))
        x_upper = round(min(x_upper * (4096 / 512) + 50, 4096))
        y_upper = round(min(y_upper * (3000 / 512) + 50, 3000))
        shared_list.append([x_lower, y_lower, x_upper, y_upper])

    # Coarse progress log: only fires when the shared length happens to be a
    # multiple of 100 at the moment this call finishes.
    if len(shared_list) % 100 == 0:
        print(len(shared_list))


In [None]:
# Manager-backed list that process_s3_key_dir appends to from the worker
# processes. `manager` must stay referenced: the list proxy is read again after
# the pool has finished.
manager = Manager()
shared_list = manager.list()

# map() blocks until every directory is processed; the context manager then
# shuts the 20 workers down instead of leaving them idle for the rest of the
# kernel session.
with Pool(20) as pool:
    pool.map(process_s3_key_dir, s3_key_dirs)

In [None]:
# Materialise the collected [x_lower, y_lower, x_upper, y_upper] boxes as an
# array; later cells index it by column (0..3 in that order).
X = np.array(shared_list)

In [None]:
def get_fov_cutoffs(fov, cm):
    """Return the horizontal pixel bounds of a field of view centred on the frame.

    Args:
        fov: full field-of-view angle, in degrees.
        cm: camera metadata mapping with 'focalLengthPixel' and 'pixelCountWidth'.

    Returns:
        Tuple `(min_cutoff, max_cutoff)` of x pixel coordinates, placed
        symmetrically around the middle of the frame width.
    """
    fov_rad = fov * np.pi / 180.0
    frame_width_px = cm['pixelCountWidth']
    # Width in pixels subtended by the angle: 2 * f * tan(fov / 2).
    span_px = 2*cm['focalLengthPixel'] * np.tan(fov_rad / 2.0)
    return (frame_width_px - span_px) / 2.0, (frame_width_px + span_px) / 2.0

In [None]:
# For each simulated FOV (10..54 degrees), average the width of the crops that
# lie strictly inside the FOV's horizontal bounds. The bounds are computed from
# the first row's camera metadata.
fovs = list(np.arange(10, 55, 1))
cm = df.camera_metadata.iloc[0]
mean_widths = []
for fov in fovs:
    min_cutoff, max_cutoff = get_fov_cutoffs(fov, cm)
    inside_fov = (X[:, 0] > min_cutoff) & (X[:, 2] < max_cutoff)
    crop_widths = X[inside_fov, 2] - X[inside_fov, 0]
    mean_widths.append(crop_widths.mean())

In [None]:
# Mean crop width as a function of the simulated field of view.
plt.figure(figsize=(20, 10))
plt.scatter(fovs, mean_widths)
plt.title('Empirical Optical Sampling Bias')
# `fovs` holds angles in degrees (they are passed to get_fov_cutoffs, which
# converts degrees to radians), so the axis is labelled as such, matching the
# weight-based FOV plots later in the notebook.
plt.xlabel('Field of View (degrees)')
plt.ylabel('Mean Crop Width')
plt.grid()
plt.show()

In [None]:
# For each fraction of the frame height, average the height of the crops whose
# lower edge (column 3) lies above that cutoff, ignoring crops 800 px or taller.
FULL_HEIGH_PX = 3000
pct_coverages = np.arange(0.2, 1, 0.01)
crop_heights = X[:, 3] - X[:, 1]
mean_heights = []
for pct in pct_coverages:
    upper_y = round(pct * FULL_HEIGH_PX)
    keep = (X[:, 3] < upper_y) & (crop_heights < 800)
    mean_heights.append(crop_heights[keep].mean())

In [None]:
# Mean crop height as a function of the vertical cutoff. The figure previously
# had no title or axis labels, so it could not be read on its own.
plt.figure(figsize=(20, 10))
plt.scatter(pct_coverages, mean_heights)
plt.title('Empirical Optical Sampling Bias (vertical cutoff)')
plt.xlabel('Vertical cutoff as a fraction of full frame height')
plt.ylabel('Mean Crop Height')
plt.grid()
plt.show()

<h1> Weight Based Optical Sampling Bias Study </h1>

<h2> Get weight dataframe </h2>

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from aquabyte.visualize import Visualizer
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
import json, os
import cv2
import torch
import pytz
from PIL import Image
import datetime as dt
import dateutil


In [None]:
# Read the production DB credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The `with` block closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Non-QA annotations for pen 5 that have both a left and a right crop, captured
# between 2019-06-23 and 2019-06-30.
query = """
    select * from keypoint_annotations
    where pen_id=5
    and keypoints -> 'leftCrop' is not null
    and keypoints -> 'rightCrop' is not null
    and captured_at between '2019-06-23' and '2019-06-30'
    and is_qa=FALSE;
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
# Preprocessing stages; generate_weight applies them to each sample in this
# order before calling the network.
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

# load neural network weights
# NOTE(review): torch.load unpickles the file, so only point it at checkpoints
# from a trusted location. If the loaded object is an nn.Module, confirm it is
# in eval mode before using it for inference.
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')


In [None]:
def generate_weight(row_id, keypoints, cm):
    """Predict the weight of one fish from its stereo keypoint annotation.

    The sample is passed through the module-level transforms
    (centred 2D normalisation, stability normalisation, tensor conversion) and
    the resulting 'kp_input' tensor is fed to `biomass_network`.

    Args:
        row_id: identifier of the annotation row, passed on as 'stereo_pair_id'.
        keypoints: keypoint annotation of the stereo pair.
        cm: camera metadata of the stereo pair.

    Returns:
        The scalar network output multiplied by 1e4 (presumably grams, going by
        the axis labels of the plots that use it — confirm).
    """
    input_sample = {
        'keypoints': keypoints,
        'cm': cm,
        'stereo_pair_id': row_id,
        'single_point_inference': True
    }

    # Invoke the transforms as callables rather than through explicit __call__.
    normalized_centered_2D_kps = normalize_centered_2D_transform_biomass(input_sample)
    normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
    tensorized_kps = to_tensor_transform(normalized_stability_kps)

    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        weight_prediction = biomass_network(tensorized_kps['kp_input']).item() * 1e4

    return weight_prediction

In [None]:
# Predict a weight for every annotation row, printing the number of rows
# processed so far every 1000 rows.
weights = []
for n_done, (_, row) in enumerate(df.iterrows()):
    if n_done % 1000 == 0:
        print(n_done)
    weights.append(generate_weight(row.id, row.keypoints, row.camera_metadata))


In [None]:
# Attach the predictions; `weights` was filled in df.iterrows() order, so it
# lines up with the rows of `df`.
df['weight'] = weights 

In [None]:
def get_fov_cutoffs(fov, cm):
    """Compute the x pixel range covered by a field of view centred on the frame.

    Args:
        fov: full field-of-view angle, in degrees.
        cm: camera metadata mapping with 'focalLengthPixel' and 'pixelCountWidth'.

    Returns:
        Tuple `(min_cutoff, max_cutoff)`; the two bounds are equidistant from
        the middle of the frame width.
    """
    angle_rad = fov * np.pi / 180.0
    # Pixel extent subtended by the angle: 2 * f * tan(angle / 2).
    extent_px = 2*cm['focalLengthPixel'] * np.tan(angle_rad / 2.0)
    width_px = cm['pixelCountWidth']
    lower_bound = (width_px - extent_px) / 2.0
    upper_bound = (width_px + extent_px) / 2.0
    return lower_bound, upper_bound

def is_preserved(keypoints, min_cutoff, max_cutoff):
    """Tell whether every keypoint of both crops lies within the x cutoffs.

    Args:
        keypoints: mapping with 'leftCrop' and 'rightCrop' lists of keypoint
            dicts, each carrying an 'xFrame' coordinate.
        min_cutoff: smallest allowed x coordinate (inclusive).
        max_cutoff: largest allowed x coordinate (inclusive).

    Returns:
        True when no keypoint in either crop falls outside
        [min_cutoff, max_cutoff], False otherwise.
    """
    # Compute the x extent of both crops up front, left first, then right.
    extents = []
    for side in ('leftCrop', 'rightCrop'):
        xs = [kp['xFrame'] for kp in keypoints[side]]
        extents.append((min(xs), max(xs)))

    return all(
        not (x_min < min_cutoff or x_max > max_cutoff)
        for x_min, x_max in extents
    )
        

In [None]:
# For every simulated FOV (10..54 degrees) add a boolean column
# 'is_preserved_<fov>' flagging the rows whose keypoints all fall inside that
# FOV. The cutoffs are computed from the first row's camera metadata.
fovs = list(np.arange(10, 55, 1))
reference_cm = df.camera_metadata.iloc[0]
for fov in fovs:
    min_cutoff, max_cutoff = get_fov_cutoffs(fov, reference_cm)
    df['is_preserved_{}'.format(fov)] = [
        is_preserved(keypoints, min_cutoff, max_cutoff)
        for keypoints in df.keypoints
    ]


    

In [None]:
# `centroid_depth` is only added to `df` by the depth-study cells further down,
# so on a fresh kernel (Restart & Run All) this cell used to fail on the missing
# column. Derive it here when absent, using the same definition as those cells:
# the median of component 1 over the triangulated world keypoints.
if 'centroid_depth' not in df.columns:
    def _centroid_depth_for_row(row):
        """Median depth of a row's world keypoints, or None if unavailable."""
        if 'leftCrop' in row.keypoints and 'rightCrop' in row.keypoints:
            wkps = pixel2world(row.keypoints['leftCrop'], row.keypoints['rightCrop'], row.camera_metadata)
            if wkps:
                return np.median(np.array([wkp[1] for wkp in wkps.values()]))
        return None

    df['centroid_depth'] = df.apply(_centroid_depth_for_row, axis=1)

# Mean predicted weight and sample size per FOV, restricted to fish that are
# fully inside the FOV and whose centroid depth exceeds 1.6.
pred_weight_means, sample_sizes = [], []
for fov in fovs:
    mask = df['is_preserved_{}'.format(fov)] & (df.centroid_depth > 1.6)
    preserved = df[mask]
    pred_weight_means.append(preserved.weight.mean())
    sample_sizes.append(preserved.shape[0])


In [None]:
# Mean predicted weight of the preserved fish for each simulated field of view.
plt.figure(figsize=(20, 10))
plt.scatter(fovs, pred_weight_means, s=80)
plt.xlabel('Field of View (degrees)')
plt.ylabel('Estimated biomass (g)')
# Title spelling corrected to match the crop-width plot earlier in the notebook.
plt.title('Empirical Optical Sampling Bias')
plt.grid()
plt.show()



In [None]:
# Relative deviation of each FOV's mean predicted weight from the mean over all
# rows of `df`.
overall_mean_weight = df.weight.mean()
relative_deviation = (np.array(pred_weight_means) - overall_mean_weight) / overall_mean_weight

plt.figure(figsize=(20, 10))
plt.scatter(fovs, relative_deviation, s=80)
plt.xlabel('Field of View (degrees)')
# The plotted quantity is a dimensionless ratio, not a weight in grams.
plt.ylabel('Relative deviation from overall mean estimated biomass')
plt.title('Empirical Optical Sampling Bias')
plt.grid()
plt.show()



<h1> Weight-Based Study (Depth) </h1>

In [None]:
def get_world_keypoints(row):
    """Triangulate a row's keypoints into world coordinates.

    Args:
        row: object exposing `keypoints` (mapping that may contain 'leftCrop'
            and 'rightCrop') and `camera_metadata`.

    Returns:
        The result of `pixel2world` for the left/right crops, or None when
        either crop is missing from `row.keypoints`.
    """
    kps = row.keypoints
    if not ('leftCrop' in kps and 'rightCrop' in kps):
        return None
    return pixel2world(kps['leftCrop'], kps['rightCrop'], row.camera_metadata)
    
# Row-wise triangulation; rows without both crops get None.
df['world_keypoints'] = df.apply(get_world_keypoints, axis=1)

In [None]:
def centroid_depth(wkps):
    """Return the median of component 1 over all world keypoints.

    Args:
        wkps: mapping from keypoint name to a world coordinate sequence, or a
            falsy value (None / empty mapping) when no keypoints are available.

    Returns:
        The median of each coordinate's element at index 1, or None when
        `wkps` is falsy.
    """
    if not wkps:
        return None
    depth_components = np.array([point[1] for point in wkps.values()])
    return np.median(depth_components)

# Per-row median depth of the triangulated keypoints.
df['centroid_depth'] = df.world_keypoints.apply(centroid_depth)

In [None]:
%matplotlib inline
depths = list(np.arange(0.6, 2.2, 0.1))
est_weights = []
for i in range(len(depths[:-1])):
    mask = (df.centroid_depth > depths[i]) & (df.centroid_depth < depths[i+1])
    est_weights.append(df[mask].weight.mean())
    

plt.figure(figsize=(20, 10))
x = np.arange(len(depths[:-1]))
plt.bar(x, est_weights)
plt.xticks(x, [round(d, 2) for d in depths[:-1]])
plt.title('Empirlcal Optical Samling Bias')
plt.xlabel('Distance from camera (m)')
plt.ylabel('Estimated biomass (g)')
plt.grid()
plt.show()


In [None]:
# Depth histogram of fish with a predicted weight strictly between 7000 and
# 8000 and a centroid depth strictly between 0 and 3.0.
in_depth_range = (df.centroid_depth > 0) & (df.centroid_depth < 3.0)
in_weight_range = (df.weight > 7000) & (df.weight < 8000)

plt.figure(figsize=(20, 10))
plt.hist(df[in_depth_range & in_weight_range].centroid_depth, bins=10)
# The original line was a bare `plt.title` attribute reference, which sets no
# title; call it, using the wording of the per-bucket subplots below.
plt.title('Depth distribution for 7000g to 8000g bucket')
plt.xlabel('Distance from Camera (m)')
plt.ylabel('Frequency')
plt.grid()
plt.show()

In [None]:
# 4x2 grid of depth histograms, one per 1000 g predicted-weight bucket from
# 0 g to 8000 g, filled row by row.
fig, axes = plt.subplots(4, 2, figsize=(20, 20))
weights_list = list(np.arange(0, 9000, 1000))
for ax, lower, upper in zip(axes.flat, weights_list[:-1], weights_list[1:]):
    mask = (df.weight > lower) & (df.weight < upper)
    ax.hist(df[mask].centroid_depth, bins=10)
    ax.set_xlim([0, 2.5])
    ax.set_title('Depth distribution for {}g to {}g bucket'.format(lower, upper))
    ax.grid()
plt.show()