In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import cv2

import glob
import os
import boto3
from sqlalchemy import create_engine, MetaData, Table, select, and_, func
from sqlalchemy.orm import sessionmaker, relationship, join
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Table, Column, Integer, ForeignKey
from sqlalchemy.orm import relationship
from aquabyte.optics import convert_to_world_point, depth_from_disp, pixel2world, euclidean_distance
from aquabyte.data_access_utils import DataAccessUtils

import pickle
from PIL import Image, ImageDraw

pd.set_option('max_columns', 500)
pd.set_option('max_colwidth', 50)

<h1> Establish connection to database and perform query for base dataset </h1>

In [None]:
# AWS credentials: the JSON file path comes from the AWS_CREDENTIALS env var.
# Use context managers so the credential files are closed deterministically.
with open(os.environ["AWS_CREDENTIALS"]) as aws_credentials_file:
    aws_credentials = json.load(aws_credentials_file)
s3_client = boto3.client('s3', aws_access_key_id=aws_credentials["aws_access_key_id"],
                         aws_secret_access_key=aws_credentials["aws_secret_access_key"],
                         region_name="eu-west-1")

data_access_utils = DataAccessUtils()

# prod SQL credentials (kept in scope for later cells that reference them)
with open(os.environ["PROD_SQL_CREDENTIALS"]) as sql_credentials_file:
    sql_credentials = json.load(sql_credentials_file)

# base dataset: keypoint annotations for one site/pen from 2019-05-15 onwards
sql_query = '''
select * from keypoint_annotations
where captured_at >= '2019-05-15'
and site_id = 23
and pen_id = 4;
'''

# every later cell derives its working frame from original_df
original_df = data_access_utils.extract_from_database(sql_query)
# original_df = original_df.loc[:, ~original_df.columns.duplicated()]
        

<h1> Iterate over query results and generate 3D coordinates + biomass estimates for each stereo fish detection </h1>

In [None]:
def coord2biomass_linear(world_keypoints, model):
    """Predict biomass from 3D keypoints with the linear (PCA + regression) model.

    Args:
        world_keypoints: mapping of body-part name -> 3D coordinate, as accepted
            by ``euclidean_distance``.
        model: dict with keys ``mean``, ``std``, ``PCA_components``, ``reg_coef``,
            ``reg_intercept`` and ``body_parts``. ``body_parts`` fixes the order
            in which pairwise distances are generated, so it must match the
            order used when the model was fitted.

    Returns:
        The regression output ``X_pca . reg_coef + reg_intercept``.

    Raises:
        KeyError: if a body part listed in the model is absent from
            ``world_keypoints``.
    """
    mean = model['mean']
    std = model['std']
    pca_components = model['PCA_components']
    reg_coef = model['reg_coef']
    reg_intercept = model['reg_intercept']
    body_parts = model['body_parts']

    missing_parts = [bp for bp in body_parts if bp not in world_keypoints]
    if missing_parts:
        raise KeyError('world_keypoints is missing body parts: {}'.format(missing_parts))

    # pairwise distances, in the exact (i < j) ordering implied by body_parts
    pairwise_distances = [
        euclidean_distance(world_keypoints[body_parts[i]], world_keypoints[body_parts[j]])
        for i in range(len(body_parts) - 1)
        for j in range(i + 1, len(body_parts))
    ]

    # second-order terms: products of every distance pair, including squares (j >= i)
    interaction_values = [
        pairwise_distances[i] * pairwise_distances[j]
        for i in range(len(pairwise_distances))
        for j in range(i, len(pairwise_distances))
    ]

    X = np.array(pairwise_distances + interaction_values)

    # standardise, project onto the PCA basis, then apply the linear regression
    X_normalized = (X - mean) / std
    X_transformed = np.dot(X_normalized, pca_components.T)
    prediction = np.dot(X_transformed, reg_coef) + reg_intercept
    return prediction


def coord2biomass_blender(world_keypoints, blender):
    """Predict biomass by nearest-neighbour lookup in the Blender distance table.

    The pairwise keypoint distances of the observed fish are compared with each
    row of ``blender["distances"]``; the volume of the closest row (smallest
    NaN-ignoring mean absolute difference) is mapped to a prediction through
    the linear coefficients in ``blender["coeff"]``.

    Args:
        world_keypoints: mapping of body-part name -> 3D coordinate.
        blender: dict with keys ``reverse_mapping`` (str index -> body-part
            name), ``distances``, ``volume`` and ``coeff`` (slope, intercept).

    Returns:
        ``volume[closest] * coeff[0] + coeff[1]``.
    """
    reverse_mapping = blender["reverse_mapping"]
    reference_distances = np.array(blender["distances"])
    volumes = blender["volume"]
    regression_coeff = blender["coeff"]

    # reverse_mapping guarantees keypoints are listed in the same order as the
    # columns of the reference distance table
    part_count = len(world_keypoints)
    ordered_points = [world_keypoints[reverse_mapping[str(idx)]] for idx in range(part_count)]

    # distances are scaled by 1000 before comparison
    # NOTE(review): the original comment read "mm to m", but multiplying by 1000
    # is the metres -> millimetres direction; presumably keypoints are in metres
    # and the table in millimetres - confirm.
    measurements = np.array([
        euclidean_distance(ordered_points[first], ordered_points[second]) * 1000
        for first in range(part_count)
        for second in range(first + 1, part_count)
    ])

    # mean absolute difference per reference row, ignoring NaN entries
    mean_abs_diff = np.nanmean(np.abs(reference_distances - measurements), axis=1)
    best_row = np.argmin(mean_abs_diff)

    # linear calibration from the matched volume to the prediction
    slope, intercept = regression_coeff[0], regression_coeff[1]
    return volumes[best_row] * slope + intercept



In [None]:
def generate_rotation_matrix(u_base, v):
    """Return the 3x3 rotation matrix that rotates the direction of ``v`` onto ``u_base``.

    The matrix is built with Rodrigues' rotation formula about the axis
    ``u_base x v``.

    Args:
        u_base: target direction; expected to be a unit 3-vector (it is not
            re-normalised here).
        v: 3-vector whose direction should be mapped onto ``u_base``.

    Returns:
        ``R`` (shape (3, 3)) such that ``R @ (v / |v|)`` equals ``u_base``.
        When ``v`` is already parallel to ``u_base`` the identity is returned;
        when it is anti-parallel a 180-degree rotation about an arbitrary axis
        perpendicular to ``u_base`` is returned. A zero-length ``v`` still
        yields a NaN matrix, as before.
    """
    u_base = np.asarray(u_base, dtype=float)
    v = np.asarray(v, dtype=float)

    u = v / np.linalg.norm(v)
    axis = np.cross(u_base, u)
    axis_norm = np.linalg.norm(axis)
    # clip guards arccos against values like 1.0000000002 caused by rounding
    cos_angle = np.clip(np.dot(u, u_base), -1.0, 1.0)

    if axis_norm < 1e-12:
        # u is (anti-)parallel to u_base: the cross product gives no usable axis
        if cos_angle > 0:
            return np.eye(3)
        # anti-parallel: any axis perpendicular to u_base works for a half turn
        helper = np.zeros(3)
        helper[np.argmin(np.abs(u_base))] = 1.0
        axis = np.cross(u_base, helper)
        axis_norm = np.linalg.norm(axis)

    n = axis / axis_norm
    # negative angle: rotate u back onto u_base rather than u_base onto u
    theta = -np.arccos(cos_angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)

    # Rodrigues: R = cos(t) I + sin(t) [n]_x + (1 - cos(t)) n n^T
    cross_matrix = np.array([
        [0.0, -n[2], n[1]],
        [n[2], 0.0, -n[0]],
        [-n[1], n[0], 0.0],
    ])
    R = cos_t * np.eye(3) + sin_t * cross_matrix + (1 - cos_t) * np.outer(n, n)

    return R

def normalize_world_keypoints(world_keypoint_coordinates):
    """Express keypoints in a fish-aligned frame.

    The keypoints are translated so ``TAIL_NOTCH`` sits at the origin, rotated
    so ``UPPER_LIP`` lies along the x-axis, then rotated again so the
    ``DORSAL_FIN`` component orthogonal to x points along the z-axis.

    Args:
        world_keypoint_coordinates: mapping of body-part name -> 3D coordinate;
            must contain ``TAIL_NOTCH``, ``UPPER_LIP`` and ``DORSAL_FIN``.

    Returns:
        dict of body-part name -> normalised coordinate (numpy array), with
        keys in sorted order.
    """
    def _rotate_all(rotation, points):
        # apply one rotation matrix to every keypoint, preserving key order
        return {name: np.dot(rotation, coord) for name, coord in points.items()}

    raw = {
        name: np.array(world_keypoint_coordinates[name])
        for name in sorted(world_keypoint_coordinates.keys())
    }

    # step 1: move the tail notch to the origin
    origin = raw['TAIL_NOTCH']
    centred = {name: coord - origin for name, coord in raw.items()}

    # step 2: align the tail-notch -> upper-lip direction with the x-axis
    x_axis = np.array([1, 0, 0])
    first_rotation = generate_rotation_matrix(x_axis, centred['UPPER_LIP'])
    x_aligned = _rotate_all(first_rotation, centred)

    # step 3: align the dorsal fin's off-axis component with the z-axis
    z_axis = np.array([0, 0, 1])
    dorsal = x_aligned['DORSAL_FIN']
    dorsal_off_axis = dorsal - np.array([dorsal[0], 0, 0])
    second_rotation = generate_rotation_matrix(z_axis, dorsal_off_axis)

    return _rotate_all(second_rotation, x_aligned)
    


In [None]:
# load model parameters for Blender and linear models
# (context managers close the files as soon as they have been read)
# NOTE: pickle.load can execute arbitrary code - only load trusted model files.
with open('/root/data/alok/biomass_estimation/models/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('/root/data/alok/biomass_estimation/models/volumes.json') as blender_file:
    blender = json.load(blender_file)


# establish new columns
# rows eligible for processing: not skipped and with keypoints present
mask = (original_df.is_skipped == False) & (~original_df.keypoints.isnull())
# object-typed columns so individual cells can hold lists/dicts
for col in ['left_keypoints', 'right_keypoints', 'world_keypoint_coordinates']:
    original_df[col] = np.nan
    original_df[col] = original_df[col].astype(object)
# numeric result columns, NaN until a row has been processed
for col in ['predicted_biomass_linear', 'predicted_biomass_blender', 
            'max_y_coordinate_deviation', 'max_y_world_coordinate_deviation']:
    original_df[col] = np.nan


# Enrich original_df row-by-row for every row selected by `mask`: store the
# left/right keypoints and derived world coordinates, add one column per
# pairwise keypoint distance and per-keypoint Y value, run both biomass models,
# and record two keypoint-consistency measures.
for idx, row in original_df[mask].iterrows():
    keypoints = row.keypoints
    left_keypoints = keypoints['leftCrop']
    right_keypoints = keypoints['rightCrop']
            
    # compute world coordinates
    # NOTE: this writes the pixel counts into the row's camera_metadata dict
    # itself (no copy is made).
    camera_metadata = row.camera_metadata
    camera_metadata['pixelCountHeight'] = 3000
    camera_metadata['pixelCountWidth'] = 4096
    # presumably returns a dict of body-part name -> 3D coordinate (it is used
    # that way below) - TODO confirm against aquabyte.optics
    world_keypoint_coordinates = pixel2world(left_keypoints, right_keypoints, camera_metadata)
    
    # update dataframe with world keypoint coordinates
    original_df.at[idx, 'left_keypoints'] = left_keypoints
    original_df.at[idx, 'right_keypoints'] = right_keypoints
    original_df.at[idx, 'world_keypoint_coordinates'] = world_keypoint_coordinates
    
    # one '<A><-><B>' column per unordered pair of body parts (sorted order),
    # created lazily the first time the pair is seen
    body_parts = sorted(list(world_keypoint_coordinates.keys()))
    for i in range(len(body_parts)-1):
        for j in range(i+1, len(body_parts)):
            bp1, bp2 = body_parts[i], body_parts[j]
            col = '{}<->{}'.format(body_parts[i], body_parts[j])
            if not col in original_df.columns:
                original_df[col] = np.nan
            original_df.at[idx, col] = \
                euclidean_distance(world_keypoint_coordinates[bp1], world_keypoint_coordinates[bp2])
            
    # one '<BODY_PART>_Y' column per body part holding element [1] of its
    # world coordinate
    for i in range(len(body_parts)):
        bp = body_parts[i]
        col = '{}_Y'.format(bp)
        if not col in original_df.columns:
            original_df[col] = np.nan
        original_df.at[idx, col] = world_keypoint_coordinates[bp][1]
    
    # update dataframe with biomass predictions from both models
    predicted_biomass_linear = coord2biomass_linear(world_keypoint_coordinates, model)
    predicted_biomass_blender = coord2biomass_blender(world_keypoint_coordinates, blender)
    original_df.at[idx, 'predicted_biomass_linear'] = predicted_biomass_linear
    original_df.at[idx, 'predicted_biomass_blender'] = predicted_biomass_blender
    
    # update dataframe with keypoint deviation: the largest absolute difference
    # between left and right 'yFrame' values over all body parts
    # NOTE(review): `threshold` is assigned but never read in this cell.
    threshold = 10
    left_keypoint_y_coords = {bp['keypointType']: bp['yFrame'] for bp in left_keypoints}
    right_keypoint_y_coords = {bp['keypointType']: bp['yFrame'] for bp in right_keypoints}
    max_y_coordinate_deviation = \
        max([abs(left_keypoint_y_coords[bp] - right_keypoint_y_coords[bp]) for bp in body_parts])
    
    original_df.at[idx, 'max_y_coordinate_deviation'] = max_y_coordinate_deviation
    
    # add 3D range for world coordinate y-values: spread (max - min) of the
    # element-[1] values after normalize_world_keypoints
    
    
    norm_wkps = normalize_world_keypoints(world_keypoint_coordinates)
    norm_wkp_y_values = [norm_wkps[bp][1] for bp in norm_wkps.keys()]
    max_y_world_coordinate_deviation = max(norm_wkp_y_values) - min(norm_wkp_y_values)
    original_df.at[idx, 'max_y_world_coordinate_deviation'] = max_y_world_coordinate_deviation



<h1> Apply filters </h1>

In [None]:
# ids of detections whose crops were found not to be rectified
with open('/root/data/alok/biomass_estimation/invalid_fish_detection_ids.json') as invalid_ids_file:
    invalid_fish_detection_ids = json.load(invalid_ids_file)
df = original_df.copy(deep=True)

# define filters
valid_linear_prediction_mask = ~df.predicted_biomass_linear.isnull()
rectification_valid_mask = (~df.fish_detection_id.isin(invalid_fish_detection_ids))
keypoints_valid_mask = (df.max_y_coordinate_deviation < 15)
qa_mask = df.is_qa == True

# orientation sanity checks: both products must be positive, i.e. the paired
# Y differences must share a sign
orient_mask_1 = (df['DORSAL_FIN_Y'] - df['TAIL_NOTCH_Y'])  * (df['UPPER_LIP_Y'] - df['TAIL_NOTCH_Y']) > 0 
orient_mask_2 = (df['UPPER_LIP_Y'] - df['ADIPOSE_FIN_Y'])  * (df['UPPER_LIP_Y'] - df['TAIL_NOTCH_Y']) > 0

# keep predictions strictly between the 1st and 99th percentile of all
# non-null linear predictions; both percentiles are computed once
all_linear_predictions = original_df.predicted_biomass_linear.dropna()
lower_bound, upper_bound = np.percentile(all_linear_predictions, [1.0, 99.0])
inlier_mask = (df.predicted_biomass_linear > lower_bound) & \
              (df.predicted_biomass_linear < upper_bound)

mask_valid = valid_linear_prediction_mask & rectification_valid_mask & keypoints_valid_mask & qa_mask

mask = mask_valid & orient_mask_1 & orient_mask_2 & inlier_mask

df = df[mask].copy(deep=True)
# index by capture time so later cells can slice/resample by date
df.index = pd.to_datetime(df.captured_at)

# rows passing the validity filters vs. rows surviving every filter
print(mask_valid.sum(), len(df))


In [None]:
def expected_count(working_distance, fish_size, fps=1.0, orientation_angle=0.0, camera_params=None):
    """Estimate how many frames capture a fish crossing the stereo overlap region.

    Args:
        working_distance: distance from camera to fish. It now drives the field
            size; previously it was unconditionally overwritten with 1.0, so
            every fish was treated as being at the same distance.
        fish_size: fish length, in the same units as ``working_distance``.
        fps: frames per second.
        orientation_angle: fish orientation in degrees.
        camera_params: dict with ``imageSensorWidth``, ``focalLength`` and
            ``baseline``. Defaults to the notebook-global ``camera_metadata``
            (the behaviour before this parameter existed).

    Returns:
        ``(overlap width - projected fish size) / distance travelled per frame``,
        floored at 0.0. Callers that invert the result must handle 0.0.
    """
    if camera_params is None:
        # backward-compatible fallback to the global left behind by the
        # row-processing loop
        camera_params = camera_metadata

    orientation_angle_rad = np.pi * orientation_angle / 180.0

    # width of the field of view at this distance, minus the part only one
    # camera sees
    field_size = camera_params['imageSensorWidth'] * working_distance / camera_params['focalLength']
    overlapping_field_size = field_size - camera_params['baseline']

    projected_fish_size = fish_size * np.cos(orientation_angle_rad)
    # NOTE(review): speed is derived from the already-projected size, so the
    # cosine is applied twice. Preserved as-is; confirm this is intended.
    fish_speed = projected_fish_size * np.cos(orientation_angle_rad) # meters per second
    dist_between_frames = fish_speed / fps

    count = (overlapping_field_size - projected_fish_size) / dist_between_frames
    return max(count, 0.0)
    


In [None]:
# Per-fish geometry for the filtered frame: mean depth, length, orientation
# angle, curvature angle and inverse expected frame count.
depths = []
lengths = []
thetas = []
curvature_thetas = []
inverse_expected_counts = []
for wkp in df['world_keypoint_coordinates'].tolist():
    # element [1] of each coordinate is treated as the depth value here
    y_values = [coords[1] for coords in wkp.values()]
    mean_depth = np.array(y_values).mean()
    depths.append(mean_depth)
    length = euclidean_distance(wkp['UPPER_LIP'], wkp['TAIL_NOTCH'])
    lengths.append(length)

    # orientation: angle between the tail->lip vector (first component zeroed)
    # and the +/- third axis, with the sign chosen so the base vector points
    # the same way as the fish along that axis
    direction_vector = np.array(wkp['UPPER_LIP']) - np.array(wkp['TAIL_NOTCH'])
    direction_vector[0] = 0
    base_vector = np.array([0, 0, 1]) if wkp['UPPER_LIP'][2] > wkp['TAIL_NOTCH'][2] else np.array([0, 0, -1])
    theta = (180 / np.pi) * np.arccos(np.dot(direction_vector, base_vector) / np.linalg.norm(direction_vector))
    thetas.append(theta)
    
    # calculate curvature: angle between the normal of the front plane
    # (lip/dorsal/pelvic) and the normal of the rear plane (pelvic/dorsal/tail).
    # Iterate over this fish's own keys rather than the `body_parts` variable
    # leaked from an earlier cell's loop.
    flipped_wkp = {bp: [wkp[bp][2], wkp[bp][1], wkp[bp][0]] for bp in wkp}
    fv1 = np.array(flipped_wkp['UPPER_LIP']) - np.array(flipped_wkp['DORSAL_FIN'])
    fv2 = np.array(flipped_wkp['UPPER_LIP']) - np.array(flipped_wkp['PELVIC_FIN'])
    n1 = np.cross(fv1, fv2)
    
    bv1 = np.array(flipped_wkp['PELVIC_FIN']) -  np.array(flipped_wkp['TAIL_NOTCH'])
    bv2 = np.array(flipped_wkp['DORSAL_FIN']) -  np.array(flipped_wkp['TAIL_NOTCH'])
    n2 = np.cross(bv1, bv2)
    curvature_theta = (180 / np.pi) * np.arccos(np.dot(n1, n2) / (np.linalg.norm(n1) * np.linalg.norm(n2)))
    curvature_thetas.append(curvature_theta)
    
    # expected_count floors at 0.0; record NaN instead of dividing by zero so
    # such fish drop out of the weighted averages computed later
    count = expected_count(mean_depth, length, orientation_angle=theta)
    inverse_expected_count = 1.0 / count if count > 0 else np.nan
    inverse_expected_counts.append(inverse_expected_count)
    
df['length'] = lengths
df['depth'] = depths
df['inverse_expected_counts'] = inverse_expected_counts
df['theta'] = thetas
df['curvature'] = curvature_thetas


In [None]:
# Bin fish by depth (bin edges every 0.1 from 0.8 up to, but excluding, 2.0)
# and compute per bin: mean length, plain mean predicted weight, and the mean
# predicted weight weighted by inverse expected count.
depth_values = np.arange(0.8, 2.0, 0.1)
x_ticks, conditional_lengths = [], []
conditional_weights, modified_conditional_weights = [], []

for lower_edge, upper_edge in zip(depth_values[:-1], depth_values[1:]):
    # strict inequalities on both sides: fish exactly on an edge fall in no bin
    fish_in_bin = df[(df.depth > lower_edge) & (df.depth < upper_edge)]

    conditional_lengths.append(fish_in_bin.length.mean())

    weighted_biomass_total = (fish_in_bin.predicted_biomass_linear * fish_in_bin.inverse_expected_counts).sum()
    modified_conditional_weights.append(weighted_biomass_total / fish_in_bin.inverse_expected_counts.sum())
    conditional_weights.append(fish_in_bin.predicted_biomass_linear.mean())

    # bars are drawn at the bin centre
    x_ticks.append((lower_edge + upper_edge) / 2.0)


In [None]:
# Bar chart: mean fish length per depth bin (bars centred on x_ticks).
# NOTE(review): the y-axis label carries no unit - confirm what unit
# euclidean_distance returns.
plt.figure(figsize=(10, 5))
plt.bar(x_ticks, conditional_lengths, width=0.08)
plt.xlabel('Fish distance from camera (m)')
plt.ylabel('Average fish length')
plt.show()

In [None]:
# Bar chart: unweighted mean of predicted_biomass_linear per depth bin.
plt.figure(figsize=(10, 5))
plt.bar(x_ticks, conditional_weights, width=0.08)
plt.xlabel('Fish distance from camera (m)')
plt.ylabel('Average fish weight')
plt.show()

In [None]:
# Overlay the plain and the inverse-expected-count-weighted mean weight per
# depth bin. Labels and a legend are required to tell the two translucent
# series apart.
plt.figure(figsize=(10, 5))
plt.bar(x_ticks, conditional_weights, width=0.08, alpha=0.5, label='Unweighted mean')
plt.bar(x_ticks, modified_conditional_weights, width=0.08, alpha=0.5,
        label='Weighted by inverse expected count')
plt.xlabel('Fish distance from camera (m)')
plt.ylabel('Average fish weight')
plt.legend()
plt.show()

In [None]:

# Scatter of linear-model biomass prediction against fish depth.
# NOTE(review): this figure has no axis labels.
plt.scatter(df.depth, df.predicted_biomass_linear)
plt.show()

In [None]:
# Histogram of the per-fish orientation angles collected in `thetas`.
plt.figure(figsize=(20, 10))
plt.hist(thetas, bins=100)
plt.xlabel('Fish orientation relative to lateral (degrees)')
plt.ylabel('Frequency')
plt.grid()

In [None]:
# Histogram of the per-fish mean depth values collected in `depths`.
plt.figure(figsize=(20, 10))
plt.hist(depths, bins=100)
plt.xlabel('Fish depth (meters)')
plt.ylabel('Frequency')
plt.grid()

<h1> Curvature </h1>

In [None]:
# Histogram of the per-fish curvature angles collected in `curvature_thetas`
# (angle between the front-plane and rear-plane normals, in degrees).
plt.figure(figsize=(20, 10))
plt.hist(curvature_thetas, bins=100)
plt.xlabel('Fish curvature angle (degrees)')
plt.ylabel('Frequency')
plt.grid()

In [None]:
# Rebind df to a fresh copy of the unfiltered frame.
# NOTE: this discards the filtered frame built earlier, including its datetime
# index and the length/depth/theta/curvature columns.
df = original_df.copy()

In [None]:
# Rows whose normalised Y spread is below 0.15.
# NOTE(review): `mask` is not used by the cells that follow before it is
# reassigned.
mask = df.max_y_world_coordinate_deviation < 0.15

<h1> Examine the worst cases </h1>

In [None]:
# Display annotations that were skipped but not flagged as partial.
# NOTE(review): this renders the whole selection; consider .head() for large frames.
df[(df.is_skipped == True) & (df.is_partial==False)]

In [None]:
def plot_coordinates(image_url, side, keypoints):
    """Download a crop from S3, display it, and return the pixel array.

    Args:
        image_url: URL containing ``aquabyte-crops/<key>``; everything after
            the bucket name is used as the S3 object key.
        side: 'left' or 'right'. Currently unused; kept for call compatibility.
        keypoints: keypoint dicts for the crop. Currently unused because the
            overlay code below is disabled.

    Returns:
        The image as loaded by ``plt.imread``. Previously the function returned
        None even though callers store the result as ``im_left``/``im_right``
        and pass it on as image data.

    Side effects:
        Overwrites ``./image.jpg`` and opens a new matplotlib figure.
    """
    image_f = './image.jpg'
#     bucket = 'aquabyte-frames-resized-inbound'
    bucket = 'aquabyte-crops'
    key = image_url[image_url.index('aquabyte-crops') + len('aquabyte-crops') + 1:]
    s3_client.download_file(bucket, key, image_f)
    plt.figure(figsize=(30, 10))
    im = plt.imread(image_f)
    
    # keypoint overlay, currently disabled
#     for keypoint in keypoints:
#         keypoint_type = keypoint['keypointType']
#         x, y = keypoint['xCrop'], keypoint['yCrop']
#         plt.scatter([x], [y])
#         plt.annotate(keypoint_type, (x, y), color='red')
        
    plt.imshow(im)
    return im
    

In [None]:
keypoint_annotation_id = 96008
keypoint_annotation_mask = (df.id == keypoint_annotation_id)
left_image_url = df[keypoint_annotation_mask].left_image_url.iloc[0]
left_keypoints = df[keypoint_annotation_mask].left_keypoints.iloc[0]
right_image_url = df[keypoint_annotation_mask].right_image_url.iloc[0]
right_keypoints = df[keypoint_annotation_mask].right_keypoints.iloc[0]

world_keypoint_coordinates = df[keypoint_annotation_mask].world_keypoint_coordinates.iloc[0]
im_left = plot_coordinates(left_image_url, 'left', left_keypoints)
im_right = plot_coordinates(right_image_url, 'right', right_keypoints)

In [None]:
keypoint_annotation_id = 128200
keypoint_annotation_mask = (df.id == keypoint_annotation_id)
left_image_url = df[keypoint_annotation_mask].left_image_url.iloc[0]
left_keypoints = df[keypoint_annotation_mask].left_keypoints.iloc[0]
right_image_url = df[keypoint_annotation_mask].right_image_url.iloc[0]
right_keypoints = df[keypoint_annotation_mask].right_keypoints.iloc[0]

world_keypoint_coordinates = df[keypoint_annotation_mask].world_keypoint_coordinates.iloc[0]
im_left = plot_coordinates(left_image_url, 'left', left_keypoints)
im_right = plot_coordinates(right_image_url, 'right', right_keypoints)

In [None]:
right_image_url

In [None]:
norm_wkps = normalize_world_keypoints(world_keypoint_coordinates)
[(bp, norm_wkps[bp][1]) for bp in norm_wkps.keys()]

In [None]:
bp = 'TAIL_NOTCH'
disp = [i['xFrame'] for i in left_keypoints if i['keypointType'] == bp][0] - [i['xFrame'] for i in right_keypoints if i['keypointType'] == bp][0]



In [None]:
(camera_metadata['focalLengthPixel'] * camera_metadata['baseline'])/disp

In [None]:
print(disp)

In [None]:
imleft_keypoints

In [None]:
bgr = cv2.imread('./image.jpg')
lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
lab_planes = cv2.split(lab)
gridsize = 16
clahe = cv2.createCLAHE(clipLimit=2.0,tileGridSize=(gridsize,gridsize))
lab_planes[0] = clahe.apply(lab_planes[0])
lab = cv2.merge(lab_planes)
bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

In [None]:
plt.figure(figsize=(20, 10))
plt.imshow(bgr)

In [None]:
left_image_url

In [None]:
def load_params(params_file, image_size=(4096, 3000)):
    """Build stereo rectification maps from a calibration JSON file.

    Args:
        params_file: path to a JSON file with ``CameraParameters1``,
            ``CameraParameters2``, ``RotationOfCamera2`` and
            ``TranslationOfCamera2`` entries.
        image_size: (width, height) of the full frame in pixels. Defaults to
            the previously hard-coded (4096, 3000).

    Returns:
        ``(left_maps, right_maps)`` as returned by
        ``cv2.initUndistortRectifyMap`` for each camera.
    """
    with open(params_file) as f:
        params = json.load(f)

    def _distortion_coefficients(camera_params):
        # reorder into the (k1, k2, p1, p2, k3) layout passed to OpenCV
        radial = camera_params['RadialDistortion']
        return np.array(radial[0:2] + camera_params['TangentialDistortion'] + [radial[2]])

    # matrices in the file are stored transposed relative to what is passed on
    cameraMatrix1 = np.array(params['CameraParameters1']['IntrinsicMatrix']).transpose()
    cameraMatrix2 = np.array(params['CameraParameters2']['IntrinsicMatrix']).transpose()

    distCoeffs1 = _distortion_coefficients(params['CameraParameters1'])
    distCoeffs2 = _distortion_coefficients(params['CameraParameters2'])

    R = np.array(params['RotationOfCamera2']).transpose()
    T = np.array(params['TranslationOfCamera2']).transpose()

    imageSize = tuple(image_size)
    
    # perform rectification
    (R1, R2, P1, P2, Q, leftROI, rightROI) = cv2.stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2, imageSize, R, T, None, None, None, None, None, cv2.CALIB_ZERO_DISPARITY, 0)

    left_maps = cv2.initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, imageSize, cv2.CV_16SC2)
    right_maps = cv2.initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, imageSize, cv2.CV_16SC2)
    
    return left_maps, right_maps

def get_remap(crop, side, crop_metadata, stereo_params_f):
    """Paste a crop into a blank full frame, rectify it, and locate the result.

    Args:
        crop: image data for the crop (anything ``np.array`` accepts) whose
            shape matches ``crop_metadata['height']`` x ``['width']`` x 3.
        side: 'left' selects the left rectification maps; any other value
            selects the right maps.
        crop_metadata: dict with ``x_coord``, ``y_coord``, ``width``, ``height``
            giving the crop's position in the 4096x3000 frame.
        stereo_params_f: calibration JSON path passed to ``load_params``.

    Returns:
        ``(remap, rectified_crop_metadata)``: the rectified full frame, and a
        copy of ``crop_metadata`` whose position/size fields describe the
        bounding box of non-zero pixels in that frame.

    Raises:
        ValueError: if the rectified frame contains no non-zero pixels.
    """
    left_maps, right_maps = load_params(stereo_params_f)
    maps = left_maps if side == 'left' else right_maps

    # allocate as uint8 directly instead of building a float64 frame and casting
    new_image = np.zeros([3000, 4096, 3], dtype='uint8')
    top = crop_metadata['y_coord']
    bottom = crop_metadata['y_coord'] + crop_metadata['height']
    left = crop_metadata['x_coord']
    right = crop_metadata['x_coord'] + crop_metadata['width']
    new_image[top:bottom, left:right, :] = np.array(crop)

    remap = cv2.remap(new_image, maps[0], maps[1], cv2.INTER_LANCZOS4)

    # bounding box of everything that is not background (zero) after rectification
    nonzero_indices = np.where(remap > 0)
    if nonzero_indices[0].size == 0:
        raise ValueError('rectified frame is empty; cannot locate the crop')
    y_min, y_max = nonzero_indices[0].min(), nonzero_indices[0].max() 
    x_min, x_max = nonzero_indices[1].min(), nonzero_indices[1].max()

    print(crop_metadata)
    rectified_crop_metadata = crop_metadata.copy()
    # plain ints keep the metadata JSON-serialisable
    rectified_crop_metadata['x_coord'] = int(x_min)
    rectified_crop_metadata['y_coord'] = int(y_min)
    rectified_crop_metadata['width'] = int(x_max - x_min)
    rectified_crop_metadata['height'] = int(y_max - y_min)
    
    return remap, rectified_crop_metadata



In [None]:
# Rectify the left and right crops of the currently selected annotation.
# NOTE(review): `fish_id_mask` is not defined anywhere in the visible notebook;
# presumably the row selector built earlier (`keypoint_annotation_mask`) was
# meant - confirm before running.
# NOTE(review): `left_new_image` / `right_new_image` are allocated but not read
# in this cell.
stereo_params_f = './2019-04-26_blom_kjeppevikholmen_pen_1.json'
left_crop_metadata = df[fish_id_mask].left_crop_metadata.iloc[0]
left_new_image = np.zeros([3000, 4096, 3]).astype('uint8')
left_remap, rectified_left_crop_metadata = get_remap(im_left, 'left', left_crop_metadata, stereo_params_f)

right_crop_metadata = df[fish_id_mask].right_crop_metadata.iloc[0]
right_new_image = np.zeros([3000, 4096, 3]).astype('uint8')
right_remap, rectified_right_crop_metadata = get_remap(im_right, 'right', right_crop_metadata, stereo_params_f)

In [None]:
# Left keypoints: each yCrop offset by the rectified crop's y_coord.
[(kp['keypointType'], kp['yCrop'] + rectified_left_crop_metadata['y_coord']) for kp in left_keypoints]

In [None]:
# Right keypoints: same offset using the right rectified crop.
[(kp['keypointType'], kp['yCrop'] + rectified_right_crop_metadata['y_coord']) for kp in right_keypoints]

In [None]:
# Save both rectified full frames to disk for visual comparison.
Image.fromarray(right_remap).save('./right_remap.jpg')
Image.fromarray(left_remap).save('./left_remap.jpg')

In [None]:
left_keypoints

In [None]:
right_keypoints

In [None]:
# NOTE(review): no top-level `x` is assigned in the visible notebook, so this
# cell fails on a fresh kernel.
x

In [None]:
df['2019-05-02']['predicted_biomass_blender'].mean()

In [None]:
mask = (df.site_id == 23) & (df.pen_id == 4) & (df.index >= '2019-04-27')
df[mask].predicted_biomass_blender.resample('D', how=lambda x: x.mean())

In [None]:
plt.hist(df[mask].predicted_biomass_blender)

In [None]:
df[mask].shape

In [None]:
mask = (df.predicted_biomass_linear > 500) & (df.predicted_biomass_linear < 6000)
plt.scatter(df.ix[mask, 'predicted_biomass_blender'], df.ix[mask, 'predicted_biomass_linear'])
plt.show()

<h1> Examine rectification issue </h1>

In [None]:
df.shape

In [None]:
# Rebind df to the CSV dump; the loop below json-decodes its crop metadata
# columns.
df = pd.read_csv('./data_dump.csv')

In [None]:
# Scan crops in S3 and flag those whose pixel dimensions still equal the
# original crop dimensions, which this check treats as "not rectified".
rectified_bucket = 'aquabyte-crops'
left_image_rectified_f = './left_image_rectified.jpg'
right_image_rectified_f = './right_image_rectified.jpg'

# rows before this 1-based position are skipped (presumably already checked
# in an earlier session - confirm); set to 0 to scan everything
resume_from_row = 36132

invalid_fish_detection_ids, invalid_urls = [], []
i = 0
for idx, row in df.iterrows():
    if i % 100 == 0:
        print(i)
    i += 1
    if i < resume_from_row:
        continue
    left_image_url = row.left_image_url
    right_image_url = row.right_image_url
    # object key = everything after '<bucket>/' in the URL
    left_rectified_key = left_image_url[left_image_url.index(rectified_bucket) + len(rectified_bucket) + 1:]
    s3_client.download_file(rectified_bucket, left_rectified_key, left_image_rectified_f)
    right_rectified_key = right_image_url[right_image_url.index(rectified_bucket) + len(rectified_bucket) + 1:]
    s3_client.download_file(rectified_bucket, right_rectified_key, right_image_rectified_f)
    
    # TODO: decode in memory instead of round-tripping through local files
    left_rectified_image = cv2.imread(left_image_rectified_f)
    right_rectified_image = cv2.imread(right_image_rectified_f)
    # cv2.imread returns None for unreadable files; report and move on rather
    # than crashing mid-scan with an AttributeError on .shape
    if left_rectified_image is None or right_rectified_image is None:
        print('could not decode image(s) for id {}; skipping'.format(row.id))
        continue
    
    left_crop_metadata = json.loads(row.left_crop_metadata)
    right_crop_metadata = json.loads(row.right_crop_metadata)
    left_crop_width = left_crop_metadata['width']
    left_crop_height = left_crop_metadata['height']
    right_crop_width = right_crop_metadata['width']
    right_crop_height = right_crop_metadata['height']
    
    invalid = False
    if left_rectified_image.shape[0] == left_crop_height and left_rectified_image.shape[1] == left_crop_width:
        invalid = True
        invalid_urls.append(left_image_url)
        print('left image not rectified for id {}!'.format(row.id))
    if right_rectified_image.shape[0] == right_crop_height and right_rectified_image.shape[1] == right_crop_width:
        invalid = True
        invalid_urls.append(right_image_url)
        print('right image not rectified for id {}!'.format(row.id))
    
    if invalid:
        invalid_fish_detection_ids.append(int(row.id))
    
    
    
        
    
        

In [None]:
pickle.dump(invalid_ids, open('./invalid_ids', 'wb'))

In [None]:
i

In [None]:
json.dump(invalid_urls + invalid_urls_old, open('./invalid_urls.json', 'w'))

In [None]:
invalid_urls_old = json.load(open('./invalid_urls.json'))

In [None]:
invalid_fish_detection_ids_old = json.load(open('./invalid_fish_detection_ids.json'))

In [None]:
json.dump(invalid_fish_detection_ids + invalid_fish_detection_ids_old, open('./invalid_fish_detection_ids.json', 'w'))

<h1> Prod data backfill </h1>

In [None]:
# Every keypoint annotation joined with its fish detection (no site/pen/date
# filter, so this reads the whole table).
sql_query = '''
select 

k.id, k.fish_detection_id, k.annotated_by_email, k.is_qa, 
k.is_skipped, k.is_blurry, k.is_dark, k.is_occluded,
k.is_bad_orientation, k.is_partial, k.direction, k.keypoints, 
k.work_duration_left_ms, k.work_duration_right_ms, f.created_at, 
f.updated_at, f.captured_at, f.site_id, f.pen_id, f.left_image_url, f.right_image_url, 
f.left_crop_metadata, f.right_crop_metadata, f.camera_metadata

from keypoint_annotations k
left join fish_detections f
on k.fish_detection_id = f.id
'''

# A bare `extract_from_database` is never defined or imported in this notebook;
# use the DataAccessUtils instance the same way the first query does.
df = data_access_utils.extract_from_database(sql_query)


In [None]:
for idx, row in df.iterrows():
    if row.keypoints:
        keypoints = row.keypoints
        left_crop_metadata = row.left_crop_metadata
        right_crop_metadata = row.right_crop_metadata
        
        for 
        
        break

In [None]:
left_keypoints