In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse

# Raise pandas' display limits (rows shown and characters per cell) to 500 so
# large frames and long cell values are rendered with less truncation.
for _option_name, _option_limit in {'display.max_rows': 500, 'display.max_colwidth': 500}.items():
    pd.set_option(_option_name, _option_limit)

<h1> Load LATI data for Blom Kjeppevikholmen Pen ID 5 joined with keypoint annotations </h1>

In [None]:
# Read the production SQL credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle deterministically instead of leaving it open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Lice-annotation rows joined to QA'd keypoint annotations on the capture
# timestamp only. The `b.keypoints is not null` filter discards detections
# that have no keypoint annotation, so the left join behaves like an inner join.
# NOTE(review): the section heading says "Pen ID 5" but the filter below is
# pen_id = 4 -- confirm which one is intended.
query = """
    select * from lati_fish_detections_lice_annotations a left join 
    (select keypoints, left_image_url, right_image_url, captured_at, camera_metadata, is_qa from keypoint_annotations) b
    on a.captured_at = b.captured_at
    where b.keypoints is not null
    and b.is_qa = true
    and a.pen_id = 4;
"""
cogito_df = rds_access_utils.extract_from_database(query)


In [None]:
# S3 helper used further down (in process_row) to download image crops.
# '/root/data' is presumably its local download/cache root -- TODO confirm
# against S3AccessUtils.
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# Work on a deep copy so the raw query result in `cogito_df` stays untouched.
df = cogito_df.copy(deep=True)
cols = df.columns.tolist()

# The query joins on `captured_at` only, so keep just the rows where the
# keypoint annotation's left crop URL, rewritten to the LATI bucket name, is
# exactly the detection's image URL.
df['is_match'] = pd.Series(
    [
        left_url.replace('aquabyte-crops', 'aquabyte-crops-lati') == lati_url
        for left_url, lati_url in zip(df.left_image_url, df.image_url)
    ],
    index=df.index,
    dtype=bool,  # stays boolean even when the frame is empty
)

# `.copy()` detaches the filtered frame from its parent: later cells add
# columns to `df`, and doing that on a slice raises SettingWithCopyWarning.
df = df[df['is_match']].copy()

<h1> Generate Stereo Depth Values </h1>

In [None]:
def get_world_keypoints(row):
    """Convert a row's left/right crop keypoints into world keypoints.

    Args:
        row: object exposing ``keypoints`` (a mapping that may contain
            'leftCrop' and 'rightCrop') and ``camera_metadata``.

    Returns:
        Whatever ``pixel2world`` returns for the two crops and the camera
        metadata, or None when either crop is absent from ``row.keypoints``.
    """
    annotation = row.keypoints
    has_both_crops = 'leftCrop' in annotation and 'rightCrop' in annotation
    if not has_both_crops:
        return None
    return pixel2world(annotation['leftCrop'], annotation['rightCrop'], row.camera_metadata)
    
# Attach world keypoints to the raw query result, one value per row; rows
# lacking either crop get None.
cogito_df['world_keypoints'] = cogito_df.apply(get_world_keypoints, axis=1)

def get_centroid_depth(world_keypoints):
    """Return the median of the index-1 coordinate over all world keypoints.

    This notebook treats the index-1 component of each keypoint as its depth.

    Args:
        world_keypoints: mapping of body-part name to a coordinate sequence,
            or a falsy value (None / empty mapping).

    Returns:
        The median index-1 coordinate, or None for a falsy input.
    """
    if not world_keypoints:
        return None
    depth_per_keypoint = np.array([coords[1] for coords in world_keypoints.values()])
    return np.median(depth_per_keypoint)

def get_length(world_keypoints):
    """Return the distance between the UPPER_LIP and ADIPOSE_FIN keypoints.

    Args:
        world_keypoints: mapping containing 'UPPER_LIP' and 'ADIPOSE_FIN'
            entries, or a falsy value (None / empty mapping).

    Returns:
        ``euclidean_distance`` of the two keypoints, or None for a falsy input.
    """
    if not world_keypoints:
        return None
    upper_lip = world_keypoints['UPPER_LIP']
    adipose_fin = world_keypoints['ADIPOSE_FIN']
    return euclidean_distance(upper_lip, adipose_fin)
    
# Derive per-row geometry, image-size and acceptance columns on the matched frame.
df['world_keypoints'] = df.apply(get_world_keypoints, axis=1)
world_keypoints_column = df['world_keypoints']
df['centroid_depth'] = world_keypoints_column.apply(get_centroid_depth)
df['length'] = world_keypoints_column.apply(get_length)
for _dimension in ('width', 'height'):
    # Bind the key as a default argument so each lambda reads its own dimension.
    df['image_' + _dimension] = df['metadata'].apply(lambda meta, key=_dimension: meta[key])
# A row counts as accepted exactly when `is_skipped` equals False.
df['is_accepted'] = df['is_skipped'].eq(False)

<h1> Plot All Fish vs. Accepted Fish by Depth </h1>

In [None]:
# Rows without both crops have no centroid depth (None/NaN); drop them because
# histogram auto-ranging cannot handle non-finite values.
all_depths = df['centroid_depth'].dropna().astype(float)
accepted_depths = df[df.is_accepted]['centroid_depth'].dropna().astype(float)

# Compute the 20 bin edges once, from the full population, and reuse them for
# both histograms. With independent `bins=20` each series gets its own edges
# and the overlaid bars cannot be compared bucket by bucket.
_, depth_bin_edges = np.histogram(all_depths, bins=20)

plt.figure(figsize=(20, 10))
plt.hist(all_depths, bins=depth_bin_edges, color='blue', alpha=0.5, label='all fish')
plt.hist(accepted_depths, bins=depth_bin_edges, color='red', alpha=0.5, label='accepted fish')
# Units presumed to be metres (process_row names this value `depth_m`) -- TODO confirm.
plt.xlabel('centroid depth (m)')
plt.ylabel('number of fish')
plt.title('Centroid depth: all fish vs. accepted fish')
plt.legend()
plt.grid()
plt.show()

<h1> Download Kjeppevikholmen Images Locally </h1>

In [None]:
# Flag columns to break the images down by. 'is_accepted' is included
# alongside the actual skip reasons.
skip_reasons = ['is_accepted', 'is_too_dark', 'is_blurry']
# One boolean row mask over `df` per flag column, keyed by column name.
skip_masks = {reason: df[reason] == True for reason in skip_reasons}

In [None]:
# Seed NumPy's global RNG so the DataFrame.sample calls further down (which
# are given no random_state of their own) pick the same rows on every run.
np.random.seed(0)
# Focal length used by process_row to turn a metric length at a given depth
# into a pixel length (presumably expressed in pixels -- TODO confirm).
FOCAL_LENGTH = 4015

def process_row(row, skip_reason, lo, hi, draw_scale_bar=False):
    """Download one image from S3 and save it under its depth/skip-reason folder.

    The image is written to
    ``<modified_images_dir>/<lo>_<hi>/<skip_reason>/<file name of the S3 key>``.
    Relies on the notebook-level names ``s3_access_utils`` and
    ``modified_images_dir`` (plus ``object_length_m`` and ``FOCAL_LENGTH`` when
    ``draw_scale_bar`` is set), which must exist before the first call.

    Args:
        row: a row of ``df``; ``image_url`` is always read, ``centroid_depth``
            only when ``draw_scale_bar`` is True.
        skip_reason: name of the sub-folder the image is filed under.
        lo: lower bound of the depth bucket; only used in the folder name.
        hi: upper bound of the depth bucket; only used in the folder name.
        draw_scale_bar: when True, draw a horizontal line starting at pixel
            (100, 100) whose length is ``object_length_m`` projected to pixels
            at the row's depth. Off by default, which matches the previous
            behavior where this overlay was commented out.
    """
    image_url = row.image_url
    url_path = urlparse(image_url, allow_fragments=False).path.lstrip('/')
    if 'aquabyte-crops-lati' not in image_url:
        # Non-LATI URLs: the bucket is fixed and the whole path is the key.
        bucket, key = 'aquabyte-crops', url_path
    else:
        # LATI URLs carry the bucket as the first path component. S3 keys always
        # use '/', so split the string rather than rebuilding it with
        # os.path.join (which is OS-dependent and fails when no key follows).
        bucket, _, key = url_path.partition('/')
    print(bucket, key)
    image_f = s3_access_utils.download_from_s3(bucket, key)

    out_path = os.path.join(
        modified_images_dir, '{}_{}'.format(lo, hi), skip_reason, os.path.basename(key)
    )
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # The context manager releases the underlying file handle after saving;
    # this function is called once per exported image.
    with Image.open(image_f) as im:
        if draw_scale_bar:
            depth_m = row['centroid_depth']
            line_segment_length_px = object_length_m * FOCAL_LENGTH / depth_m
            ImageDraw.Draw(im).line((100, 100, 100 + line_segment_length_px, 100))
        im.save(out_path)


# Root folder for the exported images; process_row files each image under
# <root>/<lo>_<hi>/<skip_reason>/.
modified_images_dir = '/root/data/alok/lice_counting/blom_kjeppevikholmen_breakdown_v3'
# Physical length that process_row projects into pixels (metres, per the `_m` suffix).
object_length_m = 0.01
# Maximum number of images exported per (depth bucket, skip reason) pair.
N = 20

# Bucket edges 0.5, 0.6, ..., 1.3. They are built from integers because
# np.arange with a fractional step can gain or lose its last element through
# floating-point rounding.
depth_values = [tenths / 10 for tenths in range(5, 14)]

# For every depth bucket and every flag in `skip_reasons`, export up to N
# randomly sampled images.
for i, (lo, hi) in enumerate(zip(depth_values[:-1], depth_values[1:])):
    print(i)
    # Half-open [lo, hi): a depth sitting exactly on a shared edge belongs to
    # one bucket only, instead of being eligible for two adjacent buckets.
    depth_mask = (df['centroid_depth'] >= lo) & (df['centroid_depth'] < hi)
    for target_skip_reason in skip_reasons:
        print(target_skip_reason)
        # The per-reason masks are deliberately not made mutually exclusive, so
        # an image flagged for several reasons may be exported under each.
        candidates = df[skip_masks[target_skip_reason] & depth_mask]
        sampled = candidates.sample(min(N, candidates.shape[0]))
        for idx, row in sampled.iterrows():
            process_row(row, target_skip_reason, lo, hi)
