In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse

# Raise pandas' display limits so long result tables and full-length cell values are shown.
for _name, _limit in (('display.max_rows', 500), ('display.max_colwidth', 500)):
    pd.set_option(_name, _limit)

<h1> Get lice annotation data </h1>

In [None]:
# Read the production DB credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle once the credentials have been parsed.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Lice annotations for pens 57-60 captured on or after 2019-09-14.
query = """
    select * from lati_fish_detections_lice_annotations where (pen_id = 57 or pen_id=58 or pen_id=59 or pen_id=60)
    and captured_at >= '2019-09-14';
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
# Reconciled lice annotations for pens 57-60 captured on or after 2019-09-14.
# Relies on `rds_access_utils` created in an earlier cell; note that `query` is overwritten here.
query = """
    select * from lati_fish_detections_lice_annotations_reconciled where (pen_id = 57 or pen_id=58 or pen_id=59 or pen_id=60)
    and captured_at >= '2019-09-14';
"""
reconciled_df = rds_access_utils.extract_from_database(query)

In [None]:
# S3 helper used below to fetch image crops.
# NOTE(review): '/root/data' is presumably the local directory downloads are written to — confirm,
# and consider making this absolute path configurable.
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# Reference fish dimensions in metres used for the single-image depth estimate.
FISH_WIDTH_M = 0.0656
FISH_LENGTH_M = 0.2944
# Focal length; presumably expressed in pixels since it is divided by pixel lengths — TODO confirm.
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Estimate the fish length in pixels from a bounding box.

    Args:
        x: mapping with numeric 'width' and 'height' entries (the box size).

    Returns:
        The width when the box is flatter than the reference fish's own
        width/length angle, the height when it is correspondingly steep,
        and the box diagonal otherwise.
    """
    w, h = x['width'], x['height']
    # arctan2 matches arctan(h / w) for positive sizes but does not raise
    # ZeroDivisionError when the box has zero width.
    theta = np.arctan2(h, w) * (180.0 / np.pi)
    phi = np.arctan2(FISH_WIDTH_M, FISH_LENGTH_M) * (180.0 / np.pi)
    if theta < phi:
        return w
    elif theta > 90.0 - phi:
        return h
    else:
        return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and add image-size and depth columns.

    Args:
        df: annotation frame with an `is_cleaner_fish` column and a `metadata`
            column of dict-like values holding 'width', 'height' and,
            optionally, 'depth_m'.

    Returns:
        A new frame (the input is not modified) with these added columns:
        `image_width`, `image_height`, `length_px`, `single_image_depth_m`
        and `stereo_depth_m`.
    """
    # .copy() makes the filtered frame independent of the caller's frame, so
    # the column assignments below neither trigger SettingWithCopyWarning nor
    # risk writing through to the original.
    df = df[df.is_cleaner_fish != True].copy()
    df['image_width'] = df.metadata.apply(lambda x: x['width'])
    df['image_height'] = df.metadata.apply(lambda x: x['height'])
    df['length_px'] = df.metadata.apply(depth_fn)
    # Depth from a single image: focal length * reference fish length / apparent length.
    df['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / df.length_px
    # 'depth_m' may be absent from the metadata; .get() yields None in that case.
    df['stereo_depth_m'] = df.metadata.apply(lambda x: x.get('depth_m'))
    return df

# Replace both annotation frames with their processed versions
# (cleaner fish removed, image-size and depth columns added).
df = process_data_df(df)
reconciled_df = process_data_df(reconciled_df)


In [None]:
# Inspect the stereo depth column (taken from metadata['depth_m']) for each annotation row.
df.stereo_depth_m


In [None]:
# Stereo-depth window (metres) treated as "in focus". The summary message below
# is built from these values so the printed range always matches the mask.
IN_FOCUS_MIN_DEPTH_M = 0.4
IN_FOCUS_MAX_DEPTH_M = 0.5

# Masks over the Cogito annotations. Rows with a missing depth compare False,
# so they are counted as not in focus.
in_focus_mask = (df.stereo_depth_m > IN_FOCUS_MIN_DEPTH_M) & (df.stereo_depth_m < IN_FOCUS_MAX_DEPTH_M)
# NOTE(review): `~` assumes is_skipped is a real boolean column without nulls — confirm.
cogito_accept_mask = ~df.is_skipped
is_blurry_mask = df.is_blurry
is_bad_crop_mask = df.is_bad_crop == True

# Same masks over the reconciled (QA) annotations.
reconciled_in_focus_mask = (
    (reconciled_df.stereo_depth_m > IN_FOCUS_MIN_DEPTH_M)
    & (reconciled_df.stereo_depth_m < IN_FOCUS_MAX_DEPTH_M)
)
reconciled_accept_mask = ~reconciled_df.is_skipped
reconciled_is_blurry_mask = reconciled_df.is_blurry
reconciled_is_bad_crop_mask = reconciled_df.is_bad_crop == True


n = df.shape[0]
n_in_focus = df[in_focus_mask].shape[0]
n_in_focus_accepted_cogito = df[in_focus_mask & (cogito_accept_mask)].shape[0]
n_not_in_focus_accepted_cogito = df[~in_focus_mask & (cogito_accept_mask)].shape[0]
n_in_focus_accepted_reconciled = reconciled_df[reconciled_in_focus_mask & (reconciled_accept_mask)].shape[0]


print('Total number of images inspected by Cogito over the weekend: {}'.format(n))
print('Total number of these images within in-focus range ({:.0f} cm - {:.0f} cm): {}'.format(
    IN_FOCUS_MIN_DEPTH_M * 100, IN_FOCUS_MAX_DEPTH_M * 100, n_in_focus))
print('Total number of in-focus images accepted by Cogito: {}'.format(n_in_focus_accepted_cogito))
print('Total number of not-in-focus images accepted by Cogito: {}'.format(n_not_in_focus_accepted_cogito))
print('Total number of in-focus images accepted in QA: {}'.format(n_in_focus_accepted_reconciled))

<h1> What should Cogito have done? </h1>

In [None]:
# Download every in-focus crop so it can be inspected locally.
# The original call passed no arguments. Elsewhere in this notebook
# download_from_s3 is called with a bucket and an object key, so the same
# convention is used here: the key is the path component of image_url.
# NOTE(review): the bucket name is copied from that other call — confirm it applies to all rows.
for image_url in df.loc[in_focus_mask, 'image_url']:
    key = urlparse(image_url, allow_fragments=False).path.lstrip('/')
    s3_access_utils.download_from_s3('aquabyte-crops', key)

In [None]:
# Quality flags for images outside the in-focus window that were accepted anyway.
# NOTE(review): 'depth' is not one of the columns added by process_data_df (that one is
# 'stereo_depth_m'); presumably it comes straight from the database table — confirm.
df.loc[
    cogito_accept_mask & ~in_focus_mask,
    ['image_url', 'is_too_dark', 'is_blurry', 'is_bad_crop', 'is_cleaner_fish', 'depth']
]

In [None]:
# Histogram (20 bins) of stereo depth over the accepted (non-skipped) rows.
plt.hist(df.loc[cogito_accept_mask, 'stereo_depth_m'], bins=20)
plt.show()

In [None]:
# Scratch: length of the diagonal of a 1034 x 727 rectangle
# (presumably a crop's bounding box in pixels — confirm).
(1034**2+727**2)**.5

In [None]:
# Scratch: 4015 equals FOCAL_LENGTH and 1263 is roughly the 1034 x 727 diagonal.
# NOTE(review): presumably depth = focal length * object size / apparent size, with 250 being
# a fish length in mm — units are not stated anywhere; confirm.
4015*250 / (1263)

In [None]:
# Scratch: 4015 equals FOCAL_LENGTH.
# NOTE(review): presumably the apparent size in pixels of a 0.005 m object at 0.78 m depth — confirm.
(4015 * 0.005) / 0.78

In [None]:
# First few reconciled rows that have at least one adult female counted.
reconciled_df.loc[reconciled_df['adult_female_count'] > 0].head()

In [None]:
# Column totals of the two adjusted count columns over all reconciled rows.
reconciled_df[['adult_female_count_adjusted', 'moving_count_adjusted']].sum()

In [None]:
# Reconciled rows whose adjusted moving count is exactly 1.
reconciled_df.loc[reconciled_df['moving_count_adjusted'] == 1]

In [None]:
# Scratch: depth from disparity, depth = focal_length * baseline / disparity.
# NOTE(review): presumably 0.0138 m lens focal length over a 3.45e-6 m pixel pitch (focal length
# in pixels), a 0.101 m stereo baseline, and a disparity measured on a 512-px-wide image
# rescaled to 4096 px — none of this is stated in the notebook; confirm.
focal_length = 0.0138 / 3.45e-6
baseline = 0.101
disparity = (248-200) * (4096/512.)
depth = focal_length * baseline / disparity
print(depth)

In [None]:
# Histogram (20 bins) of the 'depth' column over the non-skipped reconciled rows.
# NOTE(review): 'depth' is not added by process_data_df (that column is 'stereo_depth_m');
# presumably it is a column of the database table — confirm.
plt.hist(reconciled_df.loc[~reconciled_df['is_skipped'], 'depth'], bins=20)
plt.show()

In [None]:
# Image URL and 'depth' for every non-skipped reconciled row.
# NOTE(review): 'depth' is presumably a database column (the derived one is 'stereo_depth_m') — confirm.
reconciled_df.loc[~reconciled_df.is_skipped, ['image_url', 'depth']]

In [None]:
# Random sample of skipped images for manual inspection.
# n is capped at the number of skipped rows because DataFrame.sample raises
# ValueError when asked for more rows than exist (without replacement).
# The fixed seed makes a re-run show the same rows.
skipped_df = df[df.is_skipped == True]
skipped_df.sample(n=min(2000, len(skipped_df)), random_state=0)

In [None]:
modified_images_dir = '/root/data/alok/biomass_estimation/modified_images/'
# Real-world length (metres) of the reference line drawn on each image.
object_length_m = 0.01
# Maximum number of images written per depth bin and category.
N = 50

cogito_accept_mask = ~df.is_skipped
qa_accept_mask = ~reconciled_df.is_skipped
# Depth bin edges in metres, 0.1 m apart starting at 0.2.
# NOTE(review): np.arange with a float step may or may not include the end point (0.8)
# depending on rounding; prefer np.linspace once the intended last edge is confirmed.
depth_values = [round(x, 1) for x in np.arange(0.2, 0.8, 0.1)]

depth_field = 'stereo_depth_m'


def _save_annotated_images(rows, output_dir):
    """Download each row's crop, draw a reference line on it and save it under output_dir.

    The line starts at pixel (100, 100) and its length is what an object of
    `object_length_m` would span at the row's depth
    (object_length_m * FOCAL_LENGTH / depth).
    """
    if rows.empty:
        return
    os.makedirs(output_dir, exist_ok=True)
    for _, row in rows.iterrows():
        line_segment_length_px = object_length_m * FOCAL_LENGTH / row[depth_field]
        key = urlparse(row.image_url, allow_fragments=False).path.lstrip('/')
        # download_from_s3 presumably returns a local file path; its result is handed to Image.open.
        image_f = s3_access_utils.download_from_s3('aquabyte-crops', key)
        # The context manager closes the image file once the annotated copy is saved.
        with Image.open(image_f) as im:
            draw = ImageDraw.Draw(im)
            draw.line((100, 100, 100 + line_segment_length_px, 100))
            im.save(os.path.join(output_dir, os.path.basename(key)))


for i in range(len(depth_values)-1):
    print(i)
    lo, hi = depth_values[i], depth_values[i+1]
    depth_mask = (df[depth_field] >= lo) & (df[depth_field] <= hi)
    reconciled_depth_mask = (reconciled_df[depth_field] >= lo) & (reconciled_df[depth_field] <= hi)
    bin_dir = os.path.join(modified_images_dir, '{}_{}'.format(lo, hi))

    # rejected images: flagged blurry but not a bad crop
    _save_annotated_images(
        df[depth_mask & is_blurry_mask & (~is_bad_crop_mask)].head(N),
        os.path.join(bin_dir, 'rejected'),
    )

    # accepted images: not skipped in the reconciled annotations
    _save_annotated_images(
        reconciled_df[reconciled_depth_mask & qa_accept_mask].head(N),
        os.path.join(bin_dir, 'accepted'),
    )

        

In [None]:
# URLs of the first 20 rows with stereo depth in [0.43, 0.46] m that are blurry but not bad crops.
df.loc[
    df.stereo_depth_m.between(0.43, 0.46) & is_blurry_mask & (~is_bad_crop_mask),
    'image_url'
].head(20)

In [None]:
# Stereo depth of the row(s) whose image URL contains this crop identifier.
df.loc[df['image_url'].str.contains('702_1953_3290_3000'), 'stereo_depth_m']

In [None]:
# Stereo depth of the row(s) whose image URL contains this crop identifier.
df.loc[df['image_url'].str.contains('366_1350_2442_2229'), 'stereo_depth_m']

In [None]:
# Stereo depth of the row(s) whose image URL contains this crop identifier.
df.loc[df['image_url'].str.contains('0_1127_2674_2012'), 'stereo_depth_m']

<h1> Generate depth values </h1>

In [None]:
# Histogram (20 bins) of stereo depth over accepted reconciled rows,
# restricted to depths strictly between 0.2 m and 0.7 m.
depth_field = 'stereo_depth_m'
valid_mask = reconciled_df[depth_field].gt(0.2) & reconciled_df[depth_field].lt(0.7)
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(reconciled_df.loc[valid_mask & reconciled_accept_mask, depth_field], bins=20)
plt.show()

In [None]:
# Histogram (20 bins) of the single-image depth estimate over accepted reconciled rows.
depth_field = 'single_image_depth_m'
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(reconciled_df.loc[reconciled_accept_mask, depth_field], bins=20)
plt.show()