In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool

# Raise pandas' display limits to 500 rows and 500 characters per column so
# large frames and long cell values are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)

<h1> Download all available data for Vikane and Tittelsnes </h1>

In [None]:
# Read the SQL credentials file named by the PROD_SQL_CREDENTIALS environment
# variable. The context manager closes the file handle, which a bare
# json.load(open(...)) would leave open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# get Cogito data

query = """
    select * from lati_fish_detections_lice_annotations where pen_id=40;
"""
cogito_df = rds_access_utils.extract_from_database(query)

# Keep only the skipped annotations and mark them as not accepted in QA.
# .copy() makes the filtered frame independent of the query result so the
# column assignment below does not raise pandas' SettingWithCopyWarning.
cogito_df = cogito_df[cogito_df.is_skipped == True].copy()
cogito_df['is_accepted_in_qa'] = False

# get reconciled data

query = """
    select * from lati_fish_detections_lice_annotations_reconciled where pen_id = 40;
"""
reconciled_df = rds_access_utils.extract_from_database(query)
# A reconciled row counts as accepted when adult_female_count is >= 0; rows
# where that comparison is False (including missing counts) stay not accepted.
reconciled_df['is_accepted_in_qa'] = False
reconciled_df.loc[reconciled_df.adult_female_count >= 0, 'is_accepted_in_qa'] = True


# combine into single dataframe
# Row labels of the two frames are kept as-is (no ignore_index), so index
# labels may repeat in the combined frame.
df = pd.concat([cogito_df, reconciled_df], axis=0)

In [None]:
# S3 helper used by download_from_url below. '/root/data' is its only argument
# (presumably the local directory downloads are written to — TODO confirm
# against S3AccessUtils).
s3_access_utils = S3AccessUtils('/root/data')

<h1> Metric Generator (Bryton) </h1>

In [None]:
def get_bucket_key(image_url):
    """Split an image URL into the S3 ``(bucket, key)`` pair it refers to.

    URLs that do not contain ``'aquabyte-crops-lati'`` map to the
    ``'aquabyte-crops'`` bucket, with the whole URL path (leading slashes
    stripped) as the key. For URLs that do contain it, the first path segment
    is taken as the bucket and everything after the first ``/`` as the key.

    Args:
        image_url: URL string of the image.

    Returns:
        Tuple ``(bucket, key)`` of strings. ``key`` is ``''`` when the path
        consists of the bucket segment only.
    """
    # allow_fragments=False keeps a '#' in the URL as part of the path instead
    # of splitting it off as a fragment.
    path = urlparse(image_url, allow_fragments=False).path.lstrip('/')
    if 'aquabyte-crops-lati' not in image_url:
        return 'aquabyte-crops', path

    # Split on the first '/' only. Unlike os.path.join over the split segments
    # this keeps the key byte-for-byte (empty segments are preserved), does not
    # depend on the OS path separator, and does not raise when the path has no
    # key part.
    bucket, _, key = path.partition('/')
    return bucket, key


def get_green_luminance(filename, black_threshold=15, glare_threshold=100):
    """Mean of the image's channel-1 values that lie strictly between two thresholds.

    The image is loaded into an array and index 1 of the last axis is read
    (assumes an RGB-ordered image so that this is the green channel — TODO
    confirm; a single-channel image raises IndexError on that indexing).

    Args:
        filename: Path (or file object) accepted by ``PIL.Image.open``.
        black_threshold: Values at or below this are ignored as too dark.
        glare_threshold: Values at or above this are ignored as glare.

    Returns:
        The mean of the selected values, or ``nan`` when no value falls
        inside the open interval ``(black_threshold, glare_threshold)``.
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied into the array.
    with Image.open(filename) as image_file:
        green = np.array(image_file)[:, :, 1]

    in_range = green[(green > black_threshold) & (green < glare_threshold)]
    if in_range.size == 0:
        # np.mean of an empty selection is nan as well, but emits a
        # RuntimeWarning; return the same value explicitly.
        return np.nan
    return np.mean(in_range)

def download_from_url(image_url):
    """Resolve ``image_url`` to its bucket and key and pass them to ``s3_access_utils.download_from_s3``.

    The result of ``download_from_s3`` is discarded; this function returns ``None``.
    """
    # get_bucket_key yields (bucket, key); unpack the pair straight into the call.
    s3_access_utils.download_from_s3(*get_bucket_key(image_url))


<h1> Blom  Vikane Depth / Luminance Analysis </h1>

In [None]:
# Restrict the combined frame to rows with captured_at on or after 2019-09-15
# and pen_id equal to 40. `df` is rebound to the filtered result.
ts_mask = df.captured_at >= '2019-09-15'
pen_id_mask = df.pen_id == 40
df = df[ts_mask & pen_id_mask]


In [None]:
# Assumed fish dimensions; the _M suffix suggests meters — TODO confirm.
FISH_WIDTH_M = 0.133
FISH_LENGTH_M = 0.685
# NOTE(review): presumably the focal length in pixels, since
# FOCAL_LENGTH * FISH_LENGTH_M / length_px is stored as a depth in meters — confirm.
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Pick the pixel length of the fish from a crop's bounding-box size.

    Despite its name this returns a length in pixels, not a depth. The angle
    of the box diagonal (``theta``) is compared with the angle implied by the
    assumed fish width/length ratio (``phi``):

    * ``theta < phi``        -> the box width is returned,
    * ``theta > 90 - phi``   -> the box height is returned,
    * otherwise              -> the box diagonal is returned.

    Args:
        x: Mapping with numeric ``'width'`` and ``'height'`` entries.

    Returns:
        ``x['width']``, ``x['height']`` or the diagonal ``sqrt(h**2 + w**2)``.
    """
    w, h = x['width'], x['height']
    # arctan2(h, w) equals arctan(h / w) for positive sizes but does not raise
    # ZeroDivisionError for a zero-width box (the angle is then 90 degrees).
    theta = np.degrees(np.arctan2(h, w))
    phi = np.degrees(np.arctan2(FISH_WIDTH_M, FISH_LENGTH_M))
    if theta < phi:
        return w
    elif theta > 90.0 - phi:
        return h
    else:
        return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and add image-size and depth columns.

    Rows whose ``is_cleaner_fish`` is ``True`` are removed (rows where the
    flag is missing are kept, because the filter is ``!= True``). The
    following columns are then derived from each row's ``metadata`` mapping:

    * ``image_width`` / ``image_height`` — ``metadata['width']`` / ``metadata['height']``
    * ``length_px`` — ``depth_fn(metadata)``
    * ``stereo_depth_m`` — ``metadata.get('depth_m')`` (``None`` when absent)
    * ``single_image_depth_m`` — ``FOCAL_LENGTH * FISH_LENGTH_M / length_px``

    Args:
        df: DataFrame with ``is_cleaner_fish`` and ``metadata`` columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    # .copy() detaches the filtered rows from the input so the column
    # assignments below do not raise pandas' SettingWithCopyWarning.
    processed = df[df.is_cleaner_fish != True].copy()
    processed['image_width'] = processed.metadata.apply(lambda x: x['width'])
    processed['image_height'] = processed.metadata.apply(lambda x: x['height'])
    processed['length_px'] = processed.metadata.apply(depth_fn)
    processed['stereo_depth_m'] = processed.metadata.apply(lambda x: x.get('depth_m'))
    processed['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / processed.length_px
    return processed

# Rebind df to the processed frame (cleaner-fish rows dropped, size and depth columns added).
df = process_data_df(df)


In [None]:
# Overlay the single-image depth distribution of every row in df (blue) with
# the subset accepted in QA (red).
plt.figure(figsize=(20, 10))
# Reuse the bin edges of the first histogram for the second one. With bins=20
# on both calls each series gets edges from its own value range, so the bars
# of the two histograms would not be comparable.
_, depth_bin_edges, _ = plt.hist(df.single_image_depth_m, bins=20, color='blue', alpha=0.5, label='all')
plt.hist(df[df.is_accepted_in_qa].single_image_depth_m, bins=depth_bin_edges, color='red', alpha=0.5, label='accepted in QA')
plt.xlabel('single_image_depth_m')
plt.ylabel('count')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Rows whose single-image depth estimate lies strictly between 0.55 and 1.1
# (the "depth of field" window used by the rates below).
dof_mask = (df.single_image_depth_m > 0.55) & (df.single_image_depth_m < 1.1)
# Rows flagged with any crop, orientation, cleaner-fish or obstruction issue.
bad_crop_mask = (df.is_bad_crop | df.is_bad_crop_cut_off | df.is_bad_crop_many_fish | df.is_bad_orientation | df.is_cleaner_fish | df.is_obstructed )
reject_mask = df.is_accepted_in_qa == False

original_skip_rate = df[reject_mask].shape[0] / df.shape[0]
print('Original skip rate: {}'.format(original_skip_rate))

skip_rate_after_hard_depth_cutoff = df[dof_mask & reject_mask].shape[0] / df[dof_mask].shape[0]
print('Skip rate after hard depth cutoff: {}'.format(skip_rate_after_hard_depth_cutoff))


# The numerator is restricted to rejects as well: the denominator counts only
# rejects inside the window, so counting every bad crop in the window would
# not be a share of those rejects.
pct_rejects_in_dof_bad_crop = (df[dof_mask & reject_mask & bad_crop_mask].shape[0] / df[dof_mask & reject_mask].shape[0])
print('Percentage of rejects within depth of field that are bad crops: {}'.format(pct_rejects_in_dof_bad_crop))

# Skip rate inside the window once every flagged row is removed from both
# numerator and denominator.
skip_rate_assuming_perfect_cropper = (df[dof_mask & reject_mask & ~bad_crop_mask].shape[0] / df[dof_mask & ~bad_crop_mask].shape[0])
print('Skip rate assuming hard depth cutoff & perfect cropper: {}'.format(skip_rate_assuming_perfect_cropper))


In [None]:
# Notes: 92% too blurry, 35% bad crop, 32% too dark, 7% obstructed, 20% bad orientation (the reasons overlap, so they sum to more than 100%).

In [None]:
# NOTE(review): `rdf` is not defined in the cells visible here — confirm it is
# created before this cell runs.
# Among non-accepted rows with single_image_depth_m below 1.1, the share that
# carries at least one crop / orientation / obstruction flag.
rdf_rejected_mask = ~rdf.is_accepted_in_qa
rdf_flagged_mask = (
    rdf.is_bad_crop_cut_off
    | rdf.is_bad_crop
    | rdf.is_bad_crop_many_fish
    | rdf.is_bad_orientation
    | rdf.is_obstructed
)
rdf_near_mask = rdf.single_image_depth_m < 1.1
len(rdf[rdf_rejected_mask & rdf_flagged_mask & rdf_near_mask]) / len(rdf[rdf_rejected_mask & rdf_near_mask])



In [None]:
# Scratch arithmetic: 25 / 62.5 = 0.4.
# NOTE(review): the source of 25 and 37.5 is not shown here — confirm what this ratio represents.
25/(25+37.5)