In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool

# Raise pandas' display limits so wide/long frames are shown in full in the notebook:
# up to 500 rows, and up to 500 characters per column.
for _display_option in ('display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, 500)

<h1> Download all available data for Vikane and Tittelsnes </h1>

In [None]:
# Build the RDS client from the JSON credentials file named by PROD_SQL_CREDENTIALS.
# The `with` block closes the file handle as soon as the credentials are parsed.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# get Cogito data

query = """
    select * from lati_fish_detections_lice_annotations where 
    (pen_id=37 or pen_id=56 or pen_id=57 or pen_id=58 or pen_id=59 or pen_id=60);
"""
cogito_df = rds_access_utils.extract_from_database(query)

# Keep only rows flagged is_skipped and mark them as not accepted in QA.
# `== True` is kept deliberately (it also drops null flags); `.copy()` makes the
# filtered frame independent so the column assignment below does not write to a slice.
cogito_df = cogito_df[cogito_df.is_skipped == True].copy()
cogito_df['is_accepted_in_qa'] = False

# get reconciled data

query = """
    select * from lati_fish_detections_lice_annotations_reconciled where 
    (pen_id=37 or pen_id=56 or pen_id=57 or pen_id=58 or pen_id=59 or pen_id=60);
"""
reconciled_df = rds_access_utils.extract_from_database(query)
# A reconciled row counts as accepted when it carries a non-negative adult_female_count;
# rows where the comparison is False (including nulls) stay not accepted.
reconciled_df['is_accepted_in_qa'] = False
reconciled_df.loc[reconciled_df.adult_female_count >= 0, 'is_accepted_in_qa'] = True


# combine into single dataframe
# ignore_index=True gives the combined frame a unique RangeIndex instead of repeating
# the labels of the two inputs, so boolean masks built from `df` can be realigned
# against filtered copies of it without hitting duplicate-label errors.
df = pd.concat([cogito_df, reconciled_df], axis=0, ignore_index=True)

In [None]:
# Shared S3 helper used by the download functions in this notebook.
# NOTE(review): '/root/data' is a hard-coded absolute path, presumably the local
# download root — confirm against S3AccessUtils and consider making it configurable.
s3_access_utils = S3AccessUtils('/root/data')

<h1> Metric Generator (Bryton) </h1>

In [None]:
def get_bucket_key(image_url):
    """Split an image URL into an S3 ``(bucket, key)`` pair.

    URLs that do not mention 'aquabyte-crops-lati' are treated as pointing at the
    'aquabyte-crops' bucket, with the whole URL path as the key. Otherwise the
    first path segment is the bucket and the remainder of the path is the key.

    The key is taken verbatim from the URL path (always '/'-separated), rather
    than being rebuilt with ``os.path.join``, which is platform-dependent and
    silently drops empty segments such as the one in 'a//b.jpg'.

    Args:
        image_url: URL string of the image.

    Returns:
        Tuple ``(bucket, key)``; ``key`` is '' when the path holds only a bucket.
    """
    path = urlparse(image_url, allow_fragments=False).path.lstrip('/')
    if 'aquabyte-crops-lati' not in image_url:
        return 'aquabyte-crops', path
    # Split once on the first '/', leaving the rest of the key untouched.
    bucket, _, key = path.partition('/')
    return bucket, key


def get_green_luminance(filename):
    """Return the mean of channel 1 over pixels inside a brightness band.

    The image is loaded into an array and only channel index 1 (green for an
    RGB image) is used. Pixels at or below the black threshold (15) or at or
    above the glare threshold (100) are excluded before averaging.

    Args:
        filename: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The mean of the retained channel values (NaN when no pixel qualifies).
    """
    black_threshold, glare_threshold = 15, 100

    green = np.array(Image.open(filename))[:, :, 1]
    in_band = (green > black_threshold) & (green < glare_threshold)
    return np.mean(green[in_band])

def download_from_url(image_url):
    """Fetch the S3 object behind ``image_url`` through the shared S3 helper.

    The URL is resolved to a (bucket, key) pair by ``get_bucket_key`` and handed
    to ``s3_access_utils.download_from_s3``. Nothing is returned.
    """
    s3_access_utils.download_from_s3(*get_bucket_key(image_url))


<h1> Blom  Vikane Depth / Luminance Analysis </h1>

In [None]:
# Boolean row filters over `df`: QA-accepted rows, rows from pen 37, and rows
# captured on or after 2019-09-15.
accept_mask = df['is_accepted_in_qa']
pen_id_mask = df['pen_id'] == 37
ts_mask = df['captured_at'] >= '2019-09-15'


In [None]:
# Worker pool (20 processes) shared by the download cells; fetch every accepted
# image selected by the current timestamp and pen masks.
pool = Pool(processes=20)
accepted_image_urls = df.loc[ts_mask & pen_id_mask & accept_mask, 'image_url'].tolist()
pool.map(download_from_url, accepted_image_urls)


In [None]:
# Sample up to 2000 not-accepted rows matching the current timestamp and pen masks,
# then download their images with the shared worker pool.
# - random_state pins the draw so a re-run selects the same rows.
# - the sample size is capped at the number of available rows, because
#   DataFrame.sample raises when asked for more rows than exist (without replacement).
rejected_df = df[ts_mask & pen_id_mask & ~accept_mask]
tdf = rejected_df.sample(n=min(2000, len(rejected_df)), random_state=0)
pool.map(download_from_url, tdf.image_url.tolist())

In [None]:
# Working set for the luminance analysis: every accepted row under the current
# timestamp/pen masks, followed by the sampled not-accepted rows in `tdf`.
rdf = pd.concat([df[ts_mask & pen_id_mask & accept_mask], tdf], axis=0)

In [None]:
# One luminance value per row of `rdf`, in row order; None marks a row whose image
# could not be downloaded or processed.
green_luminances = []

# NOTE(review): ts_mask / pen_id_mask are rebound here (pen 56, from 2019-09-20),
# but the loop below iterates `rdf` and never uses them. Later cells that index
# `df` with these names will see these new values — confirm that is intended.
ts_mask = df.captured_at >= '2019-09-20'
pen_id_mask = df.pen_id == 56

for count, image_url in enumerate(rdf.image_url):
    try:
        bucket, key = get_bucket_key(image_url)
        f_name = s3_access_utils.download_from_s3(bucket, key)
        green_luminances.append(get_green_luminance(f_name))
    except Exception:
        # Best effort: keep the list aligned with rdf's rows. `Exception` (rather
        # than a bare except) lets KeyboardInterrupt stop this long-running loop.
        green_luminances.append(None)
    # Lightweight progress indicator every 100 rows.
    if count % 100 == 0:
        print(count)




In [None]:
# Snapshot of `df` taken before later cells add columns to it or rebind it.
df_cache = df.copy()

In [None]:
# Write the computed luminances onto the rows of `df` selected by the current masks.
# NOTE(review): green_luminances holds one entry per row of `rdf`, whereas this mask
# selects rows of `df`; the assignment only succeeds if the two counts happen to
# match, and even then row order is not guaranteed to correspond — confirm the target.
df.loc[ts_mask & pen_id_mask, 'green_luminance'] = green_luminances

In [None]:
# Snapshot `rdf` first, then attach the luminances; the list was built by iterating
# `rdf` row by row, so it lines up positionally with rdf's rows.
rdf_cache = rdf.copy()
rdf['green_luminance'] = green_luminances

In [None]:
# Fish dimensions (metres, per the _M suffix) and focal length used by the
# single-image depth estimate. FOCAL_LENGTH is presumably in pixels — TODO confirm.
FISH_WIDTH_M = 0.065
FISH_LENGTH_M = 0.294
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Estimate the fish's length in pixels from a crop's width and height.

    The box's diagonal angle ``theta`` is compared with ``phi``, the diagonal
    angle of a FISH_LENGTH_M x FISH_WIDTH_M rectangle:

    * theta < phi        -> box flatter than the reference: use the width
    * theta > 90 - phi   -> box taller than the rotated reference: use the height
    * otherwise          -> use the box diagonal

    ``arctan2`` is used instead of ``arctan(h / w)`` so a zero-width box yields
    90 degrees rather than raising ZeroDivisionError. Width and height are
    assumed to be non-negative.

    Args:
        x: Mapping with numeric 'width' and 'height' entries.

    Returns:
        The chosen length (width, height, or diagonal) in the units of ``x``.
    """
    w, h = x['width'], x['height']
    theta = np.degrees(np.arctan2(h, w))
    phi = np.degrees(np.arctan2(FISH_WIDTH_M, FISH_LENGTH_M))
    if theta < phi:
        return w
    elif theta > 90.0 - phi:
        return h
    else:
        return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and add crop-size and single-image depth columns.

    Adds, from each row's ``metadata`` mapping:
      * image_width / image_height: metadata['width'] / metadata['height']
      * length_px: the pixel length chosen by ``depth_fn``
      * single_image_depth_m: FOCAL_LENGTH * FISH_LENGTH_M / length_px

    Args:
        df: DataFrame with ``is_cleaner_fish`` and ``metadata`` columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    # `!= True` keeps rows whose flag is False or null. `.copy()` detaches the
    # filtered frame so the column assignments below do not write to a slice.
    df = df[df.is_cleaner_fish != True].copy()
    df['image_width'] = df.metadata.apply(lambda x: x['width'])
    df['image_height'] = df.metadata.apply(lambda x: x['height'])
    df['length_px'] = df.metadata.apply(depth_fn)
    df['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / df.length_px
    return df

# NOTE(review): this rebinds `df` to a filtered frame, so the cell is not idempotent,
# and masks built from the unfiltered frame (ts_mask, pen_id_mask, accept_mask) no
# longer share its index if any cleaner-fish rows were dropped — rebuild them after this.
df = process_data_df(df)


In [None]:
# Scatter of single-image depth vs. green luminance for the rows of `df` selected
# by the current timestamp/pen masks, split by QA outcome.
fig, ax = plt.subplots(figsize=(20, 10))

rejected_rows = df[ts_mask & pen_id_mask & ~accept_mask]
accepted_rows = df[ts_mask & pen_id_mask & accept_mask]

# Rejected points are drawn first so the larger accepted markers sit on top.
for rows, colour, legend_label, marker_size in (
    (rejected_rows, 'blue', 'Rejected', 50),
    (accepted_rows, 'red', 'Accepted', 80),
):
    ax.scatter(rows.single_image_depth_m, rows.green_luminance,
               color=colour, label=legend_label, s=marker_size)

ax.set_title('Accepts / Rejects in depth & luminance space')
ax.set_xlabel('Depth (m) (based on single image)')
ax.set_ylabel('Green luminance')
ax.legend()
ax.grid()
plt.show()



In [None]:
# Fish dimensions (metres, per the _M suffix) and focal length used by the
# single-image depth estimate for this part of the analysis.
# FOCAL_LENGTH is presumably in pixels — TODO confirm.
FISH_WIDTH_M = 0.108
FISH_LENGTH_M = 0.534
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Estimate the fish's length in pixels from a crop's width and height.

    The box's diagonal angle ``theta`` is compared with ``phi``, the diagonal
    angle of a FISH_LENGTH_M x FISH_WIDTH_M rectangle:

    * theta < phi        -> box flatter than the reference: use the width
    * theta > 90 - phi   -> box taller than the rotated reference: use the height
    * otherwise          -> use the box diagonal

    ``arctan2`` is used instead of ``arctan(h / w)`` so a zero-width box yields
    90 degrees rather than raising ZeroDivisionError. Width and height are
    assumed to be non-negative.

    Args:
        x: Mapping with numeric 'width' and 'height' entries.

    Returns:
        The chosen length (width, height, or diagonal) in the units of ``x``.
    """
    w, h = x['width'], x['height']
    theta = np.degrees(np.arctan2(h, w))
    phi = np.degrees(np.arctan2(FISH_WIDTH_M, FISH_LENGTH_M))
    if theta < phi:
        return w
    elif theta > 90.0 - phi:
        return h
    else:
        return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and add crop-size and single-image depth columns.

    Adds, from each row's ``metadata`` mapping:
      * image_width / image_height: metadata['width'] / metadata['height']
      * length_px: the pixel length chosen by ``depth_fn``
      * single_image_depth_m: FOCAL_LENGTH * FISH_LENGTH_M / length_px

    Args:
        df: DataFrame with ``is_cleaner_fish`` and ``metadata`` columns.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    # `!= True` keeps rows whose flag is False or null. `.copy()` detaches the
    # filtered frame so the column assignments below do not write to a slice.
    df = df[df.is_cleaner_fish != True].copy()
    df['image_width'] = df.metadata.apply(lambda x: x['width'])
    df['image_height'] = df.metadata.apply(lambda x: x['height'])
    df['length_px'] = df.metadata.apply(depth_fn)
    df['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / df.length_px
    return df

# Rebind `rdf` to its processed form (cleaner fish removed, depth columns added).
# NOTE(review): not idempotent — re-running this cell processes the already-processed frame.
rdf = process_data_df(rdf)


In [None]:
# Scatter of single-image depth vs. green luminance for the working set `rdf`,
# split by QA outcome.
fig, ax = plt.subplots(figsize=(20, 10))

rejected_rows = rdf[rdf.is_accepted_in_qa == False]
accepted_rows = rdf[rdf.is_accepted_in_qa == True]

# Rejected points are drawn first so the larger accepted markers sit on top.
for rows, colour, legend_label, marker_size in (
    (rejected_rows, 'blue', 'Rejected', 50),
    (accepted_rows, 'red', 'Accepted', 80),
):
    ax.scatter(rows.single_image_depth_m, rows.green_luminance,
               color=colour, label=legend_label, s=marker_size)

ax.set_title('Accepts / Rejects in depth & luminance space')
ax.set_xlabel('Depth (m) (based on single image)')
ax.set_ylabel('Green luminance')
ax.legend()
ax.grid()
plt.show()



In [None]:
# Overlaid depth histograms: every row of `rdf` vs. the QA-accepted subset.
plt.figure(figsize=(20, 10))

all_depths = rdf.single_image_depth_m.dropna()
accepted_depths = rdf[rdf.is_accepted_in_qa].single_image_depth_m.dropna()

# Both histograms share the same 20 bin edges (derived from the full set).
# Passing bins=20 to each call separately would bin each series over its own
# range, so bar heights in the overlay would not be comparable.
bin_edges = np.histogram_bin_edges(all_depths, bins=20)

plt.hist(all_depths, bins=bin_edges, color='blue', alpha=0.5, label='All rows')
plt.hist(accepted_depths, bins=bin_edges, color='red', alpha=0.5, label='Accepted')

plt.title('Single-image depth distribution: all rows vs. accepted')
plt.xlabel('Depth (m) (based on single image)')
plt.ylabel('Count')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Share of rows closer than 1.1 m (single-image depth) that were accepted in QA.
near_camera = rdf.single_image_depth_m < 1.1
len(rdf[rdf.is_accepted_in_qa & (near_camera)]) / len(rdf[near_camera])

In [None]:
# Among not-accepted rows closer than 1.1 m, the share flagged is_bad_crop_cut_off.
near_camera = rdf.single_image_depth_m < 1.1
not_accepted = ~rdf.is_accepted_in_qa
len(rdf[(not_accepted) & (rdf.is_bad_crop_cut_off) & (near_camera)]) / len(rdf[(not_accepted) & (near_camera)])




In [None]:
# 92% too blurry, 35% bad crop, 32% too dark, 7% obstructed, 20% bad orientation

In [None]:
# Among not-accepted rows closer than 1.1 m, the share carrying at least one of the
# crop / orientation / obstruction flags.
near_camera = rdf.single_image_depth_m < 1.1
not_accepted = ~rdf.is_accepted_in_qa
any_flag = (
    rdf.is_bad_crop_cut_off
    | rdf.is_bad_crop
    | rdf.is_bad_crop_many_fish
    | rdf.is_bad_orientation
    | rdf.is_obstructed
)
len(rdf[(not_accepted) & (any_flag) & (near_camera)]) / len(rdf[(not_accepted) & (near_camera)])



In [None]:
# NOTE(review): ad-hoc ratio (evaluates to 0.4); the inputs 25 and 37.5 are not
# derived anywhere in the visible notebook — document their source or remove.
25/(25+37.5)