In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool

# Widen pandas' display limits so long frames and long cell values (e.g. URLs)
# are shown in full instead of being truncated.
pd.options.display.max_rows = 500
pd.options.display.max_colwidth = 500

<h1> Download all available data for Vikane and Tittelsnes </h1>

In [None]:
# The PROD_SQL_CREDENTIALS environment variable holds the path of a JSON
# credentials file. Read it inside a `with` block so the file handle is closed
# as soon as the JSON has been parsed.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# get Cogito data

query = """
    select * from lati_fish_detections_lice_annotations where pen_id=37;
"""
cogito_df = rds_access_utils.extract_from_database(query)

cogito_df['is_qa'] = False

# get reconciled data

query = """
    select * from lati_fish_detections_lice_annotations_reconciled where pen_id = 37;
"""
reconciled_df = rds_access_utils.extract_from_database(query)
# NOTE(review): both frames are tagged is_qa=False, so this flag cannot tell
# Cogito rows from reconciled rows once the frames are combined. Confirm
# whether the reconciled rows were meant to be tagged True.
reconciled_df['is_qa'] = False


In [None]:
# Stack the Cogito and reconciled annotation frames row-wise into one frame
# (row-wise is pd.concat's default axis).
agg_df = pd.concat([cogito_df, reconciled_df])

In [None]:
# Directory handed to S3AccessUtils (presumably the local download root - TODO confirm).
# Override it with the AQUABYTE_DATA_DIR environment variable; the default is
# the path this notebook has always used.
s3_access_utils = S3AccessUtils(os.environ.get('AQUABYTE_DATA_DIR', '/root/data'))

<h1> Metric Generator (Bryton) </h1>

In [None]:
def get_bucket_key(image_url):
    """Split a crop image URL into its S3 ``(bucket, key)`` pair.

    Two URL layouts are handled:

    * URLs that do not mention ``aquabyte-crops-lati``: the bucket is fixed to
      ``'aquabyte-crops'`` and the whole URL path is the key.
    * URLs that mention ``aquabyte-crops-lati``: the first path segment is the
      bucket and everything after it is the key.

    Args:
        image_url: URL of the image as a string.

    Returns:
        Tuple ``(bucket, key)``. The key is an empty string when the URL path
        contains nothing after the bucket segment.
    """
    # Parse once; the leading '/' is not part of either the bucket or the key.
    path = urlparse(image_url, allow_fragments=False).path.lstrip('/')

    if 'aquabyte-crops-lati' not in image_url:
        return 'aquabyte-crops', path

    # S3 keys always use '/' as their separator, so split on it directly
    # instead of rebuilding the key with os.path.join (which uses the host
    # platform's separator and raises TypeError when there is nothing to join).
    bucket, _, key = path.partition('/')
    return bucket, key


def get_green_luminance(filename):
    """Return the mean green-channel value of an image, ignoring extremes.

    Only pixels whose green value lies strictly between the black threshold
    (15) and the glare threshold (100) contribute to the mean.

    Args:
        filename: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        Mean of the retained green values, as computed by ``np.mean``.
    """
    black_threshold = 15
    glare_threshold = 100

    # Channel index 1 of the last axis is taken as green (assumes an RGB(A) image).
    green = np.array(Image.open(filename))[:, :, 1]
    usable = (green > black_threshold) & (green < glare_threshold)
    return np.mean(green[usable])

def download_from_url(image_url):
    """Download the S3 object that `image_url` refers to.

    The URL is resolved to a (bucket, key) pair by `get_bucket_key`, and the
    download is delegated to the notebook-level `s3_access_utils`. Nothing is
    returned.
    """
    s3_location = get_bucket_key(image_url)
    s3_access_utils.download_from_s3(*s3_location)


<h1> Bremnes Tittelsnes Analysis </h1>

In [None]:
# Restrict the Cogito annotations to pen 37, captured on or after 2019-09-20.
ts_mask = cogito_df['captured_at'] >= '2019-09-20'
pen_id_mask = cogito_df['pen_id'] == 37

df = cogito_df.loc[ts_mask & pen_id_mask]

In [None]:
# Assumed fish dimensions (metres, going by the _M suffix) and the focal
# length used for the single-image depth estimate (presumably in pixels -
# TODO confirm).
FISH_WIDTH_M = 0.108
FISH_LENGTH_M = 0.534
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Estimate the fish's length in pixels from a crop's bounding-box size.

    The crop's diagonal angle is compared with the angle implied by the
    assumed fish width/length ratio:

    * near-horizontal box -> the box width is the fish length,
    * near-vertical box   -> the box height is the fish length,
    * anything in between -> the box diagonal is used.

    Args:
        x: mapping with numeric 'width' and 'height' entries.

    Returns:
        The estimated fish length in pixels.
    """
    w, h = x['width'], x['height']
    # arctan2 gives the same angle as arctan(h / w) for positive sizes, but
    # does not raise ZeroDivisionError when the crop width is 0.
    theta = np.degrees(np.arctan2(h, w))
    phi = np.degrees(np.arctan2(FISH_WIDTH_M, FISH_LENGTH_M))
    if theta < phi:
        return w
    elif theta > 90.0 - phi:
        return h
    else:
        return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and add image-size and depth-estimate columns.

    Args:
        df: DataFrame with an `is_cleaner_fish` column and a `metadata`
            column of mappings that contain 'width' and 'height'.

    Returns:
        A new DataFrame (the input is left untouched) holding the rows whose
        `is_cleaner_fish` is not True, plus the columns `image_width`,
        `image_height`, `length_px` and `single_image_depth_m`.
    """
    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the caller's data.
    processed = df[df.is_cleaner_fish != True].copy()
    processed['image_width'] = processed.metadata.apply(lambda meta: meta['width'])
    processed['image_height'] = processed.metadata.apply(lambda meta: meta['height'])
    processed['length_px'] = processed.metadata.apply(depth_fn)
    # Depth estimate: focal length times assumed fish length, divided by the
    # fish's apparent length in pixels.
    processed['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / processed.length_px
    return processed

# Rebind `df` to the processed frame (cleaner-fish rows removed, size and
# depth columns added). Note that this overwrites the filtered input frame.
df = process_data_df(df)


In [None]:
# Overlay the depth distribution of every crop (blue) with that of the crops
# whose is_skipped flag is not True (red).
plt.figure(figsize=(20, 10))
# Reuse the first histogram's bin edges for the second one so the two
# distributions are binned identically and can be compared bar for bar.
_, bin_edges, _ = plt.hist(df.single_image_depth_m, bins=100, color='blue', alpha=0.5, label='all crops')
plt.hist(df[df.is_skipped != True].single_image_depth_m, bins=bin_edges, color='red', alpha=0.5, label='is_skipped != True')
plt.xlabel('single_image_depth_m')
plt.ylabel('number of crops')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Row masks over the processed crops:
#   dof_mask      - estimated depth strictly between 0.5 and 1.1
#   bad_crop_mask - at least one of the crop-quality flags is set
#   reject_mask   - the crop was skipped
dof_mask = (df.single_image_depth_m > 0.5) & (df.single_image_depth_m < 1.1)
bad_crop_mask = (
    df.is_bad_crop
    | df.is_bad_crop_cut_off
    | df.is_bad_crop_many_fish
    | df.is_bad_orientation
    | df.is_cleaner_fish
    | df.is_obstructed
)
reject_mask = df.is_skipped == True

original_skip_rate = df[reject_mask].shape[0] / df.shape[0]
print('Original skip rate: {}'.format(original_skip_rate))

skip_rate_after_hard_depth_cutoff = df[dof_mask & reject_mask].shape[0] / df[dof_mask].shape[0]
print('Skip rate after hard depth cutoff: {}'.format(skip_rate_after_hard_depth_cutoff))

# The denominator counts rejects inside the depth window, so the numerator
# must be restricted to rejects as well. Without reject_mask it would also
# count bad crops that were never skipped, and the ratio could exceed 1.
pct_rejects_in_dof_bad_crop = (
    df[dof_mask & reject_mask & bad_crop_mask].shape[0] / df[dof_mask & reject_mask].shape[0]
)
print('Percentage of rejects within depth of field that are bad crops: {}'.format(pct_rejects_in_dof_bad_crop))

skip_rate_assuming_perfect_cropper = (
    df[dof_mask & reject_mask & ~bad_crop_mask].shape[0] / df[dof_mask & ~bad_crop_mask].shape[0]
)
print('Skip rate assuming hard depth cutoff & perfect cropper: {}'.format(skip_rate_assuming_perfect_cropper))


In [None]:
# Image URLs of the crops explicitly marked as not skipped.
df.loc[df.is_skipped == False, 'image_url']

In [None]:
#!/usr/bin/env python
# coding: utf-8

# In[45]:


import numpy as np
from matplotlib import pyplot as plt
import math
import psycopg2
import json

# Tittelsnes
# FISH_LENGTH = 534;
# FISH_WIDTH = 108;

# # vikane
# FISH_LENGTH = 294;
# FISH_WIDTH = 65;

# hiskolmen
FISH_LENGTH = 685
FISH_WIDTH = 133

FOCAL_LENGTH = 4015
# Angle (in degrees) whose tangent is the assumed fish width/length ratio.
THETA = math.degrees(math.atan(FISH_WIDTH / FISH_LENGTH))


def data_pull(annotation_state_id, pen_id, captured_after='2019-09-20'):
    """Fetch crop-annotation rows for one pen and one annotation state.

    Connection settings are read from the environment instead of being
    embedded in the notebook:

    * ``AQUABYTE_DW_PASSWORD`` (required)
    * ``AQUABYTE_DW_HOST``, ``AQUABYTE_DW_USER``, ``AQUABYTE_DW_DBNAME``
      (optional; default to the values this notebook used before)

    NOTE: passwords were previously committed in this cell and should be
    treated as compromised and rotated.

    Args:
        annotation_state_id: annotation state to select; an int or a numeric string.
        pen_id: pen to select; an int or a numeric string.
        captured_after: only rows with ``captured_at`` later than this value
            are returned.

    Returns:
        List of row tuples ``(captured_at, width, height, crop_area, mean_lum,
        mean_green_lum, skip_reasons)``. An empty list is returned when the
        database reports an error (the error is printed).

    Raises:
        KeyError: if ``AQUABYTE_DW_PASSWORD`` is not set.
        ValueError: if ``pen_id`` or ``annotation_state_id`` is not an integer value.
    """
    import os

    # Values are bound as query parameters rather than concatenated into the
    # SQL text, so they can never be interpreted as SQL.
    query = """
        select
            captured_at,
            left_crop_metadata->>'width' as width,
            left_crop_metadata->>'height' as height,
            left_crop_metadata->>'crop_area' as crop_area,
            left_crop_metadata->>'mean_luminance' as mean_lum,
            left_crop_metadata->>'mean_green_luminance' as mean_green_lum,
            skip_reasons
        from prod.crop_annotation
        where pen_id = %s and captured_at > %s and annotation_state_id = %s
    """
    params = (int(pen_id), captured_after, int(annotation_state_id))

    password = os.environ['AQUABYTE_DW_PASSWORD']
    host = os.environ.get('AQUABYTE_DW_HOST', 'aquabyte-service-metadata.cfwlu7jbdcqj.eu-west-1.rds.amazonaws.com')
    user = os.environ.get('AQUABYTE_DW_USER', 'aquabyte_ro')
    dbname = os.environ.get('AQUABYTE_DW_DBNAME', 'aquabyte_prod_dw')

    res = []

    print("Pulling...")
    conn = None
    try:
        # Keyword arguments avoid hand-building a DSN string, which breaks on
        # values containing spaces or quotes.
        conn = psycopg2.connect(dbname=dbname, user=user, host=host, password=password)
        # The cursor context manager closes the cursor even if execute() fails.
        with conn.cursor() as cur:
            cur.execute(query, params)
            res = cur.fetchall()
        print("SUCCESSFULLY SELECTED FROM DB.")
    except psycopg2.DatabaseError as error:
        print(error)
        print("COULD NOT CONNECT TO DB")
    else:
        # Only report success when the query actually succeeded.
        print("Operation done successfully")
    finally:
        if conn is not None:
            conn.close()

    return res

def get_depth(width, height):
    """Estimate the camera-to-fish distance from a crop's bounding-box size.

    The crop's diagonal angle (measured from the horizontal) selects which
    box dimension approximates the fish length in pixels:

    * angle below THETA        -> the box is wide and flat, use its width,
    * angle above 90 - THETA   -> the box is tall and narrow, use its height,
    * anything in between      -> use the box diagonal.

    Args:
        width: crop width in pixels.
        height: crop height in pixels.

    Returns:
        ``FOCAL_LENGTH * FISH_LENGTH / pixel_length``, in the same length unit
        as FISH_LENGTH.

    Raises:
        ZeroDivisionError: if the selected pixel length is 0.
    """
    # The angle must be atan(height / width): a small height relative to the
    # width means a horizontal fish whose length is the box width. The
    # previous atan(width / height) inverted this, so wide crops were measured
    # by their height. atan2 also copes with a zero width or height.
    phi = math.degrees(math.atan2(height, width))
    if phi < THETA:
        pixel_length = width
    elif phi > (90 - THETA):
        pixel_length = height
    else:
        pixel_length = math.hypot(width, height)

    return (FOCAL_LENGTH * FISH_LENGTH) / pixel_length

def _depths_cm(rows):
    """Turn query rows into depth estimates (get_depth result divided by 10).

    Width and height are read from columns 1 and 2 of each row.
    """
    # Dividing by 10 presumably converts millimetres to centimetres - TODO confirm.
    return [get_depth(int(row[1]), int(row[2])) / 10 for row in rows]


def calculate():
    """Pull accepted and skipped crops for pen 40 and plot their depth histograms.

    Annotation state ids used by the queries:
        3 = annotator accepts, 4 = annotator skips
        (6 = QA skips and 7 = QA accepts are not used here)

    Returns:
        Tuple ``(accepted_data, skipped_data)`` of depth lists.
    """
    pen_id = '40'

    accepted_rows = data_pull('3', pen_id)
    print("ROW COUNT:", len(accepted_rows))
    accepted_data = _depths_cm(accepted_rows)

    skipped_rows = data_pull('4', pen_id)
    print("ROW COUNT:", len(skipped_rows))
    skipped_data = _depths_cm(skipped_rows)

    # The title now matches the query's date filter (captured_at > 2019-09-20);
    # it previously said 9/15.
    plt.title("Hiskolmen depth accepts+skips > 9/20")
    # Reuse the accepted histogram's bin edges so both series are binned identically.
    _, bins, _ = plt.hist(accepted_data, bins=100, range=[0, 200], density=False)
    plt.hist(skipped_data, bins=bins, alpha=0.5, density=False)
    return accepted_data, skipped_data
    

    
    
# Run the pull-and-plot routine and keep both returned depth lists at notebook scope.
accepted_data, skipped_data = calculate()


In [None]:
# Among rows not accepted in QA with depth below 1.1: the fraction that carry
# at least one bad-crop flag.
# NOTE(review): `rdf` is not defined in any cell visible here - confirm where it comes from.
(
    len(rdf[
        (~rdf.is_accepted_in_qa)
        & (rdf.is_bad_crop_cut_off | rdf.is_bad_crop | rdf.is_bad_crop_many_fish | rdf.is_bad_orientation | rdf.is_obstructed)
        & (rdf.single_image_depth_m < 1.1)
    ])
    / len(rdf[(~rdf.is_accepted_in_qa) & (rdf.single_image_depth_m < 1.1)])
)



In [None]:
# NOTE(review): scratch arithmetic (evaluates to 0.4); the meaning of 25 and 37.5
# is not stated anywhere in the notebook - name these values or remove the cell.
25/(25+37.5)