In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
import seaborn as sns
from IPython.display import Image
import seaborn as sns

# Raise the pandas display limits (rows, column width, column count) to 500
# so that large query results are rendered without truncation.
for option_name in ('display.max_rows', 'display.max_colwidth', 'display.max_columns'):
    pd.set_option(option_name, 500)

<h1> Load Data </h1>

In [None]:
# NOTE(review): '/root/data' is presumably the local root/cache directory used
# by the S3 helper -- confirm against S3AccessUtils.
s3_access_utils = S3AccessUtils('/root/data')

# The PROD_SQL_CREDENTIALS environment variable holds the path to a JSON file
# with the database credentials. Reading it inside `with` guarantees the file
# handle is closed (the bare `json.load(open(...))` form leaked it).
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Lice-annotation rows for pen 65 captured between 2020-01-16 and 2020-01-21.
query = """
    select * from lati_fish_detections_lice_annotations where pen_id = 65 and captured_at between '2020-01-16' and '2020-01-21';
"""
df = rds_access_utils.extract_from_database(query)



<h1> Compute features </h1>

In [None]:
def generate_center_coordinate(metadata, x_direction=True):
    """Return the center coordinate of a box along one axis.

    Args:
        metadata: mapping with 'x_coord' and 'width' (read when
            ``x_direction`` is truthy) or 'y_coord' and 'height'
            (read otherwise).
        x_direction: pick the x axis when truthy, the y axis otherwise.

    Returns:
        The origin coordinate plus half of the extent along the chosen axis.

    Raises:
        KeyError: if a key needed for the chosen axis is missing.
    """
    if x_direction:
        origin_key, extent_key = 'x_coord', 'width'
    else:
        origin_key, extent_key = 'y_coord', 'height'
    return metadata[origin_key] + 0.5 * metadata[extent_key]

def retrieve_depth(metadata):
    """Return the 'value' of the 'depth_m_weekly_linear_model' entry.

    Args:
        metadata: mapping that may contain 'depth_m_weekly_linear_model'.

    Returns:
        ``metadata['depth_m_weekly_linear_model']['value']`` when that key is
        present, otherwise ``None``.
    """
    depth_key = 'depth_m_weekly_linear_model'
    return metadata[depth_key]['value'] if depth_key in metadata.keys() else None


# Derive the per-row feature columns from the `metadata` column.
metadata_col = df['metadata']
df['centroid_x'] = metadata_col.apply(generate_center_coordinate, x_direction=True)
df['centroid_y'] = metadata_col.apply(generate_center_coordinate, x_direction=False)
df['depth'] = metadata_col.apply(retrieve_depth)
# A row counts as submitted when its `is_skipped` flag equals False.
df['is_submitted'] = df['is_skipped'].eq(False)
df['crop_area'] = metadata_col.apply(lambda meta: meta['crop_area'])

In [None]:
# Accept rate (share of submitted rows) per crop_area decile.
accept_rates = []
is_submitted_mask = df.is_submitted == True
# 11 edges (0th, 10th, ..., 100th percentile) delimiting 10 bins.
crop_area_list = list(np.percentile(df.crop_area, range(0, 110, 10)))
n_bins = len(crop_area_list) - 1
for idx in range(n_bins):
    low_ca, high_ca = crop_area_list[idx], crop_area_list[idx+1]
    # Bins are half-open [low, high) so a row sitting exactly on a percentile
    # edge is counted exactly once; the last bin is closed so the maximum is
    # kept. (Strict inequalities on both sides silently dropped those rows.)
    if idx == n_bins - 1:
        below_high = df.crop_area <= high_ca
    else:
        below_high = df.crop_area < high_ca
    mask = (df.crop_area >= low_ca) & below_high
    bin_size = mask.sum()
    # An empty bin is recorded as NaN instead of re-appending the previous
    # bin's rate (or raising NameError when the very first bin is empty).
    if bin_size > 0:
        accept_rate = (mask & is_submitted_mask).sum() / bin_size
    else:
        accept_rate = np.nan
    accept_rates.append(accept_rate)

plt.bar(x=range(len(accept_rates)), height=accept_rates)
plt.xlabel('crop_area decile')
plt.ylabel('accept rate')


In [None]:
# Accept rate (share of submitted rows) per centroid_x decile.
# Uses `is_submitted_mask` defined in the crop_area cell above.
accept_rates = []
# 11 edges (0th, 10th, ..., 100th percentile) delimiting 10 bins.
centroid_x_list = list(np.percentile(df.centroid_x, range(0, 110, 10)))
n_bins = len(centroid_x_list) - 1
for idx in range(n_bins):
    low_cx, high_cx = centroid_x_list[idx], centroid_x_list[idx+1]
    # Bins are half-open [low, high) so a row sitting exactly on a percentile
    # edge is counted exactly once; the last bin is closed so the maximum is
    # kept. (Strict inequalities on both sides silently dropped those rows.)
    if idx == n_bins - 1:
        below_high = df.centroid_x <= high_cx
    else:
        below_high = df.centroid_x < high_cx
    mask = (df.centroid_x >= low_cx) & below_high
    bin_size = mask.sum()
    # An empty bin is recorded as NaN instead of re-appending the previous
    # bin's rate (or a stale value left over from an earlier cell).
    if bin_size > 0:
        accept_rate = (mask & is_submitted_mask).sum() / bin_size
    else:
        accept_rate = np.nan
    accept_rates.append(accept_rate)

plt.bar(x=range(len(accept_rates)), height=accept_rates)
plt.xlabel('centroid_x decile')
plt.ylabel('accept rate')



In [None]:
# Accept rate (share of submitted rows) per centroid_y decile.
# Uses `is_submitted_mask` defined in the crop_area cell above.
accept_rates = []
# 11 edges (0th, 10th, ..., 100th percentile) delimiting 10 bins.
centroid_y_list = list(np.percentile(df.centroid_y, range(0, 110, 10)))
n_bins = len(centroid_y_list) - 1
for idx in range(n_bins):
    low_cy, high_cy = centroid_y_list[idx], centroid_y_list[idx+1]
    # Bins are half-open [low, high) so a row sitting exactly on a percentile
    # edge is counted exactly once; the last bin is closed so the maximum is
    # kept. (Strict inequalities on both sides silently dropped those rows.)
    if idx == n_bins - 1:
        below_high = df.centroid_y <= high_cy
    else:
        below_high = df.centroid_y < high_cy
    mask = (df.centroid_y >= low_cy) & below_high
    bin_size = mask.sum()
    # An empty bin is recorded as NaN instead of re-appending the previous
    # bin's rate (or a stale value left over from an earlier cell).
    if bin_size > 0:
        accept_rate = (mask & is_submitted_mask).sum() / bin_size
    else:
        accept_rate = np.nan
    accept_rates.append(accept_rate)

plt.bar(x=range(len(accept_rates)), height=accept_rates)
plt.xlabel('centroid_y decile')
plt.ylabel('accept rate')



In [None]:
# Accept rate on a 2-D grid: crop_area percentile bin (i) x centroid_y
# percentile bin (j), where the centroid_y edges are recomputed inside each
# crop_area bin.
percentile_size = 10
percentile_steps = range(0, 100+percentile_size, percentile_size)
is_submitted_mask = df.is_submitted == True
crop_area_list = list(np.percentile(df.crop_area, percentile_steps))
n_bins = len(crop_area_list) - 1
# NaN-initialised so that empty cells are left blank by the heatmap instead of
# being reported as a 0.0 accept rate.
accept_rates = np.full((n_bins, n_bins), np.nan)
for i in range(n_bins):
    low_ca, high_ca = crop_area_list[i], crop_area_list[i+1]
    # Half-open [low, high) bins, last bin closed: rows sitting exactly on a
    # percentile edge are counted exactly once instead of being dropped.
    if i == n_bins - 1:
        ca_below_high = df.crop_area <= high_ca
    else:
        ca_below_high = df.crop_area < high_ca
    ca_mask = (df.crop_area >= low_ca) & ca_below_high
    if not ca_mask.any():
        # No rows in this crop_area bin: percentiles of an empty selection are
        # undefined, so leave the whole row as NaN.
        continue
    centroid_y_list = list(np.percentile(df[ca_mask].centroid_y, percentile_steps))
    n_cy_bins = len(centroid_y_list) - 1
    for j in range(n_cy_bins):
        low_cy, high_cy = centroid_y_list[j], centroid_y_list[j+1]
        if j == n_cy_bins - 1:
            cy_below_high = df.centroid_y <= high_cy
        else:
            cy_below_high = df.centroid_y < high_cy
        cy_mask = (df.centroid_y >= low_cy) & cy_below_high
        mask = ca_mask & cy_mask
        cell_size = mask.sum()
        if cell_size > 0:
            accept_rates[i, j] = (mask & is_submitted_mask).sum() / cell_size

plt.figure(figsize=(20, 10))
# After the transpose, columns (x axis) are crop_area bins and rows (y axis)
# are centroid_y bins.
sns.heatmap(accept_rates.T, annot=True)
plt.xlabel('A')
plt.ylabel('centroid_y percentile bin (within crop_area bin)')
plt.show()


In [None]:
# Accept rate on a 2-D grid: crop_area percentile bin (i) x centroid_x
# percentile bin (j), where the centroid_x edges are recomputed inside each
# crop_area bin.
percentile_size = 10
percentile_steps = range(0, 100+percentile_size, percentile_size)
is_submitted_mask = df.is_submitted == True
# The bin edges are computed BEFORE the result matrix is sized, so this cell no
# longer depends on `crop_area_list` / `centroid_x_list` left behind by
# earlier cells.
crop_area_list = list(np.percentile(df.crop_area, percentile_steps))
n_bins = len(crop_area_list) - 1
# NaN-initialised so that empty cells are left blank by the heatmap instead of
# being reported as a 0.0 accept rate.
accept_rates = np.full((n_bins, n_bins), np.nan)
for i in range(n_bins):
    low_ca, high_ca = crop_area_list[i], crop_area_list[i+1]
    # Half-open [low, high) bins, last bin closed: rows sitting exactly on a
    # percentile edge are counted exactly once instead of being dropped.
    if i == n_bins - 1:
        ca_below_high = df.crop_area <= high_ca
    else:
        ca_below_high = df.crop_area < high_ca
    ca_mask = (df.crop_area >= low_ca) & ca_below_high
    if not ca_mask.any():
        # No rows in this crop_area bin: percentiles of an empty selection are
        # undefined, so leave the whole row as NaN.
        continue
    centroid_x_list = list(np.percentile(df[ca_mask].centroid_x, percentile_steps))
    n_cx_bins = len(centroid_x_list) - 1
    for j in range(n_cx_bins):
        low_cx, high_cx = centroid_x_list[j], centroid_x_list[j+1]
        if j == n_cx_bins - 1:
            cx_below_high = df.centroid_x <= high_cx
        else:
            cx_below_high = df.centroid_x < high_cx
        cx_mask = (df.centroid_x >= low_cx) & cx_below_high
        mask = ca_mask & cx_mask
        cell_size = mask.sum()
        if cell_size > 0:
            accept_rates[i, j] = (mask & is_submitted_mask).sum() / cell_size

plt.figure(figsize=(20, 10))
# After the transpose, columns (x axis) are crop_area bins and rows (y axis)
# are centroid_x bins.
sns.heatmap(accept_rates.T, annot=True)
plt.xlabel('A')
plt.ylabel('centroid_x percentile bin (within crop_area bin)')
plt.show()
