In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse

# Let notebook output show up to 500 rows and 500 characters per cell.
pd.options.display.max_rows = 500
pd.options.display.max_colwidth = 500

<h1> Get lice annotation data </h1>

In [None]:
# Pen and capture window (start inclusive, end exclusive) used by the queries.
pen_id = 1
start_date = '2019-05-10'
end_date = '2019-05-15'

# Credentials come from the JSON file named by PROD_SQL_CREDENTIALS. The
# context manager closes the file once it has been parsed.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# NOTE(review): values are interpolated directly into the SQL text. That is fine
# for the notebook constants above; do not route untrusted input through here.
query = """
    select * from lati_fish_detections_lice_annotations where pen_id={0}
    and captured_at >= '{1}' and captured_at < '{2}';
""".format(pen_id, start_date, end_date)
cogito_df = rds_access_utils.extract_from_database(query)

# get rid of rows that would already appear in the reconciled table
# (.copy() so the column added below is written to an independent frame rather
# than a filtered slice, which avoids SettingWithCopyWarning)
cogito_df = cogito_df[cogito_df.is_skipped == True].copy()
cogito_df['is_accepted_in_qa'] = False

In [None]:
# Same pen and capture window, read from the reconciled annotations table.
query = """
    select * from lati_fish_detections_lice_annotations_reconciled where pen_id={0}
    and captured_at >= '{1}' and captured_at < '{2}';
""".format(pen_id, start_date, end_date)
reconciled_df = rds_access_utils.extract_from_database(query)

# A reconciled row is flagged as accepted in QA exactly when its
# adult_female_count is >= 0; missing (NaN) counts compare False.
reconciled_df['is_accepted_in_qa'] = reconciled_df['adult_female_count'] >= 0

In [None]:
# Single day to inspect in the plots below.
date = '2019-05-10'

# Stack both annotation sources and index them chronologically by capture time
# (time-based rolling windows need a sorted DatetimeIndex).
df = pd.concat([cogito_df, reconciled_df], axis=0)
df = df.sort_values('captured_at')
df.index = pd.to_datetime(df.captured_at)

# Select the day's rows with .loc: plain df[date] row selection on a
# DatetimeIndex was removed in pandas 2.0. '20min' replaces the deprecated
# '20T' alias. The 20-minute rolling totals are multiplied by 3 to scale them
# to a 60-minute equivalent.
good_crop_rate = df.loc[date].is_accepted_in_qa.rolling('20min').sum() * 3
crop_rate = df.loc[date].is_accepted_in_qa.rolling('20min').count() * 3

In [None]:
# Rolling rate of QA-accepted crops over the selected day.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(good_crop_rate.index, good_crop_rate, color='r')
ax.grid()
plt.show()

In [None]:
# Rolling rate of all crops (accepted or not) over the selected day.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(crop_rate.index, crop_rate, color='r')
ax.grid()
plt.show()

In [None]:
# Running total of QA-accepted crops over the selected day.
# df.loc[date] replaces df[date]: string row selection through plain [] on a
# DatetimeIndex was removed in pandas 2.0.
plt.figure(figsize=(20, 10))
plt.plot(df.loc[date].is_accepted_in_qa.index, df.loc[date].is_accepted_in_qa.cumsum())
plt.grid()
plt.show()

In [None]:
# Running row count (0-based) of all crops over the selected day.
# df.loc[date] replaces df[date]: string row selection through plain [] on a
# DatetimeIndex was removed in pandas 2.0.
plt.figure(figsize=(20, 10))
plt.plot(df.loc[date].is_accepted_in_qa.index, range(df.loc[date].shape[0]))
plt.grid()
plt.show()

In [None]:
# Rolling rate of QA-accepted crops over the selected day.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(good_crop_rate.index, good_crop_rate, color='r')
ax.grid()
plt.show()

In [None]:
# Rolling rate of all crops (accepted or not) over the selected day.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(crop_rate.index, crop_rate, color='r')
ax.grid()
plt.show()

In [None]:
# Running total of QA-accepted crops over the selected day.
# df.loc[date] replaces df[date]: string row selection through plain [] on a
# DatetimeIndex was removed in pandas 2.0.
plt.figure(figsize=(20, 10))
plt.plot(df.loc[date].is_accepted_in_qa.index, df.loc[date].is_accepted_in_qa.cumsum())
plt.grid()
plt.show()

In [None]:
# Running row count (0-based) of all crops over the selected day.
# df.loc[date] replaces df[date]: string row selection through plain [] on a
# DatetimeIndex was removed in pandas 2.0.
plt.figure(figsize=(20, 10))
plt.plot(df.loc[date].is_accepted_in_qa.index, range(df.loc[date].shape[0]))
plt.grid()
plt.show()

In [None]:
# aggregate_df.to_csv('/root/data/alok/aggregate_df_bremnes_tittelsnes.csv')

In [None]:
# S3 helper from aquabyte.data_access_utils.
# NOTE(review): '/root/data' is presumably the local directory it downloads
# into or caches under; confirm against S3AccessUtils.
s3_access_utils = S3AccessUtils('/root/data')

In [None]:
# Nominal fish dimensions in metres, used as a size prior.
FISH_WIDTH_M = 0.108
FISH_LENGTH_M = 0.524
# NOTE(review): presumably the focal length in pixels, since it is multiplied by
# a length in metres and divided by a length in pixels to give metres; confirm.
FOCAL_LENGTH = 4015

def depth_fn(x):
    """Pick the crop extent (in the units of ``x``) that best represents fish length.

    Despite the name, this returns a length rather than a depth;
    ``process_data_df`` converts the result into a depth estimate.

    The angle of the crop's diagonal (``theta``) is compared with the angle of
    the nominal fish's diagonal (``phi = atan(FISH_WIDTH_M / FISH_LENGTH_M)``):

    * ``theta < phi``       -> the box is flatter than the fish; return width.
    * ``theta > 90 - phi``  -> the box is taller than a vertical fish; return height.
    * otherwise             -> return the box diagonal.

    Args:
        x: mapping with numeric ``'width'`` and ``'height'`` entries.

    Returns:
        ``x['width']``, ``x['height']``, or the diagonal ``sqrt(w**2 + h**2)``.
    """
    w, h = x['width'], x['height']
    # arctan2(h, w) equals arctan(h / w) for w > 0, but a zero-width box gives
    # 90 degrees instead of raising ZeroDivisionError.
    theta = np.arctan2(h, w) * (180.0 / np.pi)
    phi = np.arctan(FISH_WIDTH_M / FISH_LENGTH_M) * (180.0 / np.pi)
    if theta < phi:
        return w
    if theta > 90.0 - phi:
        return h
    return (h**2 + w**2)**0.5

def process_data_df(df):
    """Drop cleaner-fish rows and derive image-size and depth columns.

    Args:
        df: DataFrame with an ``is_cleaner_fish`` column and a ``metadata``
            column of dict-like objects providing ``'width'``, ``'height'``
            and, optionally, ``'depth_m'``.

    Returns:
        A new DataFrame (the input is not modified) without the rows where
        ``is_cleaner_fish`` is True, and with these added columns:
        ``image_width``, ``image_height``, ``length_px`` (from ``depth_fn``),
        ``single_image_depth_m`` (``FOCAL_LENGTH * FISH_LENGTH_M / length_px``)
        and ``stereo_depth_m`` (``metadata['depth_m']``, or None when absent).
    """
    # `!= True` keeps rows whose flag is False or missing. .copy() makes the
    # assignments below target an independent frame, not a filtered slice,
    # which avoids SettingWithCopyWarning.
    df = df[df.is_cleaner_fish != True].copy()
    df['image_width'] = df.metadata.apply(lambda x: x['width'])
    df['image_height'] = df.metadata.apply(lambda x: x['height'])
    df['length_px'] = df.metadata.apply(depth_fn)
    df['single_image_depth_m'] = FOCAL_LENGTH * FISH_LENGTH_M / df.length_px
    df['stereo_depth_m'] = df.metadata.apply(lambda x: x.get('depth_m'))
    return df

# Rebind `df` to the processed frame: cleaner-fish rows removed and the
# image-size / length / depth columns added.
df = process_data_df(df)


In [None]:
# Row masks reused by the plots below.
# accept_mask: rows flagged as accepted in QA.
accept_mask = df['is_accepted_in_qa']
# dof_mask: rows whose single-image depth estimate lies strictly between 0.8 and 0.9.
dof_mask = df['single_image_depth_m'].gt(0.8) & df['single_image_depth_m'].lt(0.9)



In [None]:
# Overlay image_width distributions for rows passing dof_mask, split by QA
# outcome. Labels and a legend are added so the two translucent histograms can
# be told apart.
plt.hist(df.loc[(~accept_mask) & dof_mask, 'image_width'], color='b', alpha=0.5, label='not accepted in QA')
plt.hist(df.loc[accept_mask & dof_mask, 'image_width'], color='r', alpha=0.5, label='accepted in QA')
plt.xlabel('image_width')
plt.ylabel('count')
plt.legend()
plt.show()