In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm

# Raise the pandas display limits so long tables and wide cell contents
# are shown in full (both limits are set to 500).
for display_option in ('display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, 500)

<h1> Download Blom Kjeppevikholmen Data </h1>

In [None]:
# Build the RDS client from the JSON credentials file whose path is given by
# the PROD_SQL_CREDENTIALS environment variable. The context manager makes
# sure the file handle is closed once the credentials have been parsed
# (the bare `json.load(open(...))` form left it open).
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Pull every lice-annotation row recorded for pen_id = 4.
# NOTE(review): presumably pen 4 is the Blom Kjeppevikholmen pen named in the
# section heading -- confirm.
query = """
    select * from lati_fish_detections_lice_annotations where pen_id = 4;
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
# Pull every row of the reconciled lice-annotation table for pen_id = 4,
# reusing the RDS client created in the previous cell.
# NOTE(review): the result is later matched to `df` through its
# `lati_fish_detections_lice_annotations_id` column -- presumably a foreign
# key to `df.id`; confirm against the schema.
query = """
    select * from lati_fish_detections_lice_annotations_reconciled where pen_id = 4;
"""
reconciled_df = rds_access_utils.extract_from_database(query)

<h1> Determine how many crops are cut off due to limited FOV </h1>

In [None]:
# Derive the pixel coordinates of each crop's four edges from the per-row
# `metadata` mapping (keys used: x_coord, y_coord, width, height).
# Each entry maps the new column name to the function computing that edge.
edge_extractors = {
    'crop_left_edge_px': lambda box: box['x_coord'],
    'crop_right_edge_px': lambda box: box['x_coord'] + box['width'],
    'crop_top_edge_px': lambda box: box['y_coord'],
    'crop_bottom_edge_px': lambda box: box['y_coord'] + box['height'],
}
for edge_column, extract_edge in edge_extractors.items():
    df[edge_column] = df.metadata.apply(extract_edge)

In [None]:
# Frame extents that the crop edges are compared against.
# NOTE(review): presumably the full camera image is 4096 x 3000 px -- confirm.
IMAGE_WIDTH_PX = 4096
IMAGE_HEIGHT_PX = 3000

# Rows that annotators flagged as a bad crop.
bad_crop_mask = df.is_bad_crop == True

# isolate the cases where bad crop is due to fish getting cut off due to limited field of view
# A crop is treated as cut off on a side when its edge sits exactly on the frame boundary.
left_edge_mask = df.crop_left_edge_px == 0
right_edge_mask = df.crop_right_edge_px == IMAGE_WIDTH_PX
top_edge_mask = df.crop_top_edge_px == 0
# Fix: the bottom-edge test must look at the crop's bottom edge. It previously
# compared crop_top_edge_px with the frame height, which tested the wrong column.
bottom_edge_mask = df.crop_bottom_edge_px == IMAGE_HEIGHT_PX

# Only the left/right boundaries count towards the limited-FOV mask; the
# top/bottom masks are intentionally left out here (they were disabled in the
# original expression) but remain available for other cells.
edge_mask = left_edge_mask | right_edge_mask

In [None]:
# Number of bad crops that touch the left/right frame boundary, and the
# number of bad crops overall.
bad_crop_limited_fov_count = len(df.loc[bad_crop_mask & edge_mask])
bad_crop_count = len(df.loc[bad_crop_mask])

# The printed value is a ratio of the two counts (0..1), not scaled by 100.
limited_fov_share = bad_crop_limited_fov_count / bad_crop_count
print('Percentage of bad crops due to limited FOV: {}'.format(limited_fov_share))

# Count of all crops touching any of the four frame boundaries, regardless of
# whether they were flagged as bad.
any_edge_mask = left_edge_mask | right_edge_mask | top_edge_mask | bottom_edge_mask
good_crop_edge_count = len(df.loc[any_edge_mask])

In [None]:
# Fraction of edge-touching crops that are not counted as limited-FOV bad crops.
# NOTE(review): good_crop_edge_count is built from all four edge masks, while
# bad_crop_limited_fov_count uses edge_mask (left/right only) -- confirm the
# two counts are meant to use different edge definitions.
edge_crops_not_bad_count = good_crop_edge_count - bad_crop_limited_fov_count
edge_crops_not_bad_count / good_crop_edge_count

In [None]:
# Ids of the raw annotation rows whose crop touches the left/right frame boundary.
edge_annotation_ids = df[edge_mask].id

# Reconciled rows that refer to one of those edge-touching annotations.
reconciled_edge_mask = reconciled_df.lati_fish_detections_lice_annotations_id.isin(edge_annotation_ids)
# Reconciled rows that were not skipped.
accept_mask = reconciled_df.is_skipped == False

# Fraction of accepted (non-skipped) reconciled rows that come from edge-touching crops.
accepted_edge_count = len(reconciled_df[reconciled_edge_mask & accept_mask])
accepted_count = len(reconciled_df[accept_mask])
accepted_edge_count / accepted_count