In [None]:
import datetime as dt
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.visualize import Visualizer, _normalize_world_keypoints
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model

import random
import torch
from aquabyte.data_loader import KeypointsDataset, NormalizeCentered2D, ToTensor, BODY_PARTS
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from sklearn.model_selection import train_test_split
from copy import copy, deepcopy
# import pyarrow.parquet as pq
from scipy.spatial import Delaunay
from mpl_toolkits.mplot3d import Axes3D

# Notebook display setting: raise pandas' `display.max_rows` option to 500.
pd.set_option('display.max_rows', 500)

In [None]:
def get_world_keypoints(row):
    """Return the world keypoints for one annotation row.

    Delegates to ``pixel2world`` with the row's ``leftCrop`` keypoints, its
    ``rightCrop`` keypoints and its ``camera_metadata``, and returns whatever
    ``pixel2world`` produces.
    """
    annotation = row.keypoints
    left_crop, right_crop = annotation['leftCrop'], annotation['rightCrop']
    return pixel2world(left_crop, right_crop, row.camera_metadata)

def prepare_df(aggregate_df):
    """Select one annotation source per image and attach world keypoints.

    Rows flagged ``is_qa`` are always kept. Rows that are not QA'ed (Cogito
    entries) are kept only when no QA row shares their ``left_image_url``.
    The returned frame is the QA rows followed by the surviving Cogito rows,
    with an added ``world_keypoints`` column computed per row by
    ``get_world_keypoints``.
    """
    # QA'ed annotations take priority.
    qa_rows = aggregate_df[aggregate_df.is_qa == True]

    # Cogito annotations are a fallback for images without a QA entry.
    not_qa = aggregate_df.is_qa != True
    covered_by_qa = aggregate_df.left_image_url.isin(qa_rows.left_image_url)
    cogito_rows = aggregate_df[not_qa & ~covered_by_qa]

    merged = pd.concat([qa_rows, cogito_rows], axis=0)

    # add world keypoints
    merged['world_keypoints'] = merged.apply(get_world_keypoints, axis=1)
    return merged


# Read the database credentials from the JSON file named by the
# PROD_RESEARCH_SQL_CREDENTIALS environment variable. The `with` block closes
# the file handle as soon as the JSON has been parsed.
with open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS']) as credentials_file:
    prod_research_sql_credentials = json.load(credentials_file)
rds_access_utils = RDSAccessUtils(prod_research_sql_credentials)

# Fish metadata joined to keypoint annotations that have both a left and a
# right crop, restricted to non-QA annotations captured before 2019-09-20.
# NOTE(review): because of `b.is_qa = false`, the QA branch of `prepare_df`
# selects nothing for this frame -- confirm that is intended.
query = """
    select * from research.fish_metadata a left join keypoint_annotations b
    on a.left_url = b.left_image_url 
    where b.keypoints -> 'leftCrop' is not null
    and b.keypoints -> 'rightCrop' is not null
    and b.is_qa = false
    and b.captured_at < '2019-09-20';
"""
aggregate_df = rds_access_utils.extract_from_database(query)
df = prepare_df(aggregate_df)


In [None]:
# Histogram of, for every row, the median of the index-1 coordinate taken over
# all of that row's world keypoints.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(
    df.world_keypoints.apply(
        lambda kps: np.median([point[1] for point in kps.values()])
    )
)
ax.grid()
plt.show()

In [None]:
def _median_index1_coordinate(world_kps):
    """Median of the index-1 coordinate across one row's world keypoints."""
    return np.median([coords[1] for coords in world_kps.values()])


# Store the per-row median as the `median_depth` column.
df['median_depth'] = df.world_keypoints.apply(_median_index1_coordinate)

In [None]:
# S3 helper constructed with '/root/data' as its argument
# (presumably the local root/cache directory -- TODO confirm).
s3_access_utils = S3AccessUtils('/root/data')


# initialize data transforms so that we can run inference with biomass neural network
# NOTE(review): NormalizeCentered2D and ToTensor are imported from both
# aquabyte.biomass_estimator and aquabyte.data_loader at the top of this
# notebook; the later import (aquabyte.data_loader) is the one bound here.
normalize_centered_2D_transform_biomass = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

# load the Keras model used by `generate_confidence_score` for AKPD scoring
# TODO (original author: "make this better"): the model path below is a hard-coded absolute path
akpd_scorer_network = load_model('/root/data/alok/biomass_estimation/playground/akpd_scorer_model_TF.h5') # make this better

In [None]:
def generate_akpd_score(row_id, ann, cm):
    """Score one keypoint annotation with the AKPD scoring network.

    Packs the annotation (``ann``), camera metadata (``cm``) and the row id
    into the sample dict passed to ``generate_confidence_score``, together
    with the module-level ``akpd_scorer_network``, and returns that
    function's result.
    """
    return generate_confidence_score(
        dict(
            keypoints=ann,
            cm=cm,
            stereo_pair_id=row_id,
            single_point_inference=True,
        ),
        akpd_scorer_network,
    )


In [None]:
# Score every row's annotation and store the results, in row order, as the
# `akpd_score` column.
akpd_scores = [
    generate_akpd_score(record.id, record.keypoints, record.camera_metadata)
    for _, record in df.iterrows()
]
df['akpd_score'] = akpd_scores


In [None]:
# Concatenate one ' OR id = <id>' term for every row whose AKPD score is
# below 1e-4. The resulting string begins with ' OR '.
low_score_rows = df.loc[df.akpd_score < 1e-4, ['id', 'akpd_score']]
where_clause = ''.join(
    f' OR id = {int(record.id)}' for _, record in low_score_rows.iterrows()
)


In [None]:
# Ids of every row whose AKPD score is below 1e-5.
# The ids are read straight from the `id` column: iterating the
# ['id', 'akpd_score'] slice with iterrows() turns each row into a single-dtype
# Series, which upcasts integer ids to floats (e.g. 507806.0).
ids = df.loc[df.akpd_score < 1e-5, 'id'].tolist()


In [None]:
# Annotation ids recorded for one specific fish.
df.loc[df.fish_id == '190808-d20dc94e-fc76-4ffb-a4f5-f296d9ac368d', 'id']

In [None]:
# Read the database credentials from the JSON file named by the
# PROD_RESEARCH_SQL_CREDENTIALS environment variable; the `with` block closes
# the file handle once the JSON has been parsed.
with open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS']) as credentials_file:
    prod_research_sql_credentials = json.load(credentials_file)

# Data-access helpers handed to the Visualizer.
rds_access_utils = RDSAccessUtils(prod_research_sql_credentials)
s3_access_utils = S3AccessUtils('/root/data')
visualizer = Visualizer(s3_access_utils, rds_access_utils)


In [None]:
# Load keypoint annotation 507806 into the visualizer and display its crops
# with keypoints overlaid and labels shown.
keypoint_annotation_id = 507806
visualizer.load_data(keypoint_annotation_id)
visualizer.display_crops(overlay_keypoints=True, show_labels=True)

In [None]:
# Load keypoint annotation 648822 into the visualizer and display its crops
# with keypoints overlaid but labels hidden.
keypoint_annotation_id = 648822
visualizer.load_data(keypoint_annotation_id)
visualizer.display_crops(overlay_keypoints=True, show_labels=False)

In [None]:
# Left-crop keypoints of annotation 635713 as {keypointType: [xFrame, yFrame]}.
{
    kp['keypointType']: [kp['xFrame'], kp['yFrame']]
    for kp in df[df.id == 635713].keypoints.iloc[0]['leftCrop']
}

In [None]:
# Right-crop keypoints of annotation 635713 as {keypointType: [xFrame, yFrame]}.
{
    kp['keypointType']: [kp['xFrame'], kp['yFrame']]
    for kp in df[df.id == 635713].keypoints.iloc[0]['rightCrop']
}

In [None]:
# For every row, compute the mean over BODY_PARTS of
# (left-crop yFrame - right-crop yFrame) and collect it in `diffs`.
diffs = []
for position, (_, record) in enumerate(df.iterrows()):
    # Progress indicator: print the running row count every 10000 rows.
    if position % 10000 == 0:
        print(position)

    crops = record.keypoints
    left_by_part = {
        kp['keypointType']: [kp['xFrame'], kp['yFrame']] for kp in crops['leftCrop']
    }
    right_by_part = {
        kp['keypointType']: [kp['xFrame'], kp['yFrame']] for kp in crops['rightCrop']
    }

    # Index 1 of each stored pair is the yFrame value.
    y_offsets = [left_by_part[part][1] - right_by_part[part][1] for part in BODY_PARTS]
    diffs.append(np.mean(y_offsets))

In [None]:
# Attach the per-row mean left/right y offset and index the frame by capture
# time so that it can be resampled.
df['diffs'] = diffs
df.index = pd.to_datetime(df.captured_at)

# Daily median of the offsets. The `how=` keyword of `resample` was deprecated
# in pandas 0.18 and later removed; calling the aggregation on the resampler
# is the supported equivalent.
df.diffs.resample('D').median()