In [None]:
import glob
import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
from aquabyte.visualize import Visualizer
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
import json


In [None]:
# Waiting-pen experiments evaluated in this notebook, one entry per deployment.
#   dataset     - name used to match against the result file names
#   credentials - name of the environment variable holding the SQL credentials path
#   sql_query   - query selecting the QA'd keypoint annotations for the deployment
#   weight      - reference average weight; it is scaled by 1e3 before being compared
#                 with `estimated_biomass_g`, so presumably kilograms (TODO confirm)
datasets = [
    dict(
        dataset="vikingfjord_experiment_id_2",
        credentials="PROD_SQL_CREDENTIALS",
        sql_query="select * from keypoint_annotations where pen_id=17 and keypoints is not null and is_qa=true and captured_at > '2019-07-01' and captured_at < '2019-07-09';",
        weight=5.96,
    ),
    dict(
        dataset="vikingfjord_experiment_id_3",
        credentials="PROD_SQL_CREDENTIALS",
        # No upper bound on captured_at for this experiment.
        sql_query="select * from keypoint_annotations where pen_id=17 and keypoints is not null and is_qa=true and captured_at > '2019-07-09';",
        weight=5.71,
    ),
    dict(
        dataset="bremnes_experiment_id_4",
        credentials="PROD_SQL_CREDENTIALS",
        sql_query="select * from keypoint_annotations where pen_id=46 and keypoints is not null and is_qa=true and captured_at > '2019-07-19' and captured_at < '2019-07-22';",
        weight=5.46,
    ),
    dict(
        dataset="bremnes_experiment_id_5",
        credentials="PROD_SQL_CREDENTIALS",
        sql_query="select * from keypoint_annotations where pen_id=47 and keypoints is not null and is_qa=true and captured_at > '2019-07-19' and captured_at < '2019-07-22';",
        weight=5.46,
    ),
    dict(
        dataset="bremnes_experiment_id_6",
        credentials="PROD_SQL_CREDENTIALS",
        sql_query="select * from keypoint_annotations where pen_id=46 and keypoints is not null and is_qa=true and captured_at > '2019-07-22' and captured_at < '2019-08-01';",
        weight=4.7,
    ),
    dict(
        dataset="bremnes_experiment_id_7",
        credentials="PROD_SQL_CREDENTIALS",
        sql_query="select * from keypoint_annotations where pen_id=47 and keypoints is not null and is_qa=true and captured_at > '2019-07-22' and captured_at < '2019-08-01';",
        weight=4.7,
    ),
]

In [None]:
# Lookup table: dataset name -> reference weight, taken from the `datasets` entries.
weight_by_dataset = dict(
    (entry['dataset'], entry['weight']) for entry in datasets
)

In [None]:
# Collect every per-dataset result file for this model run, in lexicographic order.
files = glob.glob('/root/data/temp/results_f5cfd03d4622c24879cfa9d5f6427bffc4668205_unweighted_model_3800_*.h5')
files.sort()


In [None]:
# Display the matched result file paths as a sanity check before aggregating them.
files

In [None]:
# Per-deployment accumulators; each list gets one entry per result file, in the
# same order as `files`.
dataset_names, weights, preds, pct_errors = [], [], [], []
for f in files:
    # The dataset name is the part of the file name that follows the model tag,
    # with the '.h5' extension removed.
    dataset = f.rsplit('/', 1)[-1].rsplit('_unweighted_model_3800_', 1)[-1].replace('.h5', '')
    dataset_names.append(dataset)

    df = pd.read_hdf(f, key='table')

    # Only keep strictly positive estimates below 20000 when averaging.
    mask = df['estimated_biomass_g'].gt(0) & df['estimated_biomass_g'].lt(20000)
    pred = df.loc[mask, 'estimated_biomass_g'].mean()

    # Scale the reference weight by 1e3 so it is comparable with the gram-valued
    # prediction (reference weights are presumably in kg -- TODO confirm).
    weight = weight_by_dataset[dataset] * 1e3
    weights.append(weight)
    preds.append(pred)

    # Signed relative error of the mean prediction, in percent.
    pct_error = 100.0 * ((pred - weight) / weight)
    pct_errors.append(pct_error)
    

In [None]:
# Global matplotlib text styling applied to every figure drawn after this cell.
# 'family' has to be a font name or one of matplotlib's generic families
# ('serif', 'sans-serif', 'cursive', 'fantasy', 'monospace'). The previous value
# 'normal' is a style/weight keyword rather than a font family, so matplotlib
# could not resolve it and warned on every text draw before falling back to its
# default font. 'sans-serif' selects that default family explicitly.
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 22}

matplotlib.rc('font', **font)
# Tick labels use their own sizes: x labels are kept small (10), y labels large (20).
matplotlib.rc('xtick', labelsize=10)
matplotlib.rc('ytick', labelsize=20)



In [None]:
# Bar chart of the percentage error in mean biomass, one bar per deployment,
# drawn through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(np.arange(len(pct_errors)), pct_errors)

# Label each bar position with its dataset name.
ax.set_xticks(np.arange(len(dataset_names)))
ax.set_xticklabels(dataset_names)

ax.set_title('Waiting Pen Biomass Error')
ax.set_xlabel('Deployment')
ax.set_ylabel('% Error in Average Biomass')
ax.grid()
plt.show()

In [None]:
# Load the results of a single deployment (vikingfjord experiment 2) for closer
# inspection; `f` and `df` are deliberately rebound here after the loop above.
f = '/root/data/temp/results_f5cfd03d4622c24879cfa9d5f6427bffc4668205_unweighted_model_3800_vikingfjord_experiment_id_2.h5'
df = pd.read_hdf(f, key='table')

In [None]:
# Data-access helpers used by the visualizer below.
# NOTE(review): '/root/data' is presumably the local root used for S3 downloads -- confirm.
s3_access_utils = S3AccessUtils('/root/data')

# The PROD_SQL_CREDENTIALS environment variable holds the path of a JSON file
# with the SQL credentials. Read it inside a `with` block so the file handle is
# closed deterministically instead of being left open for the garbage collector.
with open(os.environ["PROD_SQL_CREDENTIALS"]) as credentials_file:
    sql_credentials = json.load(credentials_file)
rds_access_utils = RDSAccessUtils(sql_credentials)


In [None]:
# Show the ids available in the loaded results table.
df['id']

In [None]:
# Visualizer wired up with the database and S3 access helpers created above.
v = Visualizer(
    rds_access_utils=rds_access_utils,
    s3_access_utils=s3_access_utils,
)

In [None]:
# Render static figures inline for the crop view.
%matplotlib inline
# Load the record with id 553461 into the visualizer
# (presumably a keypoint annotation id -- TODO confirm), then show its crops
# with the keypoints drawn on top.
v.load_data(553461)
v.display_crops(overlay_keypoints=True)

In [None]:
# Switch to the interactive notebook backend before drawing the 3D keypoints
# (presumably so the plot can be rotated -- TODO confirm).
%matplotlib notebook
# Uses whatever record was most recently loaded into `v` via load_data.
v.display_3d_keypoints()

In [None]:
# World keypoints of the first result row whose id is 553668.
# NOTE(review): this id differs from the one loaded into the visualizer above
# (553461) -- confirm that is intentional.
df.loc[df['id'] == 553668, 'world_keypoints'].iloc[0]