In [None]:
import json
import os
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta, time
from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

# Build the RDS access helper from the JSON credentials file whose path is
# given by DATA_WAREHOUSE_SQL_CREDENTIALS. The context manager closes the
# credentials file as soon as it has been parsed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Re-create the access helper from the credentials file named by
# PROD_SQL_CREDENTIALS; the context manager closes the file after parsing.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# All day_summaries rows for pen 125 that have biomass_data populated.
query = """
    select * from day_summaries
    where pen_id = %i
    and biomass_data is not null;
""" % (125, )

df = rds_access_utils.extract_from_database(query)

In [None]:
# Print date, fish count and average weight from each summary's raw data.
for _, summary in df.iterrows():
    raw_data = summary['biomass_data']['rawData']
    print(summary['date'], raw_data['numFish'], raw_data['avgWeight'])

In [None]:
# Module-level cache: getPenDF stores query results here keyed by SQL text.
queryCache = dict()

In [None]:
# Load the two per-pen CSV files from the working directory.
pen1, pen2 = (pd.read_csv(csv_path) for csv_path in ('pen1.csv', 'pen2.csv'))

In [None]:
# Mean of the `weight` column for each CSV frame, shown as a tuple.
tuple(np.mean(pen_frame.weight) for pen_frame in (pen1, pen2))

In [None]:
# Histogram of the pen1 CSV weights with matplotlib's default binning.
plt.hist(pen1['weight'])

In [None]:
# Histogram of the pen2 CSV weights with matplotlib's default binning.
plt.hist(pen2['weight'])

In [None]:
# Point the access helper back at the credentials file named by
# DATA_WAREHOUSE_SQL_CREDENTIALS (getPenDF below queries through it).
# The context manager closes the credentials file after parsing.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

def getPenDF(pen, akpd_filter=0.99):
    """Fetch biomass computations for one pen and capture window.

    Args:
        pen: dict with keys 'pen_id' (formatted as an integer), 'start_date'
            and 'end_date' (formatted as quoted SQL string literals). The
            window is inclusive on both ends.
        akpd_filter: only rows whose akpd_score is strictly greater than this
            value are selected. Defaults to 0.99, the value that used to be
            hard-coded.

    Returns:
        A new DataFrame sorted by captured_at, indexed by
        pd.to_datetime(captured_at), holding the selected columns plus
        'date' (index date as a string) and 'hour' (index hour).

    Raw query results are kept in the module-level `queryCache`, keyed by the
    full query text, so a repeated call with the same arguments does not hit
    the database again.
    """
    # Values are interpolated directly into the SQL text; only call this with
    # trusted, locally defined pen dictionaries.
    query = """
       SELECT captured_at, estimated_weight_g, akpd_score FROM prod.biomass_computations
        WHERE prod.biomass_computations.captured_at >= '%s'
        AND prod.biomass_computations.captured_at <= '%s'
        AND prod.biomass_computations.akpd_score > %0.4f
        AND prod.biomass_computations.pen_id = %i;
    """ % (pen['start_date'], pen['end_date'], akpd_filter, pen['pen_id'])

    if query not in queryCache:
        queryCache[query] = rds_access_utils.extract_from_database(query)

    # Work on a sorted copy so the cached frame is never mutated by the
    # index/column assignments below.
    df = queryCache[query].sort_values('captured_at').copy()
    df.index = pd.to_datetime(df.captured_at)
    df['date'] = df.index.date.astype(str)
    df['hour'] = df.index.hour

    return df

# Each entry describes one query window: (pen_id, start_date, end_date).
pens = [
    {'pen_id': pen_id, 'start_date': window_start, 'end_date': window_end}
    for pen_id, window_start, window_end in (
        (124, '2020-08-20 00:00', '2020-08-28 00:00'),
        (125, '2020-08-20 00:00', '2020-08-28 00:00'),
        (56, '2020-08-20 00:00', '2020-08-28 00:00'),
        (60, '2020-08-20 00:00', '2020-08-28 00:00'),
        (125, '2020-08-17 00:00', '2020-08-18 00:00'),
    )
]

# One frame per entry, fetched in list order.
df1, df2, df3, df4, df5 = (getPenDF(pen) for pen in pens)

In [None]:
# Mean estimated weight in df1 for each hour of the day (0-23).
hour = list(np.arange(0, 24))
avg = [np.mean(df1.estimated_weight_g[df1.hour == hour_of_day]) for hour_of_day in hour]

plt.plot(hour, avg)

In [None]:
# Row count and mean estimated weight for df3, df4 and df5. The labels come
# from the `pens` entries those frames were built from (pens[2], pens[3],
# pens[4]), so they cannot drift out of sync with the queried pen/date range
# the way hard-coded date strings can.
for pen, pen_df in zip(pens[2:5], (df3, df4, df5)):
    print(pen['pen_id'], pen['start_date'], pen['end_date'],
          len(pen_df), np.mean(pen_df.estimated_weight_g))

In [None]:
# NOTE(review): the module-level `df` is the day_summaries query result, not a
# getPenDF frame -- confirm it really has captured_at / estimated_weight_g
# columns, or point this at the intended frame.
plt.scatter(df['captured_at'], df['estimated_weight_g'])

In [None]:
# Means shown in the title; '%i' truncates them to integers.
s1 = np.mean(pen1.weight)
a1 = np.mean(df1.estimated_weight_g)

# Overlay the two density-normalised histograms.
for series, series_label in ((df1.estimated_weight_g, 'Aquabyte'), (pen1.weight, 'Sampling')):
    plt.hist(series, density=True, alpha=0.5, label=series_label)
plt.legend()
plt.title('Pen 1: Sampling (%i) vs Aquabyte (%i)' % (s1, a1))

In [None]:
# Means shown in the title; '%i' truncates them to integers.
s2 = np.mean(pen2.weight)
a2 = np.mean(df2.estimated_weight_g)

# Overlay the two density-normalised histograms.
for series, series_label in ((df2.estimated_weight_g, 'Aquabyte'), (pen2.weight, 'Sampling')):
    plt.hist(series, density=True, alpha=0.5, label=series_label)
plt.legend()
plt.title('Pen 2: Sampling (%i) vs Aquabyte (%i)' % (s2, a2))

In [None]:
# Overlay density histograms of df2 and df3, with each mean in the legend.
# NOTE(review): the '8/26' / '8/15' labels are hard-coded -- confirm they
# still describe the windows df2 and df3 were queried with.
for frame, label_template in ((df2, '8/26 %i'), (df3, '8/15 %i')):
    estimated = frame.estimated_weight_g
    plt.hist(estimated, density=True, alpha=0.5, label=label_template % (np.mean(estimated), ))
plt.legend()

In [None]:
# Mean pen2 CSV weight next to the mean df2 estimated weight, as a tuple.
tuple(np.mean(series) for series in (pen2.weight, df2.estimated_weight_g))

In [None]:
def get_distribution(weights, bucket_cutoffs):
    """Return the percentage of weights that fall into each bucket.

    Args:
        weights: array-like of numeric weights (converted with np.asarray, so
            plain lists are accepted as well as numpy arrays).
        bucket_cutoffs: sequence of bucket edges, expected in ascending order
            (not checked). Each consecutive pair (low, high) defines a
            half-open bucket [low, high).

    Returns:
        dict mapping '<1e-3 * low>-<1e-3 * high>' labels to the share of
        bucketed weights in that bucket, as a percentage rounded to two
        decimals. Weights outside every bucket are ignored and do not count
        towards the denominator. If no weight falls in any bucket, every
        bucket maps to 0.0 instead of raising ZeroDivisionError.
    """
    weights = np.asarray(weights)

    counts = {}
    for low, high in zip(bucket_cutoffs, bucket_cutoffs[1:]):
        bucket = f'{1e-3 * low}-{1e-3 * high}'
        counts[bucket] = weights[(weights >= low) & (weights < high)].shape[0]

    total = sum(counts.values())
    if total == 0:
        # Nothing was bucketed (empty input or all values out of range).
        return {bucket: 0.0 for bucket in counts}

    return {bucket: round(100 * n / total, 2) for bucket, n in counts.items()}


In [None]:
# Percentage of pen2 weights per 1000-wide bucket over edges 0..9000.
bucket_edges = np.arange(0, 10000, 1000)
get_distribution(pen2.weight.values, bucket_edges)

In [None]:
# Mean of the pen1 CSV `weight` column.
pen1['weight'].mean()

In [None]:
# Large 50-bin histogram of the pen2 CSV weights with grid lines.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(pen2.weight.values, bins=50)
ax.grid()
plt.show()

In [None]:
# Display the pen1 CSV frame as the cell's output (last-expression repr).
pen1