In [None]:
import json
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

# Set the default matplotlib font size to 18 for every figure created after this line.
plt.rcParams['font.size'] = 18

In [None]:
%%javascript
// Replace the output area's scroll check with one that always answers false,
// regardless of how many lines the output has.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Build a lookup of pen metadata keyed by pen id.
# The credentials file is read inside a `with` block so the handle is closed
# right away instead of being left open for the life of the kernel.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

query = """
    select p.id as id, s.name as site_name, p.name as pen_name from pens p
    left join sites s
    on p.site_id = s.id
    order by p.id;
"""

df_pens = rds_access_utils.extract_from_database(query)

# pen id -> row holding `id`, `site_name` and `pen_name`.
# The id is read with ['id'] so it is always the column value and never a
# Series attribute that happens to share the name.
pen_infos = {pen['id']: pen for _, pen in df_pens.iterrows()}

In [None]:
# Load the daily summaries for pen_id 132 that carry biomass data, for dates
# strictly between 2020-07-15 and 2020-08-27.
# The credentials file is read inside a `with` block so the handle is closed
# right away instead of being left open for the life of the kernel.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

query = """
    select date, biomass_data from day_summaries where pen_id = 132 and date > '2020-07-15' and date < '2020-08-27' and biomass_data is not null;
"""

df = rds_access_utils.extract_from_database(query)

# Index by date; the 'date' column itself is kept because later cells read it.
df.index = df['date']

In [None]:
# Pull the date and the smart average weight out of every daily summary,
# then plot average weight against the frame's (date) index.
dates = list(df['date'])
weights = [
    biomass['smartData']['avgWeight']
    for biomass in df['biomass_data']
]

plt.figure(figsize=(20, 10))
plt.plot(df.index, np.array(weights))
plt.xlabel('Date')
plt.ylabel('Avg Weight')
plt.title('Varholmen: Pen 13 Average Weight')

In [None]:
# Rebind `rds_access_utils` to the data-warehouse connection.
# NOTE: earlier cells bind the same name to the PROD connection, so code that
# reads this global gets whichever of those cells ran last.
# The credentials file is read inside a `with` block so the handle is closed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

def get_depth(pen_id, start_date, end_date, db=None):
    """Return the average smoothed sensor depth for a pen over a time window.

    Averages the `aquabyte_smooth_sensors_depth_meters` telemetry values whose
    `ts_source` lies strictly between `start_date` and `end_date`.

    Args:
        pen_id: Integer pen id; interpolated into the query with `%i`.
        start_date: Lower timestamp bound (exclusive), as a string.
        end_date: Upper timestamp bound (exclusive), as a string.
        db: Optional object exposing `extract_from_database(query)`. When
            omitted, the notebook-level `rds_access_utils` is used, exactly
            as before this parameter existed.

    Returns:
        The average as returned by the database, or 0 when the query yields
        no row or a null/NaN average.
    """
    query = """
        select avg(value) from stream.telemetry where pen_id = %i and
        attribute = 'aquabyte_smooth_sensors_depth_meters' and ts_source > '%s' and ts_source < '%s' and value != 'NaN'
        limit 1;
    """ % (pen_id, start_date, end_date)

    # Fall back to the global connection only when no explicit one is given,
    # so existing three-argument calls keep working.
    connection = db if db is not None else rds_access_utils
    df = connection.extract_from_database(query)

    if len(df) == 0:
        return 0

    # `.ix` was removed in pandas 1.0; `.iloc[0]` reads the first row by position.
    avg_depth = df['avg'].iloc[0]

    # An average over zero matching rows arrives as None or NaN depending on
    # the column dtype; `is not None` alone let NaN through.
    if pd.isnull(avg_depth):
        return 0
    return avg_depth

In [None]:
# Pens to analyse, one (pen_id, start day, end day) row each. Every window
# starts and ends at midnight, so ' 00:00' is appended when the dicts are built.
_PEN_WINDOWS = [
    (88, '2020-02-01', '2020-03-04'),
    (108, '2020-04-15', '2020-05-10'),
    (66, '2020-05-15', '2020-06-09'),
    (37, '2020-06-01', '2020-06-17'),
    (86, '2020-07-01', '2020-07-15'),
    (95, '2020-07-01', '2020-07-24'),
    (129, '2020-07-01', '2020-07-20'),
    (133, '2020-07-15', '2020-08-01'),
    (100, '2020-07-15', '2020-07-31'),
    (56, '2020-08-01', '2020-08-30'),
    (60, '2020-08-01', '2020-08-30'),
    (131, '2020-08-05', '2020-08-30'),
    (132, '2020-08-05', '2020-08-30'),
    (124, '2020-08-01', '2020-08-30'),
    (125, '2020-08-01', '2020-08-30'),
    (1, '2020-08-01', '2020-08-30'),
    (4, '2020-08-01', '2020-08-30'),
    (5, '2020-08-01', '2020-08-30'),
    (136, '2020-08-01', '2020-08-30'),
    (137, '2020-08-01', '2020-08-30'),
    # (138, '2020-08-01', '2020-08-30'),  # pen 138 is intentionally left out
    (127, '2020-08-01', '2020-08-30'),
    (126, '2020-08-01', '2020-08-30'),
    (114, '2020-08-01', '2020-08-30'),
    (116, '2020-08-01', '2020-08-30'),
]

pens = [
    {
        'pen_id': pen_id,
        'start_date': '%s 00:00' % start_day,
        'end_date': '%s 00:00' % end_day,
    }
    for pen_id, start_day, end_day in _PEN_WINDOWS
]



In [None]:
# Cache of extract_biomass_data results, nested as
# pen_id -> start_date -> end_date -> result. It lives in its own cell so
# re-running the plotting cell does not clear it.
queryCache = {}

In [None]:
# Cache of get_depth results, nested as
# pen_id -> start_date -> end_date -> depth. It lives in its own cell so
# re-running the plotting cell does not clear it.
depthCache = {}

In [None]:
# For every pen: plot the mean estimated weight per hour of day, normalised by
# the mean of those hourly means, and record (depth, spread, site) for the
# scatter plot in the next cell.

# squeeze=False always returns a 2-D array of axes, so indexing also works
# when `pens` holds a single entry.
fig, axes = plt.subplots(len(pens), 1, figsize=(10, len(pens) * 5), squeeze=False)
axes = axes[:, 0]

depths = []
sds = []
sites = []

for index, pen in enumerate(pens):
    pen_id = pen['pen_id']
    start_date = pen['start_date']
    end_date = pen['end_date']
    ax = axes[index]

    # setdefault keeps results already cached for other date windows of the
    # same pen; assigning a fresh nested dict would throw them away.
    biomass_by_end = queryCache.setdefault(pen_id, {}).setdefault(start_date, {})
    if end_date not in biomass_by_end:
        biomass_by_end[end_date] = extract_biomass_data(pen_id, start_date, end_date, 0.99)
    df = biomass_by_end[end_date]

    # Item assignment always writes the column; attribute assignment
    # (`df.hour = ...`) only does so when the column already exists.
    df['hour'] = df.index.hour
    # df['hour'] = (df['hour'] + 2) % 24  # time zone hack, currently disabled

    depth_by_end = depthCache.setdefault(pen_id, {}).setdefault(start_date, {})
    if end_date not in depth_by_end:
        depth_by_end[end_date] = get_depth(pen_id, start_date, end_date)
    depth = depth_by_end[end_date]

    mask = df.estimated_k_factor > 0.0
    hourly_mean = df[mask].groupby('hour')['estimated_weight_g'].mean()
    site_name = pen_infos[pen_id]['site_name']

    if hourly_mean.empty:
        # Nothing passed the filter: label the panel instead of crashing on
        # min()/max() of an empty sequence, and leave this pen out of the lists.
        ax.set_title('Pen %i: %s (no data)' % (pen_id, site_name))
        continue

    # Ratio of each hourly mean to the mean of the hourly means.
    relative = hourly_mean.values / np.mean(hourly_mean.values)

    depths.append(depth)
    sds.append(np.std(relative))
    sites.append(site_name)

    ax.bar(hourly_mean.index, height=relative)
    ax.set_xlabel('UTC Hour')
    # The bars are a dimensionless ratio, not grams.
    ax.set_ylabel('Average Weight / Mean of Hourly Averages')
    ax.set_title('Pen %i: %s at %0.2f depth' % (pen_id, site_name, depth))
    ax.grid()
    ax.set_xlim([-1, 25])

    # Pad the y range by half the spread on each side.
    lowest, highest = relative.min(), relative.max()
    margin = (highest - lowest) / 2
    ax.set_ylim([lowest - margin, highest + margin])

# Lay out after drawing so titles and labels are taken into account.
fig.tight_layout(pad=5)

In [None]:
# Spread of the normalised hourly weight profile against average sensor depth,
# one labelled point per pen.
plt.figure(figsize=(10, 10))

plt.scatter(depths, sds)
plt.xlabel('Average Sensor Depth (m)')
plt.ylabel('Std of Normalised Hourly Mean Weight')
plt.title('Hourly Weight Variation vs. Depth')

# Label every point with its site name.
for point_depth, point_sd, site in zip(depths, sds, sites):
    plt.annotate('%s' % (site,), (point_depth, point_sd))

In [None]:



# df = extract_biomass_data(60, '2020-06-01', '2020-08-31', 0.99)
# df.hour = (df.hour + 2) % 24
# plt.figure(figsize=(10, 5))
# mask = df.estimated_k_factor > 0.0
# df[mask].groupby('hour')['estimated_weight_g'].agg(lambda x: x.mean()).plot()
# plt.grid()

In [None]:
# Pen 100, 2020-06-01 to 2020-08-31: mean estimated weight per hour of day.
df = extract_biomass_data(100, '2020-06-01', '2020-08-31', 0.99)
df['hour'] = (df['hour'] + 2) % 24  # shift the hour by +2, wrapping at 24
mask = df['estimated_k_factor'] > 0.0
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(10, 5))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 88, 2020-02-01 to 2020-03-06: mean estimated weight per hour of day.
df = extract_biomass_data(88, '2020-02-01', '2020-03-06', 0.99)
df['hour'] = (df['hour'] + 2) % 24  # shift the hour by +2, wrapping at 24
mask = df['estimated_k_factor'] > 0.0
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(10, 5))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 95, 2020-04-01 to 2020-07-30: mean estimated weight per hour of day.
df = extract_biomass_data(95, '2020-04-01', '2020-07-30', 0.99)
df['hour'] = (df['hour'] + 2) % 24  # shift the hour by +2, wrapping at 24
mask = df['estimated_k_factor'] > 0.0
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(10, 5))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 131, 2020-08-12 to 2020-08-26: mean estimated weight per hour of day.
df = extract_biomass_data(131, '2020-08-12', '2020-08-26', 0.99)
df['hour'] = (df['hour'] + 2) % 24  # shift the hour by +2, wrapping at 24
mask = df['estimated_k_factor'] > 0.0
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(10, 5))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 132, 2020-08-12 to 2020-08-26: mean estimated weight per hour of day.
df = extract_biomass_data(132, '2020-08-12', '2020-08-26', 0.99)
df['hour'] = (df['hour'] + 2) % 24  # shift the hour by +2, wrapping at 24
mask = df['estimated_k_factor'] > 0.0
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(10, 5))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 131, 2020-06-01 to 2020-08-31: mean estimated weight per hour of day,
# restricted to rows whose estimated k-factor exceeds 1.08.
df = extract_biomass_data(131, '2020-06-01', '2020-08-31', 0.99)

mask = df['estimated_k_factor'] > 1.08
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(20, 10))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 132, 2020-06-01 to 2020-08-31: mean estimated weight per hour of day,
# restricted to rows whose estimated k-factor exceeds 1.08.
# `df` and `mask` are read again by the cells that follow.
df = extract_biomass_data(132, '2020-06-01', '2020-08-31', 0.99)

mask = df['estimated_k_factor'] > 1.08
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(20, 10))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Number of rows per hour for the current `df` / `mask`, i.e. the sample count
# behind each hourly mean.
df.loc[mask].groupby('hour')['estimated_weight_g'].size().plot()

In [None]:
from research.weight_estimation.keypoint_utils.optics import pixel2world
import numpy as np
from research.weight_estimation.keypoint_utils import body_parts

def get_raw_3d_coordinates(keypoints, cm):
    """Return the world coordinates of the core body parts as one array.

    Keypoints whose ``keypointType`` is ``'BODY'`` are dropped from both the
    ``leftCrop`` and ``rightCrop`` lists; the rest are handed to
    ``pixel2world`` together with ``cm``. The result is read once per entry of
    ``body_parts.core_body_parts`` (in that order) and stacked into an array
    with one row per body part.
    """
    def _drop_body(crop_keypoints):
        # Keep every keypoint except the one tagged 'BODY'.
        return [kp for kp in crop_keypoints if kp['keypointType'] != 'BODY']

    world_keypoints = pixel2world(
        _drop_body(keypoints['leftCrop']),
        _drop_body(keypoints['rightCrop']),
        cm,
    )

    rows = [list(world_keypoints[part]) for part in body_parts.core_body_parts]
    return np.array(rows)

In [None]:
# Per-row depth: the median of column 1 of the row's world keypoints, or None
# when the keypoints cannot be converted.
# NOTE(review): this rebinds `depths`, which the scatter-plot cell earlier in
# the notebook also reads; re-running that cell after this one plots this list.
depths = []
for idx, row in df.iterrows():
    try:
        wkps = get_raw_3d_coordinates(row.annotation, row.camera_metadata)
        depth = np.median(wkps[:, 1])
    except Exception:
        # Best effort: a row whose annotation cannot be processed gets no
        # depth. `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit still stop the loop.
        depth = None
    depths.append(depth)
    

In [None]:
# Store the `depths` list as a 'depth' column on the current `df`.
# NOTE(review): both names are notebook globals, so this relies on `depths`
# having been built from this same `df` — confirm the cells ran in order.
df['depth'] = depths

In [None]:
# Histogram of the 'depth' column of the current `df`, in 20 bins.
depth_values = df['depth']
plt.hist(depth_values, bins=20)
plt.show()

In [None]:
# Histogram of the 'depth' column of the current `df`, in 20 bins.
depth_values = df['depth']
plt.hist(depth_values, bins=20)
plt.show()

In [None]:
# Histogram of the 'depth' column of the current `df`, in 20 bins.
depth_values = df['depth']
plt.hist(depth_values, bins=20)
plt.show()

In [None]:
# Pen 124, 2020-08-10 to 2020-08-29: mean estimated weight per hour of day,
# restricted to rows whose estimated k-factor exceeds 1.25.
df = extract_biomass_data(124, '2020-08-10', '2020-08-29', 0.99)

mask = df['estimated_k_factor'] > 1.25
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(20, 10))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Pen 124, 2020-08-10 to 2020-08-29: mean estimated weight per hour of day,
# restricted to rows whose estimated k-factor exceeds 1.3.
# `df` and `mask` are read again by the cell that follows.
df = extract_biomass_data(124, '2020-08-10', '2020-08-29', 0.99)

mask = df['estimated_k_factor'] > 1.3
hourly_mean_weight = (
    df[mask]
    .groupby('hour')['estimated_weight_g']
    .agg(lambda weights: weights.mean())
)
plt.figure(figsize=(15, 8))
hourly_mean_weight.plot()
plt.grid()

In [None]:
# Number of rows per hour for the current `df` / `mask`, i.e. the sample count
# behind each hourly mean.
hourly_sample_count = df.loc[mask].groupby('hour')['estimated_weight_g'].size()
plt.figure(figsize=(15, 8))
hourly_sample_count.plot()
plt.grid()

In [None]:
rds