In [None]:
import json
import os
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

# Raise matplotlib's default font size for every figure drawn in this notebook.
plt.rcParams.update({'font.size': 18})

In [None]:
%%javascript
// Replace the notebook front end's OutputArea scroll check with a function that
// always returns false, whatever line count it is given.
// NOTE(review): presumably this keeps tall figure grids from being wrapped in a
// scroll box - confirm it still applies to the notebook front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Read the production DB credentials from the JSON file named by the
# PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle that a bare json.load(open(...)) would leave open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    prod_rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# One row per pen with the name of its site; the left join keeps pens whose
# site_id has no matching site row.
query = """
    select p.id as id, s.name as site_name, p.name as pen_name from pens p
    left join sites s
    on p.site_id = s.id
    order by p.id;
"""

df_pens = prod_rds_access_utils.extract_from_database(query)

# Map pen id -> query row so later cells can look up site_name / pen_name.
pen_infos = {pen['id']: pen for _, pen in df_pens.iterrows()}

In [None]:
# Connection helper built from the JSON credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle that a bare json.load(open(...)) would leave open.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# rds_access_utils = RDSAccessUtils(json.load(open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS'])))

# depthCache = {}

# def get_depth(pen_id, start_date, end_date):
#     if pen_id in depthCache and start_date in depthCache[pen_id] and end_date in depthCache[pen_id][start_date]:
#         depth = depthCache[pen_id][start_date][end_date]
#     else:
#         depth = 0
        
#         query = """
#             select avg(value) from stream.telemetry where pen_id = %i and
#             attribute = 'aquabyte_smooth_sensors_depth_meters' and ts_source > '%s' and ts_source < '%s' and value != 'NaN'
#             limit 1;
#         """ % (pen_id, start_date, end_date)

#         df = rds_access_utils.extract_from_database(query)
    
#         if len(df) > 0:
#             if df.ix[0, 'avg'] is not None:
#                 depth = df.ix[0, 'avg']
        
#         depthCache[pen_id] = { start_date: { end_date: depth } }
    
#     return depth

In [None]:
# df_feeding = pd.read_csv('varholmen_pen6_feeding.csv')
# df_feeding.index = pd.to_datetime(df_feeding.date, dayfirst = True)

In [None]:
# df_feeding['weight_g'] = pd.to_numeric(df_feeding['weight_g'].astype(str).str.replace(',',''), errors='coerce')
# df_feeding_adj = df_feeding[df_feeding.index >= '2020-07-12']

In [None]:
# In-memory cache of extracted frames, nested as
# {pen_id: {start_date: {end_date: DataFrame}}}. Kept in its own cell so that
# re-running the load cell does not reset it.
queryCache = dict()

In [None]:
# Pen and date range analysed by the rest of the notebook.
pen_id = 131
df_start_date = '2020-07-12'
df_end_date = '2020-08-26'

# queryCache is nested as {pen_id: {start_date: {end_date: DataFrame}}}.
cached_df = queryCache.get(pen_id, {}).get(df_start_date, {}).get(df_end_date)

if cached_df is not None:
    df = cached_df
else:
    # NOTE(review): 0.99 is passed straight through to extract_biomass_data;
    # presumably a quality/score cutoff - confirm against that function.
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0.99)
    # df = extract_biomass_data(pen_id, '2020-08-24', '2020-09-03', 0.99)

    df['date'] = pd.to_datetime(df['date'])
    df['week'] = df['date'].apply(lambda x: x.weekofyear)

    # Per-row depth: the median of component 1 across the world-space keypoints
    # that pixel2world returns for the left/right crops of each annotation.
    depths = []
    for idx, row in df.iterrows():
        ann, cm = row.annotation, row.camera_metadata
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        depth = np.median([wkp[1] for wkp in wkps.values()])
        depths.append(depth)
    df['depth'] = depths

    # Insert into the nested cache without discarding other date ranges that
    # are already cached for this pen (plain assignment to queryCache[pen_id]
    # replaced the pen's whole sub-dictionary).
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df


In [None]:
def get_feeding_hours(df, threshold=1 / 24):
    """Return the earliest and latest hour whose share of rows exceeds ``threshold``.

    Rows are grouped by the ``hour`` column and each hour's row count is
    divided by the total number of rows. Hours whose share is strictly greater
    than ``threshold`` are treated as high density.

    Args:
        df: DataFrame with ``hour`` and ``estimated_weight_g`` columns, one row
            per observation.
        threshold: Minimum share of all rows (exclusive) an hour needs in order
            to count as high density. The default, 1/24, is the share each
            hour would have if rows were spread evenly over a day.

    Returns:
        Tuple ``(first_hour, last_hour)``: the smallest and largest ``hour``
        values among the high-density hours.

    Raises:
        ValueError: If no hour exceeds ``threshold`` (including when ``df`` is
            empty).
    """
    density_by_hour = df.groupby('hour')['estimated_weight_g'].size()
    pct_density_by_hour = density_by_hour / len(df)

    high_density = pct_density_by_hour.index[pct_density_by_hour > threshold]
    if len(high_density) == 0:
        raise ValueError('no hour has a density share above %s' % (threshold, ))

    return high_density.min(), high_density.max()

# Show the (first, last) high-density hour over the whole loaded date range.
get_feeding_hours(df=df)

In [None]:
# Weekly hour-of-day profiles, one row of three panels per week found in df:
#   column 0 - mean estimated weight per hour (high-density hours drawn in red)
#   column 1 - share of the week's fish per hour, overall and per weight band,
#              with raw counts on a twin y-axis
#   column 2 - mean `depth` per hour for each weight band
weeknums = sorted(set(df['week']))

# Upper edges of the 1000 g weight bands; a band is (amt - 1000, amt), exclusive.
amts = [2000, 3000, 4000, 5000, 6000, 7000]

# An hour is "high density" when it holds more than an even 1/24 share of the
# week's fish. A single threshold now drives both the red highlight and the
# vertical marker lines (these previously used 1/24 and 0.04 respectively).
density_threshold = 1 / 24

# squeeze=False keeps `axes` two-dimensional even when df spans a single week.
fig, axes = plt.subplots(len(weeknums), 3, figsize=(20, len(weeknums) * 5), squeeze=False)
fig.tight_layout(pad = 5)

for index, week in enumerate(weeknums):
    df_week = df[df['week'] == week]
    start_date = min(df_week.date)
    end_date = max(df_week.date)

    hourly_weights = df_week.groupby('hour')['estimated_weight_g']
    weight_by_hour = hourly_weights.mean()
    density_by_hour = hourly_weights.size()
    pct_density_by_hour = density_by_hour / len(df_week)

    for amt in amts:
        df_band = df_week[(df_week.estimated_weight_g < amt) & (df_week.estimated_weight_g > amt - 1000)]

        depth_by_hour = df_band.groupby('hour')['depth'].mean()
        axes[index, 2].plot(depth_by_hour, label=amt)

        # Band counts are normalised by the whole week so the bands are comparable.
        density_by_hour_amt = df_band.groupby('hour')['estimated_weight_g'].size() / len(df_week)
        axes[index, 1].plot(density_by_hour_amt)

    is_high_density = pct_density_by_hour > density_threshold
    high_density = pct_density_by_hour.index[is_high_density]

    axes[index, 0].plot(weight_by_hour)
    axes[index, 0].plot(weight_by_hour[is_high_density], color = 'red')
    axes[index, 1].plot(pct_density_by_hour)
    axes1 = axes[index, 1].twinx()
    axes1.plot(density_by_hour)

    # The title no longer reports a depth: the telemetry lookup that supplied it
    # is commented out, so `depth` here was only a stale value left by another cell.
    axes[index, 0].set_title('%s (%i): W %i (%s to %s)' % (pen_infos[pen_id]['site_name'], pen_id, week, start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')))
    axes[index, 1].set_title('Density (%i fish)' % (len(df_week), ))
    axes[index, 0].set_xlabel('Hour (UTC)')
    axes[index, 1].set_xlabel('Hour (UTC)')
    axes[index, 0].set_ylabel('Weight (g)')
    axes[index, 1].set_ylabel('% of fish')
    axes1.set_ylabel('# of fish')

    # Mark the first and last high-density hour on all three panels.
    if len(high_density) > 0:
        high_density_min = high_density.min()
        high_density_max = high_density.max()
        for ax in axes[index]:
            ax.axvline(high_density_min)
            ax.axvline(high_density_max)

    if index == 0:
        axes[index, 2].legend()

In [None]:
# Daily hour-of-day profiles for one week, one row of three panels per date:
#   column 0 - mean estimated weight per hour (high-density hours drawn in red)
#   column 1 - share of the day's fish per hour, overall and per weight band,
#              with raw counts on a twin y-axis
#   column 2 - mean `depth` per hour for each weight band
# Uses `amts` (weight band edges) from the weekly-profile cell.
week = 34

df_week = df[df['week'] == week]
dates = sorted(set(df_week.date))
if len(dates) == 0:
    raise ValueError('no data for week %i' % (week, ))

# Share of the day's fish an hour must exceed to count as "high density".
density_threshold = 0.04

# squeeze=False keeps `axes` two-dimensional even when the week has a single date.
fig, axes = plt.subplots(len(dates), 3, figsize=(20, len(dates) * 5), squeeze=False)
fig.tight_layout(pad = 5)

for index, date in enumerate(dates):
    df_day = df[df['date'] == date]

    # The telemetry depth lookup (get_depth) exists only as commented-out code,
    # so it is not called here, and the debug print of the weekly loop's
    # start/end dates is gone with it.

    hourly_weights = df_day.groupby('hour')['estimated_weight_g']
    weight_by_hour = hourly_weights.mean()
    density_by_hour = hourly_weights.size()
    pct_density_by_hour = density_by_hour / len(df_day)

    for amt in amts:
        df_band = df_day[(df_day.estimated_weight_g < amt) & (df_day.estimated_weight_g > amt - 1000)]

        depth_by_hour = df_band.groupby('hour')['depth'].mean()
        axes[index, 2].plot(depth_by_hour, label=amt)

        # Band counts are normalised by the whole day so the bands are comparable.
        density_by_hour_amt = df_band.groupby('hour')['estimated_weight_g'].size() / len(df_day)
        axes[index, 1].plot(density_by_hour_amt)

    is_high_density = pct_density_by_hour > density_threshold
    high_density = pct_density_by_hour.index[is_high_density]

    axes[index, 0].plot(weight_by_hour)
    axes[index, 0].plot(weight_by_hour[is_high_density], color = 'red')
    axes[index, 1].plot(pct_density_by_hour)
    axes1 = axes[index, 1].twinx()
    axes1.plot(density_by_hour)
    axes[index, 0].set_title('%s (%i): Date %s' % (pen_infos[pen_id]['site_name'], pen_id, date))
    axes[index, 1].set_title('Density (%i fish)' % (len(df_day), ))
    axes[index, 0].set_xlabel('Hour (UTC)')
    axes[index, 1].set_xlabel('Hour (UTC)')
    axes[index, 0].set_ylabel('Weight (g)')
    axes[index, 1].set_ylabel('% of fish')
    axes1.set_ylabel('# of fish')

    # Mark the first and last high-density hour on all three panels.
    if len(high_density) > 0:
        high_density_min = high_density.min()
        high_density_max = high_density.max()
        for ax in axes[index]:
            ax.axvline(high_density_min)
            ax.axvline(high_density_max)

In [None]:
# Preview the first five rows of the loaded frame, including the added
# `week` and `depth` columns.
df.head(n=5)

In [None]:
# Mean estimated weight for each date, as a Series with a datetime index.
df_daily = df.groupby('date').estimated_weight_g.mean()
df_daily.index = pd.to_datetime(df_daily.index)
df_daily.tail(5)

In [None]:
# NOTE(review): in the visible notebook df_feeding_adj is assigned only inside a
# commented-out cell, so this cell raises NameError on a fresh kernel unless
# that cell is restored.
df_feeding_adj

In [None]:
# Line plot of the feeding_kg column of the feeding table, against its index.
# Alternative series to inspect instead:
#plt.plot(df_feeding_adj.pct_fed)
plt.plot(df_feeding_adj['feeding_kg'])

In [None]:
# Overlay the feeding table's weight_g series on the daily mean estimated weight.
plt.figure(figsize=(20, 10))
ax = plt.gca()
ax.plot(df_feeding_adj.index, df_feeding_adj['weight_g'], color='red', label='Feeding')
# NOTE(review): the Aquabyte series is shifted up by a fixed 300 with no stated
# reason - confirm the offset is intended.
ax.plot(df_daily.index, df_daily.values + 300, label='Aquabyte')
ax.legend()

In [None]:
# Bar chart of (feeding weight_g - daily mean estimated weight) per date.
plt.figure(figsize=(20, 10))
# Subtracting two Series aligns them on the union of their indexes, so the
# result can be longer than df_feeding_adj.index. Plot against the result's own
# index and drop dates present on only one side, so x positions and heights
# always match.
weight_gap = (df_feeding_adj['weight_g'] - df_daily).dropna()
plt.bar(weight_gap.index, weight_gap.values)

In [None]:
# df_feeding_adj['weight_g'] - df_daily