In [None]:
import json, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors

import pytz 

from datetime import timedelta, datetime

from research.utils.data_access_utils import RDSAccessUtils

# Build the warehouse client from the JSON credentials file whose path is given
# by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The `with` block
# closes the file as soon as it has been parsed instead of leaving the handle
# open for the garbage collector.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentialsFile:
    rds_access_utils = RDSAccessUtils(json.load(credentialsFile))

In [None]:
# One row per pen: its id, the name of the site it belongs to, and its own
# name. The left join keeps pens whose site_id has no matching site row.
query = """
    select p.id as id, s.name as site_name, p.name as pen_name from customer.pens p
    left join customer.sites s
    on p.site_id = s.id
    order by p.id;
"""

# NOTE(review): this rebuilds the same client the import cell already creates;
# kept so the cell's effect on `rds_access_utils` is unchanged. The credentials
# file is now closed right after it is read.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentialsFile:
    rds_access_utils = RDSAccessUtils(json.load(credentialsFile))
df_pens = rds_access_utils.extract_from_database(query)

In [None]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# In-memory cache of query results, keyed by the exact SQL text.
# Kept in its own cell so re-running the plotting cell does not empty it;
# re-running THIS cell discards everything cached so far.
queryCache = dict()

In [None]:
# The two capture windows being compared. Each pair is used as a pair of
# exclusive bounds on captured_at (strictly after start, strictly before end).
startDate1, endDate1 = '2020-04-21', '2020-04-22'
startDate2, endDate2 = '2020-05-04', '2020-05-05'

# Pens to plot, one subplot per id, in this order.
penIds = [
    95,
    66,
    56,
    60,
    85,
    86,
]

In [None]:
def _buildWeightQuery(penId, startDate, endDate):
    """Return the SQL that selects weight estimates for one pen and window.

    Rows are read from prod.biomass_computations, limited to
    akpd_score > 0.99 and to captured_at strictly between startDate and
    endDate.

    NOTE(review): the values are interpolated directly into the SQL text.
    That is acceptable only while penId and the dates are constants defined
    in this notebook; do not feed externally supplied values through here.
    """
    return """
        select captured_at, estimated_weight_g from prod.biomass_computations bc
        where bc.pen_id = %i
        and bc.akpd_score > 0.99
        and bc.captured_at > '%s'
        and bc.captured_at < '%s';
    """ % (penId, startDate, endDate)


def _loadWeights(query):
    """Return the result of `query`, indexed by captured_at and sorted on it.

    The raw result is stored in queryCache under the query text, so the
    database is hit at most once per distinct query. A copy is returned so
    that re-indexing here never alters the cached frame.
    """
    if query not in queryCache:
        queryCache[query] = rds_access_utils.extract_from_database(query)

    weights = queryCache[query].copy()
    weights.index = weights['captured_at']
    return weights.sort_index()


fig, axes = plt.subplots(nrows = len(penIds), ncols = 1, figsize = (10, 5 * len(penIds)))

# With a single pen, subplots() hands back a bare Axes instead of an array and
# axes[index] would fail; normalise to a 1-D array in every case.
axes = np.atleast_1d(axes)

for index, penId in enumerate(penIds):
    # Progress marker: the queries can be slow on a cold cache.
    print(penId)

    # Look the pen up by id; read the names by column rather than by position
    # so the lookup does not depend on the column order of the pen query.
    foundPenData = df_pens[df_pens.id == penId]

    if len(foundPenData) > 0:
        foundPen = foundPenData.iloc[0]
        siteName = foundPen['site_name']
        penName = foundPen['pen_name']
    else:
        siteName = 'N/A'
        penName = 'N/A'

    query1 = _buildWeightQuery(penId, startDate1, endDate1)
    query2 = _buildWeightQuery(penId, startDate2, endDate2)

    weights1 = _loadWeights(query1)
    weights2 = _loadWeights(query2)

    # Overlay both windows as normalised, semi-transparent histograms so their
    # shapes can be compared even when the sample counts differ.
    ax = axes[index]
    ax.hist(weights1['estimated_weight_g'], bins = 20, density = True, facecolor = 'blue', alpha = 0.5,
            label = '%s to %s' % (startDate1, endDate1))
    ax.hist(weights2['estimated_weight_g'], bins = 20, density = True, facecolor = 'red', alpha = 0.5,
            label = '%s to %s' % (startDate2, endDate2))
    ax.set_title('%s %s (%i) Image Score Analysis' % (siteName, penName, penId))
    ax.set_xlabel('Estimated weight (g)')
    ax.set_ylabel('Density')
    ax.legend()