In [1]:
%load_ext autoreload
%autoreload 2

from research.weight_estimation.population_metrics import PopulationMetricsEstimator
import json
import os

from flask import Flask
from flask import request

import psycopg2
import boto3
from research.weight_estimation.population_metrics import PopulationMetricsEstimator

In [15]:

SCHEDULE_BY_PEN = {
    88: (7, 16),
    66: (0, 24),
}


def get_db_params_from_aws():
#     ssm = boto3.client('ssm', region_name='eu-west-1')
#     host_param = ssm.get_parameter(Name='/DW_DB_RO_HOST', WithDecryption=True)
#     host = host_param['Parameter']['Value']

#     user_param = ssm.get_parameter(Name='/DW_DB_RO_USER', WithDecryption=True)
#     user = user_param['Parameter']['Value']

#     password_param = ssm.get_parameter(Name='/DW_DB_RO_PASSWORD', WithDecryption=True)
#     password = password_param['Parameter']['Value']

#     dbname_param = ssm.get_parameter(Name='/DW_DB_RO_NAME', WithDecryption=True)
#     dbname = dbname_param['Parameter']['Value']

    credentials = json.load(open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']))
    host, user, password, dbname = credentials['host'], credentials['user'], credentials['password'], \
                                   credentials['database']

    print("Successfully retrieved DB params.")
    return host, user, password, dbname


def get_data_from_db(pen_id, dates_to_include, akpd_score_cutoff=0.99):

    # get sampling schedule by pen ID (default to 24 hour sampling if pen not available)
    if pen_id in SCHEDULE_BY_PEN.keys():
        hour_start, hour_end = SCHEDULE_BY_PEN[pen_id]
    else:
        hour_start, hour_end = 0, 24

    date_list = '({})'.format(", ".join([f"'{d}'" for d in dates_to_include]))
    print("Connecting to DB...")

    host, user, password, dbname = get_db_params_from_aws()

    res = []
    conn = None
    try:
        conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+password)
        cur = conn.cursor()

        query = "SELECT\
                to_char(captured_at, 'YYYY-MM-DD') AS date,\
                estimated_weight_g, estimated_length_mm\
                FROM prod.biomass_computations\
                WHERE\
                to_char(captured_at, 'YYYY-MM-DD') IN {0}\
                AND date_part('hour', captured_at) BETWEEN {1} AND {2}\
                AND pen_id = {3}\
                AND group_id IN ('{3}')\
                AND akpd_score > {4}\
                AND estimated_weight_g > 0\
                AND estimated_weight_g != double precision 'NaN'\
                ORDER BY date DESC".format(date_list, hour_start, hour_end, pen_id, akpd_score_cutoff)
        print(query)

        # execute statement
        cur.execute(query)

        # fetch rows
        rows = cur.fetchall()
        for row in rows:
            res.append(row)

        cur.close()
    except psycopg2.DatabaseError as error:
        print(error)
        print("COULD NOT CONNECT TO DB")
    finally:
        if conn is not None:
            conn.close()

    print(f"Successfully retrieved from DB: {len(res)} rows")
    return res


# flask app
application = api = Flask(__name__)
application.add_url_rule('/', 'index', (lambda: "hi"))


def process_bcs(biomass_computations):
    new_bcs = []
    for bc in biomass_computations:
        date, weight, length = bc
        estimated_k_factor = 1e5 * (weight / (length**3)) if (weight and length) else None
        new_bcs.append((date, weight, estimated_k_factor))
    new_bcs = sorted(new_bcs, key=lambda x: x[0])
    return new_bcs


def generate_smart_metrics(data):
    pen_id = data['penId']
    dates_to_compute = sorted(list(data['datesToCompute']))
    dates_to_include = sorted(list(data['datesToInclude']))

    resp = {}

    # Get data from DB
    biomass_computations = get_data_from_db(pen_id, dates_to_include)
    if not biomass_computations:
        return resp

    biomass_computations = process_bcs(biomass_computations)

    # If any dates to compute pre-date first available date in dates to include, return None
    first_available_date = biomass_computations[0][0]
    if not all([first_available_date <= date for date in dates_to_compute]):
        print('DATE IS BEFORE CAMERA DATA!')
        return resp

    pme = PopulationMetricsEstimator(biomass_computations)
    
    for date in dates_to_compute:
        metrics = pme.generate_smart_metrics_on_date(date,
                                                     max_day_difference=3,
                                                     incorporate_future=True,
                                                     apply_growth_rate=True)

        # smart_sample_size is np.int64. Quick workaround to make json happy.
        smart_sample_size = int(metrics['smart_sample_size'])
        resp_for_date = dict(
            weightMovingAvg=metrics['smart_average_weight'],
            weightMovingDist=metrics['smart_distribution'],
            movingKFactor=metrics['smart_average_kf'],
            dailyGrowthRate=metrics['growth_rate'],
            numMovingAvgBatiFish=smart_sample_size
        )
        resp[date] = resp_for_date

    return resp


In [18]:
data = {
    "penId": 23,
    "datesToCompute": [
        "2020-04-18",
        "2020-04-19",
        "2020-04-20",
        "2020-04-21",
        "2020-04-22",
        "2020-04-23",
        "2020-04-24",
        "2020-04-25",
        "2020-04-26",
        "2020-04-27",
        "2020-04-28",
        "2020-04-29",
        "2020-04-30",
        "2020-05-01",
        "2020-05-02",
        "2020-05-03",
        "2020-05-04",
        "2020-05-05",
        "2020-05-06",
        "2020-05-07",
        "2020-05-08",
        "2020-05-09",
        "2020-05-10",
        "2020-05-11",
        "2020-05-12",
        "2020-05-13",
        "2020-05-14",
        "2020-05-15",
        "2020-05-16",
        "2020-05-17",
        "2020-05-18"
    ],
    "datesToInclude": [
        "2020-04-04",
        "2020-04-05",
        "2020-04-06",
        "2020-04-07",
        "2020-04-08",
        "2020-04-09",
        "2020-04-10",
        "2020-04-11",
        "2020-04-12",
        "2020-04-13",
        "2020-04-14",
        "2020-04-15",
        "2020-04-16",
        "2020-04-17",
        "2020-04-18",
        "2020-04-19",
        "2020-04-20",
        "2020-04-21",
        "2020-04-22",
        "2020-04-23",
        "2020-04-24",
        "2020-04-25",
        "2020-04-26",
        "2020-04-27",
        "2020-04-28",
        "2020-04-29",
        "2020-04-30",
        "2020-05-01",
        "2020-05-02",
        "2020-05-03",
        "2020-05-04",
        "2020-05-05",
        "2020-05-06",
        "2020-05-07",
        "2020-05-08",
        "2020-05-09",
        "2020-05-10",
        "2020-05-11",
        "2020-05-12",
        "2020-05-13",
        "2020-05-14",
        "2020-05-15",
        "2020-05-16",
        "2020-05-17",
        "2020-05-18"
    ]
}

In [19]:
generate_smart_metrics(data)

Connecting to DB...
Successfully retrieved DB params.
SELECT                to_char(captured_at, 'YYYY-MM-DD') AS date,                estimated_weight_g, estimated_length_mm                FROM prod.biomass_computations                WHERE                to_char(captured_at, 'YYYY-MM-DD') IN ('2020-04-04', '2020-04-05', '2020-04-06', '2020-04-07', '2020-04-08', '2020-04-09', '2020-04-10', '2020-04-11', '2020-04-12', '2020-04-13', '2020-04-14', '2020-04-15', '2020-04-16', '2020-04-17', '2020-04-18', '2020-04-19', '2020-04-20', '2020-04-21', '2020-04-22', '2020-04-23', '2020-04-24', '2020-04-25', '2020-04-26', '2020-04-27', '2020-04-28', '2020-04-29', '2020-04-30', '2020-05-01', '2020-05-02', '2020-05-03', '2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09', '2020-05-10', '2020-05-11', '2020-05-12', '2020-05-13', '2020-05-14', '2020-05-15', '2020-05-16', '2020-05-17', '2020-05-18')                AND date_part('hour', captured_at) BETWEEN 0 AND 24        

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


KeyError: 'smart_sample_size'