In [None]:
import json
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

from research_lib.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import pixel2world
import plotly.express as px

In [None]:
def add_date_time(df):
    """Index ``df`` by capture time and add ``date`` / ``hour`` columns in place.

    The ``captured_at`` column is parsed with ``pd.to_datetime`` and installed
    as the frame's index (the column itself is kept). Two columns are then
    derived from that index:

    * ``date`` -- the calendar date of each row, rendered as a string.
    * ``hour`` -- the hour component of each timestamp.

    Nothing is returned; ``df`` is modified in place.
    """
    capture_times = pd.to_datetime(df['captured_at'])
    df.index = capture_times

    # Read back from the index so both derived columns come from the same
    # DatetimeIndex that was just installed.
    timestamps = df.index
    df['date'] = timestamps.date.astype(str)
    df['hour'] = timestamps.hour

def add_spatial_attributes(df):
    """Add ``yaw``, ``pitch``, ``roll`` and ``depth`` columns to ``df`` in place.

    For every row, the ``leftCrop`` / ``rightCrop`` entries of ``annotation``
    and the row's ``camera_metadata`` are passed to ``pixel2world``. From the
    returned mapping of keypoint name -> coordinates:

    * ``depth`` is the median of component ``[1]`` over all keypoints.
    * ``v`` is ``UPPER_LIP - TAIL_NOTCH`` and ``u`` is
      ``ADIPOSE_FIN - ANAL_FIN``.
    * ``yaw``   = ``arctan(v[1] / |v[0]|)`` in degrees.
    * ``pitch`` = ``arctan(v[2] / |v[0]|)`` in degrees.
    * ``roll``  = ``arctan(u[1] / u[2])`` in degrees.

    Rows whose computation raises ``TypeError`` (for example when
    ``annotation`` is ``None`` and cannot be subscripted) get NaN in all four
    columns. NaN is used instead of ``None`` so the columns stay floating
    point even when every row fails; otherwise numeric operations such as
    ``.abs()`` or ``.mean()`` on the resulting columns would break.

    Nothing is returned; ``df`` is modified in place.
    """
    rad_to_deg = 180.0 / np.pi
    yaws, pitches, rolls, depths = [], [], [], []

    # Only two columns are needed, so iterate over them directly instead of
    # materialising a full Series per row with iterrows().
    for ann, cm in zip(df['annotation'], df['camera_metadata']):
        try:
            # Read the crops first so an unusable annotation fails before any
            # further work is attempted.
            left_crop, right_crop = ann['leftCrop'], ann['rightCrop']
            world_keypoints = pixel2world(left_crop, right_crop, cm)
            depth = np.median([x[1] for x in world_keypoints.values()])
            u = world_keypoints['ADIPOSE_FIN'] - world_keypoints['ANAL_FIN']
            v = world_keypoints['UPPER_LIP'] - world_keypoints['TAIL_NOTCH']
            yaw = np.arctan(v[1] / abs(v[0])) * rad_to_deg
            pitch = np.arctan(v[2] / abs(v[0])) * rad_to_deg
            roll = np.arctan(u[1] / u[2]) * rad_to_deg
        except TypeError:
            # All-or-nothing: a failure anywhere above blanks the whole row.
            yaw = pitch = roll = depth = np.nan
        yaws.append(yaw)
        pitches.append(pitch)
        rolls.append(roll)
        depths.append(depth)

    df['yaw'] = yaws
    df['pitch'] = pitches
    df['roll'] = rolls
    df['depth'] = depths

    

In [None]:
# Database accessor shared by the query cells in this notebook.
rds = RDSAccessUtils()

In [None]:
# Fetch capture time, keypoint annotation and camera metadata for pen 56
# between 2020-06-01 and 2020-08-25, keeping only rows with akpd_score > 0.95.
# NOTE(review): the upper bound of `between` is a bare date; if captured_at is
# a timestamp this may exclude most of 2020-08-25 -- confirm that is intended.
query = """
    select captured_at, annotation, camera_metadata from prod.biomass_computations
    where pen_id=56
    and captured_at between '2020-06-01' and '2020-08-25'
    and akpd_score > 0.95
"""

df = rds.extract_from_database(query)

In [None]:
# Adds the yaw / pitch / roll / depth columns to df in place.
add_spatial_attributes(df)

In [None]:
# Re-indexes df by captured_at and adds the date / hour columns in place; the
# resample('D') calls in later cells depend on this datetime index.
add_date_time(df)

In [None]:
# Daily mean yaw (degrees).
daily_yaw_mean = df['yaw'].resample('D').mean()
plt.plot(daily_yaw_mean)
plt.show()

In [None]:
# Daily median pitch (degrees).
daily_pitch_median = df['pitch'].resample('D').median()
plt.plot(daily_pitch_median)
plt.show()

In [None]:
# Daily median roll (degrees).
daily_roll_median = df['roll'].resample('D').median()
plt.plot(daily_roll_median)
plt.show()

In [None]:
# Day-to-day spread of yaw: standard deviation within each day.
daily_yaw_std = df['yaw'].resample('D').std()
plt.plot(daily_yaw_std)
plt.show()

In [None]:
# Day-to-day spread of pitch: standard deviation within each day.
daily_pitch_std = df['pitch'].resample('D').std()
plt.plot(daily_pitch_std)
plt.show()

In [None]:
# Day-to-day spread of roll: standard deviation within each day.
daily_roll_std = df['roll'].resample('D').std()
plt.plot(daily_roll_std)
plt.show()

In [None]:
# Number of rows per day (every row counts, including those with NaN roll).
daily_row_count = df['roll'].resample('D').size()
plt.plot(daily_row_count)
plt.show()

In [None]:
# Per-date distribution of roll.
fig = px.violin(data_frame=df, x='date', y='roll')
fig.show()

In [None]:
# Per-date distribution of yaw.
fig = px.violin(data_frame=df, x='date', y='yaw')
fig.show()

In [None]:
# Sweep a symmetric orientation cutoff from 5 to 70 degrees in steps of 5.
# For each cutoff, keep rows whose |yaw|, |pitch| and |roll| are all below it,
# count the surviving rows per day, and record the median daily count
# (0 when no row survives).
theta_cutoffs = list(np.arange(5, 75, 5))
low_sample_sizes = []
for theta_cutoff in theta_cutoffs:
    yaw_ok = df['yaw'].abs() < theta_cutoff
    pitch_ok = df['pitch'].abs() < theta_cutoff
    roll_ok = df['roll'].abs() < theta_cutoff
    mask = yaw_ok & pitch_ok & roll_ok

    daily_counts = df.loc[mask, 'yaw'].resample('D').agg(lambda day: day.shape[0])
    sample_sizes = daily_counts.values
    low_sample_sizes.append(
        np.percentile(sample_sizes, 50) if len(sample_sizes) else 0
    )
    
    

In [None]:
# Median daily sample size as a function of the orientation cutoff.
fig, ax = plt.subplots()
ax.plot(theta_cutoffs, low_sample_sizes)
ax.grid()
plt.show()

In [None]:
# NOTE(review): `sample_sizes` is not defined in this cell; it is whatever the
# last iteration of the threshold-sweep loop left behind. Re-running this cell
# filters the already-filtered array again, and no later cell in this notebook
# reads the result before reassigning the name -- confirm whether it is needed.
sample_sizes = sample_sizes[sample_sizes > 100]

<h1> Orientation threshold analysis across pens </h1>

In [None]:
from research.utils.datetime_utils import add_days

In [None]:
# Collect every pen id known to the customer schema, in ascending order.
query = 'select id from customer.pens;'
pen_df = rds.extract_from_database(query)
pen_ids = sorted(pen_df['id'].values)

In [None]:
# Repeat the orientation-cutoff sweep for every pen, over a window that ends
# on the pen's latest date with data and starts at add_days(end_date, -120).
# Pens with no dates, no qualifying rows, or a missing camera_metadata on the
# first row are skipped. One figure is shown per remaining pen.
# NOTE(review): end_date is a bare date string; if captured_at is a timestamp,
# the `between` upper bound may exclude most of the final day -- confirm.
for pen_id in pen_ids:
    print('Pen ID: {}'.format(pen_id))

    # Which calendar days does this pen have any biomass computations for?
    query = """
        select distinct DATE(captured_at) from prod.biomass_computations
        where pen_id = {};
    """.format(pen_id)
    date_df = rds.extract_from_database(query)
    if len(date_df) == 0:
        continue

    dates = sorted(date_df['date'].astype(str).values)
    end_date = dates[-1]
    start_date = add_days(end_date, -120)

    query = """
        select captured_at, annotation, camera_metadata from prod.biomass_computations
        where pen_id={}
        and captured_at between '{}' and '{}'
        and akpd_score > 0.95
    """.format(pen_id, start_date, end_date)

    df = rds.extract_from_database(query)
    if len(df) == 0 or df['camera_metadata'].iloc[0] is None:
        continue

    add_spatial_attributes(df)
    add_date_time(df)

    # For each cutoff: mean rows/day over the days that still have data.
    theta_cutoffs = list(np.arange(5, 75, 5))
    low_sample_sizes = []
    for theta_cutoff in theta_cutoffs:
        yaw_ok = df['yaw'].abs() < theta_cutoff
        pitch_ok = df['pitch'].abs() < theta_cutoff
        roll_ok = df['roll'].abs() < theta_cutoff
        mask = yaw_ok & pitch_ok & roll_ok

        daily_counts = df.loc[mask, 'yaw'].resample('D').agg(lambda day: day.shape[0])
        sample_sizes = daily_counts.values
        sample_sizes = sample_sizes[sample_sizes > 0]
        low_sample_sizes.append(
            np.mean(sample_sizes) if len(sample_sizes) else 0
        )

    fig, ax = plt.subplots()
    ax.plot(theta_cutoffs, low_sample_sizes)
    ax.grid()
    plt.show()
    
    

    
    

In [None]:
# Displays the date window left behind by the per-pen loop: the values from
# the most recent iteration that got as far as computing start/end dates.
start_date, end_date

In [None]:
# Rows per day in df. At this point df is whatever the per-pen loop assigned
# last, not the pen-56 frame loaded near the top of the notebook.
df.captured_at.resample('D').agg(lambda x: x.shape[0])

In [None]:
# Distinct date strings present in df, in ascending order.
sorted(df['date'].unique())