In [None]:
from collections import defaultdict
import json
import os
import numpy as np
import pandas as pd
from datetime import datetime
from research.utils.data_access_utils import RDSAccessUtils
from sklearn.linear_model import LinearRegression
from research.utils.datetime_utils import day_difference, add_days
from research.utils.datetime_utils import get_dates_in_range

import matplotlib.pyplot as plt
from scipy.stats import norm
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

# strftime/strptime pattern for calendar dates written as YYYY-MM-DD.
DATE_FORMAT = '%Y-%m-%d'

# Build the warehouse client from the JSON credentials file whose path is given
# by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable.  The context
# manager closes the file handle as soon as the JSON has been parsed instead of
# leaving it open for the lifetime of the kernel.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Collects the DataFrames appended by the loading loop, one per entry of `dates`.
dfs = list()

In [None]:
# One [start_date, end_date, label] entry per analysed month of 2020.
# Each range runs from the first to the last calendar day of its month
# (2020 is a leap year, so February ends on the 29th; October ends on the 31st).
dates = [
    ['2020-02-01', '2020-02-29', 'Feb'],
    ['2020-03-01', '2020-03-31', 'Mar'],
    ['2020-04-01', '2020-04-30', 'Apr'],
    ['2020-05-01', '2020-05-31', 'May'],
    ['2020-06-01', '2020-06-30', 'Jun'],
    ['2020-07-01', '2020-07-31', 'Jul'],
    ['2020-08-01', '2020-08-31', 'Aug'],
    ['2020-09-01', '2020-09-30', 'Sep'],
    ['2020-10-01', '2020-10-31', 'Oct'],
    ['2020-11-01', '2020-11-30', 'Nov'],
    ['2020-12-01', '2020-12-31', 'Dec']
    # Disabled: January 2021 is outside the 2020 window.
#     ['2021-01-01', '2021-01-31', 'Jan'],
]


In [None]:
# Show the last [start_date, end_date, label] entry.  The loop variable `date`
# only exists once the loading loop further down has run, so index `dates`
# directly to keep this cell valid on a fresh kernel.
dates[-1]

In [None]:
# Rows are only loaded when their akpd_score is strictly above this value.
akpd_filter = 0.99

# True  -> read directly from prod.biomass_computations.
# False -> use the variant that right-joins prod.crop_annotation onto it.
use_direct_query = True

# Start from an empty list so that re-running this cell does not append a
# second copy of every month (later cells pair dfs[i] with dates[i]).
dfs = []

for date in dates:
    start_date = date[0]
    end_date = date[1]

    # NOTE(review): both queries bound the range with `captured_at <= end_date`.
    # If captured_at is a timestamp column, a bare 'YYYY-MM-DD' bound stops at
    # the start of end_date, so rows captured during that day would be
    # excluded -- confirm against the schema.
    if use_direct_query:
        query = '''
            SELECT pen_id, captured_at, annotation, camera_metadata, estimated_weight_g, akpd_score FROM prod.biomass_computations
              WHERE captured_at >= '%s'
              AND captured_at <= '%s'
              AND akpd_score > %0.4f;
        ''' % (start_date, end_date, akpd_filter)
    else:
        query = """
            SELECT pen_id, captured_at, annotation, camera_metadata, estimated_weight_g, akpd_score FROM (
              (SELECT pen_id, captured_at, left_crop_url, annotation, camera_metadata FROM prod.crop_annotation cas
              INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
              WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
              AND cas.annotation_state_id = 3) a
            RIGHT JOIN 
              (SELECT left_crop_url, estimated_weight_g, akpd_score FROM prod.biomass_computations
              WHERE prod.biomass_computations.captured_at >= '%s'
              AND prod.biomass_computations.captured_at <= '%s'
              AND prod.biomass_computations.akpd_score > %0.4f) bc 
            ON 
              (a.left_crop_url=bc.left_crop_url)
            ) x
            WHERE x.captured_at >= '%s'
            AND x.captured_at <= '%s';
        """ % (start_date, end_date, akpd_filter, start_date, end_date)

    df = rds_access_utils.extract_from_database(query)

    # One value per row: the median of component [1] of the world keypoints
    # returned by pixel2world, stored in the `depth` column.
    depths = []
    for ann, cm in zip(df.annotation, df.camera_metadata):
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        depths.append(np.median([wkp[1] for wkp in wkps.values()]))
    df['depth'] = depths

    # Order chronologically and index by capture time so the frame can be
    # resampled by day later on; `date` holds the calendar day of each row.
    df = df.sort_values('captured_at').copy(deep=True)
    df.index = pd.to_datetime(df.captured_at)
    df['date'] = df.index.date

    dfs.append(df)

In [None]:
# Display the frame left in `df` by the loading loop, ordered by capture time.
df.sort_values(by='captured_at').copy(deep=True)

In [None]:
plt.figure(figsize=(20, 10))

# 99th percentile of depth for each monthly frame, in the order of `dfs`.
depth_p99 = []
for df in dfs:
    depth_p99.append(np.percentile(df.depth, 99))

# Month labels for the x axis (assumes dfs[i] was loaded for dates[i]).
month_labels = [entry[2] for entry in dates]

# Label the line so that plt.legend() has an artist to show.
plt.plot(depth_p99, label='99th percentile of depth')
plt.xticks(range(len(month_labels)), month_labels)

# The plotted statistic is a 99th percentile per month, not an average, and
# the x axis is the month rather than a weight.
plt.title('99th-percentile depth from camera over 2020')
plt.xlabel('Month')
plt.ylabel('Depth')
plt.legend()

In [None]:
plt.figure(figsize=(20, 10))

# 99th percentile of depth per monthly frame, restricted to rows whose
# estimated weight lies strictly between 4000 g and 5000 g.
depth_p99 = []
for df in dfs:
    in_band = df[(df.estimated_weight_g > 4000) & (df.estimated_weight_g < 5000)]
    # A month with no rows in the band contributes a gap (NaN) instead of
    # asking np.percentile for a percentile of nothing.
    depth_p99.append(np.percentile(in_band.depth, 99) if len(in_band) else np.nan)

# Month labels for the x axis (assumes dfs[i] was loaded for dates[i]).
month_labels = [entry[2] for entry in dates]

# Label the line so that plt.legend() has an artist to show.
plt.plot(depth_p99, label='99th percentile of depth, 4000-5000 g')
plt.xticks(range(len(month_labels)), month_labels)

plt.title('99th-percentile depth from camera over 2020 (estimated weight 4000-5000 g)')
plt.xlabel('Month')
plt.ylabel('Depth')
plt.legend()

In [None]:
plt.figure(figsize=(20, 10))

# One line per pen present in dfs[4] (the fifth loaded month, 'Jun' in `dates`).
# Series.unique() keeps first-appearance order, so line colours are stable
# between runs.
for pen_id in dfs[4].pen_id.unique():
    pen_plots = []
    for index, df in enumerate(dfs):
        in_band = df[(df.pen_id == pen_id) & (df.estimated_weight_g > 4000) & (df.estimated_weight_g < 5000)]
        # Months where this pen has no rows in the weight band become gaps.
        pen_plots.append(np.percentile(in_band.depth, 99) if len(in_band) else np.nan)
    plt.plot(pen_plots, label='Pen %s' % pen_id)

# Reference markers: 99th percentile over every row of each month
# (all pens, no weight restriction).
depth_p99 = []
for df in dfs:
    depth_p99.append(np.percentile(df.depth, 99))

plt.plot(depth_p99, 'o', label='All pens, all weights')

# Month labels for the x axis (assumes dfs[i] was loaded for dates[i]).
month_labels = [entry[2] for entry in dates]
plt.xticks(range(len(month_labels)), month_labels)

plt.title('99th-percentile depth from camera over 2020, per pen (estimated weight 4000-5000 g)')
plt.xlabel('Month')
plt.ylabel('Depth')
plt.legend()

In [None]:
# Begin with the pens seen in the last frame, then keep only those that also
# occur in each of dfs[-4], dfs[-3] and dfs[-2].
all_pen_ids = list(set(dfs[-1].pen_id))

for df in dfs[-4:-1]:
    kept_so_far = all_pen_ids
    all_pen_ids = []
    for pen in list(set(df.pen_id)):
        if pen in kept_so_far:
            all_pen_ids.append(pen)

all_pen_ids

In [None]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import scipy.stats as stats
import matplotlib.pyplot as plt

all_pens = all_pen_ids

# Positions in `dfs` of the three monthly frames compared side by side,
# one subplot column each.
month_offsets = (-4, -3, -2)

# squeeze=False keeps `axes` two-dimensional even when there is only one pen,
# so every `ax` yielded below is a row of three Axes and ax[column] is valid.
fig, axes = plt.subplots(nrows=len(all_pens), ncols=3, figsize=(30, 5*len(all_pens)), squeeze=False)

# Grid on which the fitted normal density is drawn (same span as the histogram).
x = np.linspace(0.5, 2, 100)

for pen, ax in zip(all_pens, axes):
    for column, offset in enumerate(month_offsets):
        month_df = dfs[offset]
        pen_depths = month_df[month_df.pen_id == pen].depth

        # Normal density fitted to this pen's depths, drawn over their histogram.
        # Alternative tried earlier: stats.weibull_min in place of stats.norm.
        ax[column].plot(x, stats.norm.pdf(x, *stats.norm.fit(pen_depths)))
        ax[column].hist(pen_depths, range=(0.5, 2), bins=20, density=True)

        # Month label assumes dfs[offset] was loaded for dates[offset].
        ax[column].set_title('Pen %s - %s' % (pen, dates[offset][2]))
        ax[column].set_xlabel('Depth')

In [None]:
import scipy.stats as stats
import matplotlib.pyplot as plt

all_pens = all_pen_ids

# Positions in `dfs` of the three monthly frames compared side by side,
# one subplot column each.
month_offsets = (-4, -3, -2)

# squeeze=False keeps `axes` two-dimensional even when there is only one pen,
# so every `ax` yielded below is a row of three Axes and ax[column] is valid.
fig, axes = plt.subplots(nrows=len(all_pens), ncols=3, figsize=(30, 5*len(all_pens)), squeeze=False)

# Grid on which the fitted normal density is drawn (same span as the histogram).
x = np.linspace(0, 10000, 100)

for pen, ax in zip(all_pens, axes):
    for column, offset in enumerate(month_offsets):
        month_df = dfs[offset]
        pen_weights = month_df[month_df.pen_id == pen].estimated_weight_g

        # Normal density fitted to this pen's estimated weights, drawn over
        # their histogram.
        ax[column].plot(x, stats.norm.pdf(x, *stats.norm.fit(pen_weights)))
        ax[column].hist(pen_weights, range=(0, 10000), bins=30, density=True)

        # Month label assumes dfs[offset] was loaded for dates[offset].
        ax[column].set_title('Pen %s - %s' % (pen, dates[offset][2]))
        ax[column].set_xlabel('Estimated weight (g)')

In [None]:
plt.figure(figsize=(20, 10))

# Lower edges of the 500 g weight bins.  Defined before its first use so the
# cell runs on a fresh kernel, and kept outside the month loop because it
# never changes.
weights = np.arange(0, 8000, 500)

# depth_weights[i] collects, month by month, the mean depth of weight bin i.
depth_weights = [[] for _ in weights]

for index, df in enumerate(dfs):
    date = dates[index]

    # Mean depth per weight bin for this month.
    depths = []

    # A separate counter is used for the bins so the month counter `index`
    # is not overwritten inside the loop.
    for bin_index, weight in enumerate(weights):
        mask = (df['estimated_weight_g'] > weight) & (df['estimated_weight_g'] < weight + 500)
        mean_depth = np.mean(df[mask]['depth'])

        depths.append(mean_depth)
        depth_weights[bin_index].append(mean_depth)

    plt.plot(weights, depths, label = date[2])

plt.title('Avg depth from camera over 2020')
plt.xlabel('Avg weight')
plt.ylabel('Depth')
plt.legend()

In [None]:
# One line per weight bin: the values collected for it in `depth_weights`,
# labelled with the matching entry of `weights`.
for position in range(len(depth_weights)):
    plt.plot(depth_weights[position], label=weights[position])

# Legend left disabled:
# plt.legend()

In [None]:
# `df_pt1` is otherwise only assigned in the notebook's last cell, so on a
# top-to-bottom run it would be undefined here.  Bind it before first use,
# with the same assignment that cell performs.
df_pt1 = df

df_pt1.depth.resample('D')

In [None]:
# Daily mean of depth; days without any rows take the previous day's value.
depth_by_day = df_pt1.depth.resample('D').mean().ffill()

# Daily 95th percentile of depth.  The previous `np.percentile(x or [1], 95)`
# evaluated a Series in a boolean context, which pandas rejects as ambiguous.
# Resampler.quantile yields NaN for days without rows, and those days are then
# forward-filled exactly like the daily mean above.
depth_by_day_95 = df_pt1.depth.resample('D').quantile(0.95).ffill()

In [None]:
# Peek at the first five rows of the frame used for the daily aggregates.
df_pt1.head(5)

In [None]:
plt.figure(figsize=(20, 10))

# Daily mean and daily 95th percentile on the same axes.  The second curve was
# previously a repeat of `depth_by_day`, so the percentile series was never shown.
plt.plot(depth_by_day, label='Daily mean')
plt.plot(depth_by_day_95, label='Daily 95th percentile')
plt.title('Avg depth from camera over 2020')
plt.xlabel('Date')
plt.ylabel('Depth from camera')
plt.legend()

In [None]:
# Peek at the last five rows of whichever frame `df` currently refers to.
df.tail(5)

In [None]:
# Alias (not a copy) of whatever `df` currently refers to.
# NOTE(review): cells above this one already read `df_pt1`, so on a fresh
# top-to-bottom run they execute before this assignment exists.
df_pt1 = df