In [None]:
from collections import defaultdict
import json
import os
import numpy as np
import pandas as pd
from datetime import datetime
from research.utils.data_access_utils import RDSAccessUtils
from sklearn.linear_model import LinearRegression
from research.utils.datetime_utils import day_difference, add_days
from research.utils.datetime_utils import get_dates_in_range

import matplotlib.pyplot as plt
from scipy.stats import norm
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

# strptime/strftime pattern for calendar dates (YYYY-MM-DD).
DATE_FORMAT = '%Y-%m-%d'

# The path of the JSON credentials file comes from the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle once the JSON has been parsed, instead of leaving it
# open for the lifetime of the kernel.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
%%javascript
// Replace the output area's `_should_scroll` hook with a function that always
// returns false, whatever `lines` is.
// NOTE(review): presumably this stops the classic notebook front end from
// collapsing long outputs into a scroll box — confirm for the front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# In-memory cache of query results, keyed by the full SQL text.
# Re-running this cell empties the cache.
queryCache = dict()

In [None]:
# Pen and capture window to analyse. Only biomass computations whose
# akpd_score is strictly greater than `akpd_filter` are selected.
pen_id = 56
start_date = '2020-04-25'
end_date = '2020-05-25'
# Alternative configuration:
# pen_id = 66
# start_date = '2020-05-20'
# end_date = '2020-06-10'
akpd_filter = 0.99

query_template = """
    SELECT * FROM (
      (SELECT * FROM prod.crop_annotation cas
      INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
      WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
      AND cas.annotation_state_id = 3
      AND cas.pen_id=%i) a
    RIGHT JOIN 
      (SELECT left_crop_url, estimated_weight_g, akpd_score FROM prod.biomass_computations
      WHERE prod.biomass_computations.captured_at >= '%s'
      AND prod.biomass_computations.captured_at <= '%s'
      AND prod.biomass_computations.akpd_score > %0.4f) bc 
    ON 
      (a.left_crop_url=bc.left_crop_url)
    ) x
    WHERE x.captured_at >= '%s'
    AND x.captured_at <= '%s'
    AND x.pen_id = %i
    AND x.group_id = '%i';
"""
query_params = (
    pen_id, start_date, end_date, akpd_filter,
    start_date, end_date, pen_id, pen_id,
)
query = query_template % query_params

cached_df = queryCache.get(query)
if cached_df is not None:
    # Work on a copy so that later cells cannot mutate the cached frame.
    df = cached_df.copy()
else:
    df = rds_access_utils.extract_from_database(query)

    # One value per row: the median of component 1 over all world keypoints
    # returned by pixel2world for that row's left/right crop annotations.
    # NOTE(review): component 1 is presumably the depth axis — confirm.
    depths = [
        np.median([
            world_point[1]
            for world_point in pixel2world(
                annotation['leftCrop'], annotation['rightCrop'], camera_metadata
            ).values()
        ])
        for annotation, camera_metadata in zip(df['annotation'], df['camera_metadata'])
    ]
    df['depth'] = depths

    # Order chronologically and index by capture timestamp; the derived
    # `date` (string) and `hour` columns are used by the cells below.
    df = df.sort_values('captured_at').copy(deep=True)
    df.index = pd.to_datetime(df['captured_at'])
    dates = df.index.date.astype(str)
    df['date'] = dates
    df['hour'] = df.index.hour

    # Ensure the column exists even when the query result does not provide it.
    if 'estimated_k_factor' not in df.columns:
        df['estimated_k_factor'] = 0.0

    queryCache[query] = df.copy()

In [None]:
# Build the 2-D sample histogram `a`:
#   rows    -> whole hours elapsed since the first sample (df is sorted by time),
#   columns -> estimated weight in whole kilograms (grams / 1000, truncated).

startDate = df.index[0]
endDate = df.index[-1]

maxWeight = df['estimated_weight_g'].max()
maxWeightInt = int(maxWeight / 1000)

# Total span of the data in whole hours. Computed once here and never reused
# as a loop variable, because later cells read `hours` as the overall span.
diff = endDate - startDate
hours = diff.days * 24 + diff.seconds // 3600

a = np.zeros((hours + 1, maxWeightInt + 1))
print(a.shape)

# Distinct capture dates in order of first appearance (dict keys keep
# insertion order, which avoids a linear list scan for every row).
dateStrings = list(dict.fromkeys(df['date']))
dates = [datetime.strptime(date_string, DATE_FORMAT) for date_string in dateStrings]

# Bin index of every sample along both axes.
hour_bins = np.asarray((df.index - startDate).total_seconds() // 3600, dtype=int)
weight_bins = (np.asarray(df['estimated_weight_g'], dtype=float) / 1000).astype(int)

# np.add.at accumulates repeated (hour, weight) pairs correctly, unlike
# `a[hour_bins, weight_bins] += 1`, which would count each pair only once.
np.add.at(a, (hour_bins, weight_bins), 1)

In [None]:
import pysal as ps

# Global Moran's I of the histogram, one value per consecutive window.
window = 24  # window length in hourly rows of `a`
skip = 24    # step between consecutive window ends, in hourly rows

window_ends = np.arange(window, hours, skip)

coefs = []
for i in window_ends:
    # Rows of the current window, restricted to weight columns 2..5.
    b = a[(i - window):i,2:6]
    # Lattice weights matching the shape of the window.
    w = ps.lib.weights.lat2W(b.shape[0], b.shape[1])
    mi = ps.explore.esda.Moran(b, w)
    coefs.append(mi.I)

# Label each window by the timestamp at which it ends. This always yields
# exactly one x value per coefficient, whereas the list of distinct sample
# dates can be longer or shorter than the number of windows.
window_end_times = [startDate + pd.Timedelta(hours=int(i)) for i in window_ends]

# Only the first of the two panels is drawn on.
fig, axes = plt.subplots(2, 1, figsize=(15, 20))
axes[0].plot(window_end_times, coefs)
axes[0].set_xlabel('Date')
axes[0].set_ylabel("Moran's I")
axes[0].set_title("Pen %i Moran's I per %i-hour window" % (pen_id, window))

In [None]:
# Local Moran statistics over the whole histogram, using lattice weights with
# the same number of rows and columns as `a`.
n_rows, n_cols = a.shape
w = ps.lib.weights.lat2W(n_rows, n_cols)
mi = ps.explore.esda.Moran_Local(a, w)

In [None]:
# Mean count across the weight columns for every hourly row of `a`.
# (Log-scaled variant kept for reference: np.log(1 + np.mean(a, 1)).)
f = a.mean(axis=1)

# Local Moran statistics of that series on a lattice with one column and as
# many rows as there are entries in `f`.
n_hourly_rows = f.shape[0]
w = ps.lib.weights.lat2W(n_hourly_rows, 1)
mi = ps.explore.esda.Moran_Local(f, w)

local_moran_values = mi.Is
plt.plot(local_moran_values)

In [None]:
# Line plot of `f` against its position index, drawn on the current axes.
plt.gca().plot(f)

In [None]:
# Scatter of `f` (x) against the local Moran values currently held in `mi` (y).
plt.scatter(x=f, y=mi.Is)

In [None]:
# Score per consecutive window, computed from the hourly series `f`:
# 1 - ((90th percentile - 10th percentile) / mean) / 5.
window = 24  # window length in entries of `f`
skip = 24    # step between consecutive window ends

window_ends = np.arange(window, hours, skip)

coefs = []
for i in window_ends:
    data = f[(i - window):i]

    if np.sum(data) < 1:
        # Too little data in this window: repeat the previous score. If there
        # is no previous score yet (the first window is empty), record NaN
        # instead of indexing into an empty list.
        coefs.append(coefs[-1] if coefs else np.nan)
        continue

    # Spread of the window relative to its mean. This is stored under its own
    # name so that the histogram array `a` is not overwritten by a scalar.
    relative_spread = (np.percentile(data, 90) - np.percentile(data, 10)) / np.mean(data)

    # NOTE(review): the divisor 5 is kept from the original analysis —
    # confirm where it comes from.
    coefs.append(1 - relative_spread / 5)

# Label each window by the timestamp at which it ends. This always yields
# exactly one x value per score, whereas the list of distinct sample dates
# can be longer or shorter than the number of windows.
window_end_times = [startDate + pd.Timedelta(hours=int(i)) for i in window_ends]

# Only the first of the two panels is drawn on.
fig, axes = plt.subplots(2, 1, figsize=(15, 20))
axes[0].bar(window_end_times, coefs)
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Sampling Representativity')
axes[0].set_title('Pen %i Sampling Representativity' % pen_id)


In [None]:
# Local Moran values laid out on the histogram grid (`c`), then averaged across
# the weight columns to give one value per hourly row (`d`).
#
# The local Moran is recomputed here under its own names because `mi` is
# reassigned by other cells to a 1-D statistic, whose values cannot be
# reshaped to the 2-D grid.
if np.ndim(a) != 2:
    raise ValueError(
        "`a` is not the 2-D hour x weight histogram any more; "
        "re-run the cell that builds it before running this cell."
    )

hist_w = ps.lib.weights.lat2W(a.shape[0], a.shape[1])
hist_mi = ps.explore.esda.Moran_Local(a, hist_w)

c = hist_mi.Is.reshape(a.shape)
d = np.mean(c, 1)

In [None]:
# Line plot of `d` against its position index, drawn on the current axes.
plt.gca().plot(d)

In [None]:
# Standard deviation of `d` within each consecutive window.
window = 24  # window length in entries of `d`
skip = 24    # step between consecutive window ends

window_ends = np.arange(window, hours, skip)
coefs = [np.std(d[(i - window):i]) for i in window_ends]

# Label each window by the timestamp at which it ends. This always yields
# exactly one x value per coefficient, whereas the list of distinct sample
# dates can be longer or shorter than the number of windows.
window_end_times = [startDate + pd.Timedelta(hours=int(i)) for i in window_ends]

# Only the first of the two panels is drawn on.
fig, axes = plt.subplots(2, 1, figsize=(15, 20))
axes[0].plot(window_end_times, coefs)
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Sampling Representativity')
# The pen number comes from `pen_id`, so the title always names the pen that
# was actually queried.
axes[0].set_title('Pen %i Sampling Representativity' % pen_id)
