In [None]:
import json
import os
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

from scipy import stats
import statsmodels.api as sm

# Notebook-wide matplotlib default: use an 18 pt base font for every figure below.
plt.rcParams['font.size'] = 18

In [None]:
# Build the warehouse client from the JSON credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle as soon as the JSON has been parsed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# In-memory cache of loaded frames, nested as pen_id -> start date -> end date.
# Re-running this cell empties the cache.
queryCache = {}

In [None]:
pen_id = 116
df_start_date = '2020-10-26'
df_end_date = '2020-10-30'

# Look the frame up in the nested cache (pen_id -> start date -> end date).
cached_df = queryCache.get(pen_id, {}).get(df_start_date, {}).get(df_end_date)

if cached_df is not None:
    df = cached_df
else:
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0.95)
    # df = extract_biomass_data(pen_id, '2020-08-24', '2020-09-03', 0.99)

    df['date'] = pd.to_datetime(df['date'])
#     df['week'] = df.date.apply(lambda x: x.weekofyear)

    # For every row, triangulate the keypoints and derive two scalars:
    #   depth       - median of component [1] of all world keypoints
    #                 (presumably distance from the camera - TODO confirm axis)
    #   new_lengths - Euclidean distance between UPPER_LIP and TAIL_NOTCH
    depths = []
    new_lengths = []
    for idx, row in df.iterrows():
        ann, cm = row.annotation, row.camera_metadata
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        depths.append(np.median([wkp[1] for wkp in wkps.values()]))
        new_lengths.append(np.linalg.norm(wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']))
    df['depth'] = depths
    df['new_lengths'] = new_lengths

    # Insert without replacing other entries already cached for this pen;
    # assigning a fresh dict to queryCache[pen_id] would drop every other
    # date range stored under the same pen.
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df


In [None]:
# Left-crop image URL identifying the single biomass computation to inspect.
url = 'https://aquabyte-crops.s3.eu-west-1.amazonaws.com/environment=production/site-id=61/pen-id=116/date=2020-10-14/hour=17/at=2020-10-14T17:50:23.086425000Z/left_frame_crop_516_1433_4096_2762.jpg'


# The URL is a constant defined above, so plain string interpolation is used here.
query = "SELECT * FROM prod.biomass_computations where left_crop_url='%s'" % (url,)

# NOTE(review): this rebinds `df`, replacing whatever frame an earlier cell
# loaded. Later cells read `df.new_lengths`, which this cell does not create -
# confirm the intended execution order.
df = rds_access_utils.extract_from_database(query)

In [None]:
# Per-row depth: median of component [1] across all triangulated keypoints.
depths = [
    np.median([
        point[1]
        for point in pixel2world(
            record.annotation['leftCrop'],
            record.annotation['rightCrop'],
            record.camera_metadata,
        ).values()
    ])
    for _, record in df.iterrows()
]
df['depth'] = depths
    


In [None]:
# Take the row labelled 0 and triangulate its keypoints into world coordinates.
row = df.loc[0]
ann1, cm1 = row.annotation, row.camera_metadata
wkps1 = pixel2world(ann1['leftCrop'], ann1['rightCrop'], cm1)
# Displacement from the tail notch to the upper lip; its norm is the body length.
vector = wkps1['UPPER_LIP'] - wkps1['TAIL_NOTCH']

In [None]:
# Only the last expression of a cell is rendered, so this bare `vector` shows nothing.
vector

# Length of the lip-to-tail vector (this is the value the cell displays).
np.linalg.norm(vector)

In [None]:
# Keep only rows captured between hour 7 and hour 15, both ends inclusive.
df = df[df.hour.between(7, 15)]
# df = df[(df.depth < 1.8)]

# df_7000 = df[df['estimated_weight_g'] > 7000]
# df_5000 = df[df['estimated_weight_g'] < 5000]
# print(np.mean(df_7000['depth']))

In [None]:
plt.figure(figsize=(20, 10))
depths = np.arange(.5, 2.5, .1)
weights = np.arange(1000, 10000, 1000)

# One boolean mask per 0.1-wide depth bin (open on both ends).
bin_masks = [(df.depth > lower) & (df.depth < (lower + .1)) for lower in depths]

# outputs: mean estimated weight per bin; outputs2: number of rows per bin.
outputs = [np.mean(df[bin_mask].estimated_weight_g) for bin_mask in bin_masks]
outputs2 = [np.sum(bin_mask) for bin_mask in bin_masks]

plt.bar(depths, outputs, width = .05)

In [None]:
# Share of rows with an estimated weight above 8000 g, scaled by 1000 (per mille).
heavy_count = np.sum(df.estimated_weight_g > 8000)
heavy_count / len(df) * 1000

In [None]:
# One small depth histogram per 1000 g weight bucket (bucket open on both ends).
weights = np.arange(1000, 10000, 1000)
bucket_width = 1000  # must match the step of `weights`

for weight in weights:
    mask = (df.estimated_weight_g > weight) & (df.estimated_weight_g < (weight + bucket_width))
    # Open the figure at the start of the iteration; opening it at the end
    # leaves an extra empty figure after the last bucket.
    plt.figure(figsize=(5, 2))
    plt.hist(df[mask].depth, bins = 20)
    plt.title(weight)

In [None]:
# Peek at the first rows whose estimated weight is below 2500 g.
mask = df.estimated_weight_g < 2500
df.loc[mask].head()

In [None]:
# potential_duplicates = []

# def f():
#     count = 0

#     for index, row in df.iterrows():
#         for index2, row2 in df[df.index > index].iterrows():
#             diff0 = row.estimated_k_factor - row2.estimated_k_factor #(index - index2).total_seconds()
#             diff1 = row.estimated_weight_g - row2.estimated_weight_g
#             diff2 = row.estimated_length_mm - row2.estimated_length_mm
# #             print(diff0, diff1, diff2)
#             if((np.abs(diff0) < 0.05) & (np.abs(diff1) < 20) & (np.abs(diff2) < 10)):
#                 print(index, index2)
#             count = count + 1
#             if count % 10000 == 0:
#                 print(count)
            
# f()

In [None]:
# Distribution of the lip-to-tail lengths computed from world keypoints.
fig, ax = plt.subplots(figsize=(20, 10))
# ax.hist(df.estimated_weight_g, bins = 50)
ax.hist(df.new_lengths, bins = 50)
# ax.hist(df.depth)

In [None]:
# Estimated weight against the keypoint-derived length, one point per row.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(df.new_lengths, df.estimated_weight_g)
# ax.scatter(df.estimated_length_mm, df.estimated_weight_g)

In [None]:
# Rows whose keypoint-derived length exceeds 1 (unit as returned by pixel2world
# - presumably metres, TODO confirm); inspected individually further down.
df1 = df.loc[df.new_lengths > 1]

In [None]:
# Single-day subset and its depth > 1.5 slice. Later cells read `df2` and
# `df1_5`, so they have to be defined for the notebook to run top to bottom.
df2 = df[df['date'] == '2020-10-27']
df1_5 = df2[df2['depth'] > 1.5]
# df95 = df2[df2['akpd_score'] > 0.95]
# df99 = df2[df2['akpd_score'] > 0.99]

# print(len(df1_5), len(df2))
# #plt.hist(df2['hour'])

# Fraction of rows with an estimated weight above 8000 g (kept last so that it
# remains the value the cell displays).
np.sum(df.estimated_weight_g > 8000) / len(df)

In [None]:


# 100 evenly spaced bucket centres between 4000 and 8000 (compared against estimated_weight_g).
buckets = np.linspace(4000, 8000, 100)

# Collects one Weibull parameter tuple per bucket centre.
results = []

def adj_weight(x):
    """Raise ``x`` to the two-thirds power.

    Applies elementwise to anything that supports ``**`` (Python scalars,
    NumPy arrays, pandas Series) and returns the same kind of object.
    """
    two_thirds = 2 / 3
    return x ** two_thirds

# Transformed weights are the same for every bucket, so compute them once.
adjusted_weights = adj_weight(df.estimated_weight_g)

# For each bucket centre, fit a Weibull (location pinned at 0.7) to the depths
# of rows whose transformed weight lies strictly inside centre +/- 1000.
# Each fit yields (shape, loc, scale), so `results` has one row per bucket.
results = np.array([
    stats.weibull_min.fit(
        df[
            (adjusted_weights > adj_weight(centre - 1000))
            & (adjusted_weights < adj_weight(centre + 1000))
        ].depth,
        floc = 0.7,
    )
    for centre in buckets
])

In [None]:
# Linear models of the fitted Weibull shape (column 0) and scale (column 2)
# as functions of the bucket centre, each with an intercept term.
X = sm.add_constant(buckets)
m0 = sm.OLS(results[:, 0], X).fit()
m2 = sm.OLS(results[:, 2], X).fit()
# OLSresults = model.fit()
# OLSresults.summary()



In [None]:
# Depth grid on which the Weibull density is sampled to find its peak.
x = np.linspace(0, 3, 5000)

def get_prob(depth, weight):
    """Relative Weibull density of ``depth`` for a fish of the given ``weight``.

    The shape and scale come from the linear models ``m0`` and ``m2``
    evaluated at ``weight``. The location is fixed at 0.7 and has to stay in
    sync with the ``floc`` used when the per-bucket distributions were fitted.
    The density at ``depth`` is divided by the largest density found on the
    module-level grid ``x``, so the result is 1 at the sampled peak.

    Returns the array produced by broadcasting ``depth`` against the model
    predictions; callers in this notebook read element ``[0]``.
    """
    v0 = m0.predict([1, weight])
    v1 = 0.7
    v2 = m2.predict([1, weight])

    density = stats.weibull_min.pdf(depth, v0, v1, v2)
    # np.max reduces in C; builtin max would walk all grid points in Python on
    # every call.
    peak_density = np.max(stats.weibull_min.pdf(x, v0, v1, v2))

    return density / peak_density

weights = df.estimated_weight_g
weights2 = weights[df.depth < 1.8]

# (weight, probability) pairs for every retained row, and for the retained
# rows with depth below 1.8.
kept_all = []
kept_shallow = []

for _, row in df.iterrows():
    depth, weight = row['depth'], row['estimated_weight_g']
    prob = get_prob(depth, weight)

    # Rows with a relative density under 1% are reported and left out.
    if prob < 0.01:
        print(depth, weight)
        continue

    if depth < 1.8:
        kept_shallow.append((weight, prob[0]))
    kept_all.append((weight, prob[0]))

weights_weight = np.array([w for w, _ in kept_all])
weights_prob = np.array([p for _, p in kept_all])
weights_weight2 = np.array([w for w, _ in kept_shallow])
weights_prob2 = np.array([p for _, p in kept_shallow])

In [None]:
def _inverse_prob_mean(values, probs):
    """Mean of ``values`` with each entry weighted by ``1 / probs``."""
    return np.sum(values / probs) / np.sum(1 / probs)

# Plain versus inverse-probability-weighted mean, for all rows (w1, w2) and
# for the depth < 1.8 subset (w3, w4).
w1 = np.mean(weights)
w2 = _inverse_prob_mean(weights_weight, weights_prob)
w3 = np.mean(weights2)
w4 = _inverse_prob_mean(weights_weight2, weights_prob2)

mask1 = (weights > 8000)
mask2 = (weights_weight > 8000)

# Same comparison restricted to weights above 8000.
print(np.mean(weights[mask1]))
print(_inverse_prob_mean(weights_weight[mask2], weights_prob[mask2]))

print(w1, w2, w3, w4)
# Relative change introduced by the reweighting.
print((w1 - w2) / w1)
print((w3 - w4) / w3)

In [None]:
# Smallest estimated weight among the rows with depth below 1.8.
min(weights2)

In [None]:
# Columns of `results` are the three values returned by weibull_min.fit for each bucket.
# Column 0 against the transformed bucket centre.
plt.figure(figsize=(20, 5))
plt.plot(adj_weight(buckets), results[:,0])
# Column 1 against the transformed bucket centre.
plt.figure(figsize=(20, 5))
plt.plot(adj_weight(buckets), results[:,1])
# Column 2 against the transformed bucket centre.
plt.figure(figsize=(20, 5))
plt.plot(adj_weight(buckets), results[:,2])
# Column 2 again, this time against the untransformed bucket centre.
plt.figure(figsize=(20, 5))
plt.plot(buckets, results[:,2])

In [None]:
# Two weight bands (open intervals): 4000-6000 and 6000-8000.
weight_g = df.estimated_weight_g
mask1 = (weight_g > 4000) & (weight_g < 6000)
mask2 = (weight_g > 6000) & (weight_g < 8000)
d1, d2 = df[mask1], df[mask2]

plt.figure(figsize=(20, 10))
x = np.linspace(0, 3, 5000)

# Fitted Weibull density of depth for each band (all parameters free).
for band in (d1, d2):
    fitted_params = stats.weibull_min.fit(band['depth'])
    plt.plot(x, stats.weibull_min.pdf(x, *fitted_params))

# Empirical depth densities of the same two bands for comparison.
plt.hist(d1['depth'], bins = 30, alpha = 0.5, color = 'green', density = True)
plt.hist(d2['depth'], bins = 30, alpha = 0.5, color = 'red', density = True)

In [None]:
# Weibull fit of the d1 depths with the location pinned at 0.68 ...
print(stats.weibull_min.fit(d1['depth'], floc=0.68))
# ... versus the fit with the location left free.
print(stats.weibull_min.fit(d1['depth']))

In [None]:
# Overlaid depth densities: rows above 7000 first, then rows below 5000
# (colours come from matplotlib's default cycle).
plt.hist(df[df.estimated_weight_g > 7000]['depth'], bins = 30, alpha = 0.5, density = True)
plt.hist(df[df.estimated_weight_g < 5000]['depth'], bins = 30, alpha = 0.5, density = True)

In [None]:
#res = stats.probplot(df.depth, plot=plt)
#res = stats.probplot(df.depth, dist=stats.chi2, sparams=(50, ), plot=plt)
# res = stats.probplot(df.depth, dist=stats.weibull_min, sparams=(2, 0, 1.49), plot=plt)
# Probability plot of depth against a Weibull whose parameters are fitted to the same data.
depth_fit_params = stats.weibull_min.fit(df['depth'])
res = stats.probplot(df.depth, dist=stats.weibull_min, sparams=depth_fit_params, plot=plt)
# res = stats.probplot(df_5000.depth, dist=stats.weibull_min, sparams=(2, ), plot=plt)
# res = stats.probplot(df_7000.depth, dist=stats.weibull_min, sparams=(2, ), plot=plt)

In [None]:
# Weight densities of df2 (blue) and df1_5 (red); the second histogram reuses
# the bin edges of the first so the bars line up.
fig, ax = plt.subplots(figsize=(20, 10))
density, bins, _ = ax.hist(df2.estimated_weight_g, bins = 30, alpha = 0.5, density = True, color = 'blue')
ax.hist(df1_5.estimated_weight_g, bins = bins, alpha = 0.5, density = True, color = 'red')
#plt.hist(df99.estimated_weight_g, bins = bins, alpha = 0.5, density = True, color = 'red')

In [None]:
print(np.mean(df2.estimated_weight_g))

# Edges of the 1000 g weight bins. A dedicated name is used so that the
# `buckets` array and the `mask1` mask defined by earlier cells are not
# overwritten when this cell runs.
weight_bin_edges = list(range(1000, 10001, 1000))

# Share of df2 rows in each (lower, upper] bin.
for lower, upper in zip(weight_bin_edges[:-1], weight_bin_edges[1:]):
    in_bin = (df2['estimated_weight_g'] > lower) & (df2['estimated_weight_g'] <= upper)

    # The mean of a boolean Series is the fraction of True values; it is
    # vectorised and gives NaN rather than ZeroDivisionError when df2 is empty.
    print('%i: %0.2f' % (lower, in_bin.mean()))

In [None]:
# Probability plot of the df2 weights against probplot's default distribution.
res = stats.probplot(df2.estimated_weight_g, plot=plt)

In [None]:
# Show the left-crop keypoint annotation of the first row of df1.
df1.iloc[0].annotation['leftCrop']

In [None]:


def display_crops(left_image_f, right_image_f, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crop images stacked vertically, optionally with keypoints.

    Args:
        left_image_f: path of the left crop image, read with ``plt.imread``.
        right_image_f: path of the right crop image.
        ann: annotation dict with ``'leftCrop'`` and ``'rightCrop'`` lists; every
            item provides ``'keypointType'``, ``'xCrop'`` and ``'yCrop'``.
        overlay_keypoints: draw each keypoint as a red dot on its image.
        show_labels: also write the keypoint type next to each dot.
    """
    fig, axes = plt.subplots(2, 1, figsize=(20, 20))

    # Top panel: left crop, bottom panel: right crop.
    images = [plt.imread(left_image_f), plt.imread(right_image_f)]
    for ax, image in zip(axes, images):
        ax.imshow(image)

    # keypointType -> [xCrop, yCrop] for each side, in panel order.
    keypoints_per_side = [
        {item['keypointType']: [item['xCrop'], item['yCrop']] for item in ann[side]}
        for side in ('leftCrop', 'rightCrop')
    ]

    if overlay_keypoints:
        for ax, keypoints in zip(axes, keypoints_per_side):
            for body_part, (kp_x, kp_y) in keypoints.items():
                ax.scatter([kp_x], [kp_y], color='red', s=10)
                if show_labels:
                    ax.annotate(body_part, (kp_x, kp_y), color='red')
    plt.show()

In [None]:
# Build the S3 client from the JSON credentials file named by AWS_CREDENTIALS.
# The context manager closes the file handle once the JSON has been parsed.
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(aws_credentials_file))

# Inspect one df1 row: print its length and weight, then show both crops
# with their keypoints overlaid.
row = df1.iloc[13]
print(row.new_lengths)
print(row.estimated_weight_g)
left_crop_url, right_crop_url = row.left_crop_url, row.right_crop_url
# download_from_url returns three values; only the first is used (passed to
# display_crops as the image file).
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
ann = row.annotation
display_crops(left_crop_f, right_crop_f, ann)

In [None]:
from IPython.display import HTML, Image, display

url = df1.iloc[0].left_crop_url
# A bare expression is rendered only when it is the last statement of a cell,
# so the image has to be displayed explicitly here.
display(Image(url=url))

# Crop-pixel coordinates of the left keypoints. Dedicated names are used so
# that the depth grid `x` read by get_prob is not overwritten.
left_keypoints = df1.iloc[0].annotation['leftCrop']
kp_x = [point['xCrop'] for point in left_keypoints]
kp_y = [point['yCrop'] for point in left_keypoints]

# NOTE(review): the scatter's y axis grows upward, whereas image rows usually
# grow downward - the points may appear vertically flipped relative to the
# image; confirm whether the axis should be inverted.
plt.scatter(kp_x, kp_y)

In [None]:
# Right crop of the first df1 row; `Image` must already be imported by an earlier cell.
url = df1.iloc[0].right_crop_url

# As the last expression of the cell, the image is rendered inline.
Image(url=url)