In [None]:
import json
import os
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

from scipy import stats
import statsmodels.api as sm

# Set matplotlib's default font size to 18 for every figure drawn below.
plt.rcParams.update({'font.size': 18})

In [None]:
# Read the warehouse credentials from the JSON file whose path is given by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The `with` block closes
# the file handle as soon as it has been parsed instead of leaving it open for
# the lifetime of the kernel.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    sql_credentials = json.load(credentials_file)

rds_access_utils = RDSAccessUtils(sql_credentials)

In [None]:
# In-memory cache of extracted frames, nested as
# pen_id -> start date -> end date -> DataFrame.
# Re-running this cell empties the cache.
queryCache = dict()

In [None]:
# Pen and date window to analyse; together these three values form the cache key.
pen_id = 153
df_start_date = '2020-10-06'
df_end_date = '2020-10-28'

# Reuse the frame from an earlier run of this cell when the same
# (pen, start, end) combination has already been extracted.
cached_for_start = queryCache.get(pen_id, {}).get(df_start_date, {})
if df_end_date in cached_for_start:
    df = cached_for_start[df_end_date]
else:
    # NOTE(review): the trailing 0.9 is presumably a minimum keypoint-score
    # threshold applied by extract_biomass_data — confirm against that helper.
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0.9)
    df.date = pd.to_datetime(df.date)

    # One depth value per row: the median, over that row's world keypoints,
    # of each keypoint's second coordinate (index 1).
    # NOTE(review): index 1 is assumed to be the distance-from-camera axis — confirm.
    depths = []
    for annotation, camera_metadata in zip(df['annotation'], df['camera_metadata']):
        world_keypoints = pixel2world(
            annotation['leftCrop'], annotation['rightCrop'], camera_metadata
        )
        depths.append(np.median([point[1] for point in world_keypoints.values()]))
    df['depth'] = depths

    # Insert without replacing the pen's existing entries: assigning a fresh
    # dict to queryCache[pen_id] would throw away every other date range
    # already cached for this pen.
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df


In [None]:
# Not the last expression of the cell, so the notebook does not render it.
df.head()

# Rows above 7000 and rows below 5000 in estimated_weight_g.
df_7000 = df.loc[df['estimated_weight_g'].gt(7000)]
df_5000 = df.loc[df['estimated_weight_g'].lt(5000)]

# Mean depth of the heavy subset.
print(np.mean(df_7000['depth']))

In [None]:
# Rows for a single day, plus three subsets of that day: two akpd_score
# cut-offs and one depth cut-off.
df2 = df.loc[df['date'] == '2020-10-27']
df95 = df2.loc[df2['akpd_score'].gt(0.95)]
df99 = df2.loc[df2['akpd_score'].gt(0.99)]
df1_5 = df2.loc[df2['depth'].gt(1.5)]

# How many of the day's rows lie beyond the depth cut-off, and the day's total.
print(len(df1_5), len(df2))

In [None]:


# 100 evenly spaced bucket centres from 4000 to 7000 inclusive; they are
# compared against estimated_weight_g when the per-bucket fits are run.
buckets = np.linspace(start=4000, stop=7000, num=100)

# Filled with one fitted parameter tuple per bucket centre.
results = list()

def adj_weight(x):
    """Return ``x`` raised to the power 2/3.

    Works on anything that supports ``**`` with a float exponent (plain
    numbers, NumPy arrays, pandas Series).
    """
    exponent = 2 / 3
    return x ** exponent

# For every bucket centre, fit a Weibull distribution to the depths of the
# rows whose estimated weight lies strictly within +/-1000 of that centre.
# The location parameter is pinned at 0.7, so only shape and scale are fitted.
fit_rows = []
for centre in buckets:
    lower, upper = centre - 1000, centre + 1000
    in_window = df.estimated_weight_g.gt(lower) & df.estimated_weight_g.lt(upper)
    fit_rows.append(stats.weibull_min.fit(df.loc[in_window, 'depth'], floc=0.7))

# One row per bucket; columns follow scipy's (shape, loc, scale) ordering.
results = np.array(fit_rows)

In [None]:
# Responses: the fitted Weibull shape (column 0) and scale (column 2) per bucket.
Y0, Y2 = results[:, 0], results[:, 2]

# Design matrix: a constant column followed by the bucket centre.
X = sm.add_constant(buckets)

# Two independent linear regressions of a Weibull parameter on bucket centre.
model0, model2 = sm.OLS(Y0, X), sm.OLS(Y2, X)
m0, m2 = model0.fit(), model2.fit()



In [None]:
def get_prob(depth, weight):
    """Evaluate the weight-dependent Weibull depth density at ``depth``.

    The shape and scale parameters are predicted from ``weight`` by the fitted
    regressions ``m0`` and ``m2``; the location is fixed at 0.7.

    The returned value keeps the array form produced by ``predict`` (the
    calling loop reads element 0 of it).
    """
    regressors = [1, weight]  # constant term first, then the weight
    shape = m0.predict(regressors)
    scale = m2.predict(regressors)
    location = 0.7

    return stats.weibull_min.pdf(depth, shape, location, scale)

# Every estimated weight, before any filtering.
weights = df.estimated_weight_g

# Keep the weight and depth density of each row whose density is not below
# 0.01; rows below that cut-off are printed and left out.
kept_weights, kept_probs = [], []
for _, fish in df.iterrows():
    prob = get_prob(fish['depth'], fish['estimated_weight_g'])

    if prob < 0.01:
        print(fish['depth'], fish['estimated_weight_g'])
        continue

    kept_weights.append(fish['estimated_weight_g'])
    kept_probs.append(prob[0])

weights_weight = np.array(kept_weights)
weights_prob = np.array(kept_probs)

In [None]:
# Plain mean over every row.
w1 = np.mean(weights)

# Inverse-probability weighted mean over the rows that passed the density cut-off.
w2 = (weights_weight / weights_prob).sum() / (1 / weights_prob).sum()

print(w1, w2)

# Difference between the two means, relative to the plain mean.
relative_gap = (w1 - w2) / w1
print(relative_gap)

In [None]:
# The original cell read `min(weights2)`, but `weights2` is never assigned
# anywhere in this notebook, so it raised NameError on a fresh kernel.
# NOTE(review): assumes the intent was the smallest weight that survived the
# probability filter (`weights_weight`) — confirm.
min(weights_weight)

In [None]:
# Four separate figures: fitted shape (column 0) and then fitted scale
# (column 2), each drawn against the 2/3-power of the bucket centre and
# against the raw bucket centre.
panel_size = (20, 5)
adjusted_buckets = adj_weight(buckets)
shape_by_bucket, scale_by_bucket = results[:, 0], results[:, 2]

plt.figure(figsize=panel_size)
plt.plot(adjusted_buckets, shape_by_bucket)
plt.figure(figsize=panel_size)
plt.plot(buckets, shape_by_bucket)
plt.figure(figsize=panel_size)
plt.plot(adjusted_buckets, scale_by_bucket)
plt.figure(figsize=panel_size)
plt.plot(buckets, scale_by_bucket)

In [None]:
# Two weight bands with strict bounds on both sides: 4000-6000 and 6000-8000.
mask1 = df.estimated_weight_g.gt(4000) & df.estimated_weight_g.lt(6000)
mask2 = df.estimated_weight_g.gt(6000) & df.estimated_weight_g.lt(8000)

d1, d2 = df[mask1], df[mask2]

plt.figure(figsize=(20, 10))
x = np.linspace(0, 3, 5000)

# Fitted Weibull density for each band (all three parameters left free).
for band in (d1, d2):
    band_params = stats.weibull_min.fit(band['depth'])
    plt.plot(x, stats.weibull_min.pdf(x, *band_params))

# Normalised depth histograms of the same two bands, drawn on the same axes.
hist_style = dict(bins=30, alpha=0.5, density=True)
plt.hist(d1['depth'], **hist_style)
plt.hist(d2['depth'], **hist_style)

In [None]:
# Fit the first band's depths twice: once with the location pinned at 0.68,
# once with the location left free.
band_depths = d1['depth']
print(stats.weibull_min.fit(band_depths, floc=0.68))
print(stats.weibull_min.fit(band_depths))

In [None]:
# Normalised depth histograms: rows above 7000 first, then rows below 5000.
depth_hist_style = dict(bins=30, alpha=0.5, density=True)
plt.hist(df.loc[df.estimated_weight_g > 7000, 'depth'], **depth_hist_style)
plt.hist(df.loc[df.estimated_weight_g < 5000, 'depth'], **depth_hist_style)

In [None]:
# Probability plot of every depth against a Weibull distribution whose
# parameters are fitted to those same depths.
depth_weibull_params = stats.weibull_min.fit(df['depth'])
res = stats.probplot(
    df['depth'],
    dist=stats.weibull_min,
    sparams=depth_weibull_params,
    plot=plt,
)

In [None]:
plt.figure(figsize=(20, 10))

# Weight distribution of the whole day (blue) against the rows beyond the
# depth cut-off (red). The second histogram reuses the first one's bin edges.
overlay_style = dict(alpha=0.5, density=True)
density, bins, _ = plt.hist(df2['estimated_weight_g'], bins=30, color='blue', **overlay_style)
plt.hist(df1_5['estimated_weight_g'], bins=bins, color='red', **overlay_style)

In [None]:
# Mean estimated weight for the selected day.
print(np.mean(df2.estimated_weight_g))

# Bin edges from 1000 to 10000 in steps of 1000, in the units of estimated_weight_g.
# Deliberately not stored in `buckets` or `mask1`: those names hold the
# 100-point fit grid and the 4000-6000 band mask used by earlier cells, and
# rebinding them here would break those cells when they are re-run.
weight_bin_edges = list(range(1000, 10001, 1000))

# Share of the day's rows falling in each (lower, upper] bin.
for lower_edge, upper_edge in zip(weight_bin_edges[:-1], weight_bin_edges[1:]):
    in_bin = (df2['estimated_weight_g'] > lower_edge) & (df2['estimated_weight_g'] <= upper_edge)

    # .mean() of a boolean mask is the fraction of True values; unlike
    # sum(mask) / len(mask) it yields NaN rather than raising
    # ZeroDivisionError when the day has no rows.
    print('%i: %0.2f' % (lower_edge, in_bin.mean()))

In [None]:
# Probability plot of the day's estimated weights against scipy's default
# reference distribution (the normal).
res = stats.probplot(df2['estimated_weight_g'], plot=plt)