In [None]:
import json
import os
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

from scipy import stats
import statsmodels.api as sm

# Raise the default matplotlib font size for every figure drawn in this notebook.
plt.rcParams['font.size'] = 18

In [None]:
# Load the per-fish weight table; later cells read its `weight` column.
pen5 = pd.read_csv('blom_vikane_singleweights.csv')

In [None]:
# Mean and standard deviation of pen5.weight after the `* 1000 / .83` rescaling used throughout
# this notebook (presumably kg -> g plus a 0.83 yield correction; TODO confirm the units).
np.mean(pen5.weight * 1000 / .83), np.std(pen5.weight * 1000 / .83)

In [None]:
# Preview the first rows of the weight table.
pen5.head()

In [None]:
def get_length_from_weight(weight):
    """Estimate fish length from weight with a cube-root relation.

    Computes ``weight ** (1/3) / 23.6068``, the same expression this notebook
    applies to the rescaled pen5 weights to build ``pen5['length']``.

    Args:
        weight: Scalar, numpy array or pandas Series of weights.
            NOTE(review): the pen5 cell feeds values scaled by ``* 1000 / 0.83``;
            confirm callers pass weights in that same unit.

    Returns:
        The estimated length, with the same shape as ``weight``.
    """
    # The original body referenced an undefined name `y` instead of the parameter.
    return weight ** (1/3) / 23.6068

In [None]:
# In-memory cache of fetched frames, nested as pen_id -> start date -> end date.
# Re-running this cell empties the cache.
queryCache = {}

In [None]:
# pen_id = 95
# df_start_date = '2020-07-21'
# df_end_date = '2020-07-24'
pen_id = 60
df_start_date = '2020-08-24'
df_end_date = '2020-08-26'

# Reuse the frame already fetched for this (pen, start, end) combination, if any.
if pen_id in queryCache and df_start_date in queryCache[pen_id] and df_end_date in queryCache[pen_id][df_start_date]:
    df = queryCache[pen_id][df_start_date][df_end_date]
else:
    # NOTE(review): the meaning of the trailing 0.01 argument is not visible here — confirm
    # against extract_biomass_data.
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0.01)
    df.date = pd.to_datetime(df.date)

    depths = []
    lengths = []
    for _, row in df.iterrows():
        ann, cm = row.annotation, row.camera_metadata
        # World-space keypoints computed from the left/right crop annotations.
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        # Median of component 1 across keypoints (presumably the depth axis — TODO confirm).
        depth = np.median([wkp[1] for wkp in wkps.values()])
        # Straight-line distance between the upper lip and the tail notch.
        vector = wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']
        depths.append(depth)
        lengths.append(np.linalg.norm(vector))
    df['depth'] = depths
    df['length'] = lengths

    # Insert into the nested cache without discarding entries for other date ranges of the
    # same pen (the previous assignment replaced the whole per-pen dict).
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df


In [None]:
# Distribution of df.hour across 24 bins.
plt.hist(df.hour, bins = 24)

In [None]:
plt.figure(figsize=(20, 10))

# Derive a length for every pen5 fish from its rescaled weight (cube-root relation).
pen5['length'] = (pen5['weight'] * 1000 / 0.83) ** (1/3) / 23.6068

# NOTE(review): get_weight_for_fov is defined in a later cell of this notebook; that cell
# has to be executed before this one.
avg_weight, raw_weight = get_weight_for_fov(55, 10, pen5)

# Rows whose hour lies in 5..15 inclusive. Defined here because this cell previously used
# mask2 before any cell above had created it.
mask2 = (df.hour >= 5) & (df.hour <= 15)

# counts, bins, _ = plt.hist(pen5.weight * 1000 / 0.83, bins = 50, density = True)
# Simulated sample first; its bin edges are reused for the filtered estimates.
counts, bins, _ = plt.hist(np.array(raw_weight) * 1000 / 0.83, bins = 50, density = True)
plt.hist(df.estimated_weight_g[mask2], bins = bins, density = True)

In [None]:
# Simulation run at a 55 degree field of view with density 5, sampling from pen5.
avg_weight2, raw_weight2 = get_weight_for_fov(55, 5, pen5)

In [None]:
# Simulation run at a 55 degree field of view with density 6, sampling from pen5.
avg_weight3, raw_weight3 = get_weight_for_fov(55, 6, pen5)

In [None]:
# Simulation run at a 55 degree field of view with density 7, sampling from pen5.
avg_weight4, raw_weight4 = get_weight_for_fov(55, 7, pen5)

In [None]:
# Keep only rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)

# Overlay two normalized histograms on shared bin edges:
# red = filtered estimated weights, blue = rescaled density-5 simulation output.
counts, bins, _ = plt.hist(df.estimated_weight_g[mask2], bins = 20, density = True, alpha = 0.5, color = 'red')
plt.hist(np.array(raw_weight2) * 1000 / 0.83, bins = bins, density = True, alpha = 0.5, color = 'blue')

# Means of the simulated sample and of the filtered estimates, in that order.
print(np.mean(np.array(raw_weight2) * 1000 / 0.83), np.mean(df.estimated_weight_g[mask2]))

In [None]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the first cell.
from scipy.stats import norm, percentileofscore
from statsmodels.sandbox.distributions.extras import pdf_mvsk

# res = stats.probplot(pen5.weight, dist=stats.norm, plot=plt)
# Rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)

# res = stats.probplot(df.estimated_weight_g[mask2], dist=stats.norm, plot=plt)
# res = stats.probplot(raw_weight2, dist=stats.norm, plot=plt)
# res = stats.probplot(pen5.weight, dist=stats.t, sparams=100, plot=plt)
# res = stats.probplot(sinh_archsinh_transformation(Ω, 0, -.01), dist=stats.norm, plot=plt)
# https://github.com/gregversteeg/gaussianize

# Percentile pairs for the scatter plot: x from pen5.weight, y from the reference density.
x = []
y = []

# Evaluation grids: vec spans -3..3 (standardized scale), vec2 spans 0..10000 and
# vec3 spans 0..10, both in steps of 0.01.
vec = np.arange(start=-3,stop=3+0.001,step=0.001)
# vec2 = vec * np.std(pen5.weight) + np.mean(pen5.weight)
vec2 = np.arange(start=0, stop = 10000, step = .01)
vec3 = np.arange(start=0, stop = 10, step = .01)
# dist = sinh_archsinh_transformation(vec, 0, -.05)
# pdf_mvsk takes [mean, variance, skew, kurtosis] (see the statsmodels documentation);
# this one is mean 0, variance 1, skew 0, kurtosis 1.
my_dist = pdf_mvsk([0, 1, 0, 1])
dist = my_dist(vec)
# Numerical CDF on the vec grid: cumulative sum rescaled so the last value is 1.
cdf = np.cumsum(dist)
cdf = cdf / cdf[-1]
dist2 = norm.pdf(vec)

# Density centred on the rescaled pen5 mean, with variance taken from the filtered estimates'
# std divided by 1.06 (NOTE(review): the origin of the 1.06 factor is not shown — confirm).
my_dist2 = pdf_mvsk([np.mean(pen5.weight) * 1000 / .83, (np.std(df.estimated_weight_g[mask2]) / 1.06) ** 2, 0, 1])
# my_dist2 = pdf_mvsk([np.mean(pen5.weight) * 1000 / .83, np.std(pen5.weight * 1000 / .83) ** 2, 0, 1])
dist3 = my_dist2(vec2)

# Same family of density using the unscaled pen5 mean and variance, evaluated on vec3.
my_dist3 = pdf_mvsk([np.mean(pen5.weight), np.std(pen5.weight) ** 2, 0, 1])
dist4 = my_dist3(vec3)

# Percentiles 1..98: pair the empirical pen5 percentile with the vec value at which the
# numerical CDF reaches the same level.
for i in np.arange(1, 99, 1):
    x.append(np.percentile(pen5.weight, i))
    y.append(np.percentile(vec, percentileofscore(cdf, i / 100)))
    
# plt.figure(figsize=(20, 20))

plt.scatter(x, y)

# Ordinary least squares fit of y on x with an intercept, drawn in red over the scatter.
X = x
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
plt.plot(x, results.predict(X), color = 'red')
# plt.plot(vec, dist)

# Standardized pen5 weights against the reference density on vec.
plt.figure(figsize=(10, 10))
plt.hist((pen5.weight - np.mean(pen5.weight)) / np.std(pen5.weight), density = True, bins = 50)
plt.plot(vec, dist)

# Unscaled pen5 weights against the matching density on vec3.
plt.figure(figsize=(10, 10))
plt.hist(pen5.weight, density = True, bins = 50)
plt.plot(vec3, dist4)
# plt.plot(vec, dist2, color = 'green')

# def sinh_archsinh_transformation(x,epsilon,delta):
#     return norm.pdf(np.sinh(delta*np.arcsinh(x)-epsilon))*delta*np.cosh(delta*np.arcsinh(x)-epsilon)/np.sqrt(1+np.power(x,2))

In [None]:
plt.figure(figsize=(10, 10))
# Normalized histogram of the rescaled pen5 weights.
plt.hist(pen5.weight * 1000 / .83, density = True, alpha = 0.5, bins = 50)
# plt.hist(df.estimated_weight_g[mask2], density = True, alpha = 0.5, bins = 50)
# Red: normal density with parameters fitted to the rescaled pen5 weights.
plt.plot(vec2, stats.norm.pdf(vec2, *stats.norm.fit(pen5.weight * 1000 / .83)), color = 'red')
# Green: dist3, the pdf_mvsk density evaluated on vec2 in an earlier cell.
plt.plot(vec2, dist3, lw = 4, color = 'green')

# Printed in order: std of the filtered estimates, std of the rescaled pen5 weights,
# the ratio of those stds, and the ratio of the two means.
print(np.std(df.estimated_weight_g[mask2]), np.std(pen5.weight * 1000 / .83), np.std(df.estimated_weight_g[mask2]) / np.std(pen5.weight * 1000 / .83), np.mean(df.estimated_weight_g[mask2]) / np.mean(pen5.weight * 1000 / .83))

In [None]:
# Inspect the density values evaluated on vec2 (a very long array; the repr is truncated).
dist3

In [None]:
# Keep only rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)

# Same overlay as the density-5 comparison, using the density-7 simulation (raw_weight4) in blue.
counts, bins, _ = plt.hist(df.estimated_weight_g[mask2], bins = 20, density = True, alpha = 0.5, color = 'red')
plt.hist(np.array(raw_weight4) * 1000 / 0.83, bins = bins, density = True, alpha = 0.5, color = 'blue')

# Means of the simulated sample and of the filtered estimates, in that order.
print(np.mean(np.array(raw_weight4) * 1000 / 0.83), np.mean(df.estimated_weight_g[mask2]))

In [None]:
# Keep only rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)

# Same overlay as the density-5 comparison, using the density-6 simulation (raw_weight3) in blue.
counts, bins, _ = plt.hist(df.estimated_weight_g[mask2], bins = 20, density = True, alpha = 0.5, color = 'red')
plt.hist(np.array(raw_weight3) * 1000 / 0.83, bins = bins, density = True, alpha = 0.5, color = 'blue')

# Means of the simulated sample and of the filtered estimates, in that order.
print(np.mean(np.array(raw_weight3) * 1000 / 0.83), np.mean(df.estimated_weight_g[mask2]))

In [None]:
# Bucket edges for the distribution comparison; bucket i is the half-open range
# (buckets[i], buckets[i + 1]].
buckets = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
x_buckets = np.array(buckets[:-1])
# Hard-coded per-bucket fractions (presumably copied from an external source — TODO confirm).
smart_pcts = [0.0 ,
0.010945273631840797 ,
0.06998341625207297 ,
0.35190713101160864 ,
0.417910447761194 ,
0.12802653399668326 ,
0.020895522388059702 ,
0.0003316749585406302 ,
0.0 ,
0.0]

# Rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)
d4 = df.estimated_weight_g[mask2]            # filtered estimated weights
d2 = np.array(raw_weight2) * 1000 / 0.83     # rescaled density-5 simulation output
d3 = pen5.weight * 1000 / 0.83               # rescaled pen5 weights

pcts1 = []
pcts2 = []
pcts3 = []
pcts4 = []

errors1 = []
errors2 = []
errors3 = []
errors4 = []

for i in range(len(buckets) - 1):
    lower, upper = buckets[i], buckets[i + 1]

    # Loop-local names: the previous version reassigned `mask2` here, silently replacing the
    # hour filter with a mask over d2 that later cells then used to index df.
    in_bucket_d4 = (d4 > lower) & (d4 <= upper)
    in_bucket_d2 = (d2 > lower) & (d2 <= upper)
    in_bucket_d3 = (d3 > lower) & (d3 <= upper)
    
    pct1 = np.sum(in_bucket_d4) / len(in_bucket_d4)
    pcts1.append(pct1)
    pct2 = np.sum(in_bucket_d2) / len(in_bucket_d2)
    pcts2.append(pct2)
    pct3 = np.sum(in_bucket_d3) / len(in_bucket_d3)
    pcts3.append(pct3)
    
    print(pct3)
    
    # Share of the dist3 density mass (evaluated on vec2) that falls inside this bucket.
    pct4 = np.sum(dist3[(vec2 > lower) & (vec2 <= upper)]) / np.sum(dist3)
    pcts4.append(pct4)
    
    # Absolute differences in percentage points.
    errors1.append(np.abs(100 * (pct1 - pct2)))
    errors2.append(np.abs(100 * (pct1 - pct3)))
    errors3.append(np.abs(100 * (smart_pcts[i] - pct2)))
    errors4.append(np.abs(100 * (pct4 - pct3)))
    
#     print('%i: %0.2f%%' % (buckets[i], 100 * (pct4 - pct3)))

print(np.max(errors1), np.max(errors2), np.max(errors3), np.max(errors4))
print(np.mean(errors1), np.mean(errors2), np.mean(errors3), np.mean(errors4))
print((np.mean(d3) - np.mean(d4)), np.mean(d2) - np.mean(d4))
    
plt.figure(figsize=(20, 10))
plt.bar(x_buckets - 150, pcts1, color = 'red', width = 150, label = 'Original')
plt.bar(x_buckets, pcts2, color = 'blue', width = 150, label = 'Dedup')
# Distinct labels: three series were previously all labelled 'Original'.
plt.bar(x_buckets + 150, pcts3, color = 'green', width = 150, label = 'pen5 weights')
plt.bar(x_buckets + 300, pcts4, color = 'purple', width = 150, label = 'Fitted density (dist3)')
# plt.bar(x_buckets + 300, smart_pcts, color = 'purple', width = 150, label = 'Original')
# Without a legend call the labels above are never displayed.
plt.legend()

In [None]:
# Rebuild the hour filter (5..15 inclusive) locally: the bucket-comparison cell's loop reassigns
# `mask2` to a mask over the simulated sample, which must not be used to index df.
mask2 = (df.hour >= 5) & (df.hour <= 15)

x_pct = []
y_pct = []

# Percentiles 1..98 of the filtered estimates (x) against the rescaled density-5 simulation (y).
for i in np.arange(1, 99, 1):
    x_pct.append(np.percentile(df.estimated_weight_g[mask2], i))
    y_pct.append(np.percentile(np.array(raw_weight2) * 1000 / 0.83, i))
    
plt.scatter(x_pct, y_pct)
# Identity line for reference: points on it mean the two distributions agree at that percentile.
plt.plot(x_pct, x_pct)

In [None]:
# Keep only rows whose hour lies in 5..15 inclusive.
mask2 = (df.hour >= 5) & (df.hour <= 15)
# Green: rescaled pen5 weights; its bin edges are reused for the second histogram.
counts, bins, _ = plt.hist(pen5.weight * 1000 / 0.83, bins = 30, density = True, alpha = 0.5, color = 'green')
# Red: filtered estimates shifted by a constant 170
# (NOTE(review): the origin of this offset is not shown — confirm).
plt.hist(df.estimated_weight_g[mask2] + 170, bins = bins, density = True, alpha = 0.5, color = 'red')
# Means of both samples; the second is the unshifted estimate.
np.mean(pen5.weight * 1000 / 0.83), np.mean(df.estimated_weight_g[mask2])

In [None]:
# NOTE(review): relies on `mask2` from an earlier cell; make sure it still holds the hour filter.
x_pct = []
y_pct = []

# Percentiles 1..98 of the filtered estimates (x) against the rescaled pen5 weights (y).
for i in np.arange(1, 99, 1):
    x_pct.append(np.percentile(df.estimated_weight_g[mask2], i))
    y_pct.append(np.percentile(pen5.weight * 1000 / 0.83, i))
    
plt.scatter(x_pct, y_pct)
# Identity line for reference.
plt.plot(x_pct, x_pct)

In [None]:
# Green: rescaled pen5 weights; blue: rescaled density-5 simulation output on the same bin edges.
counts, bins, _ = plt.hist(pen5.weight * 1000 / 0.83, bins = 50, density = True, alpha = 0.5, color = 'green')
plt.hist(np.array(raw_weight2) * 1000 / 0.83, bins = bins, density = True, alpha = 0.5, color = 'blue')


In [None]:
# Green: rescaled pen5 weights; second histogram: rescaled `raw_weight` sample on the same bin edges.
counts, bins, _ = plt.hist(pen5.weight * 1000 / 0.83, bins = 50, density = True, alpha = 0.5, color = 'green')
plt.hist(np.array(raw_weight) * 1000 / 0.83, bins = bins, density = True, alpha = 0.5)


In [None]:
# Green: rescaled pen5 weights; red: filtered estimates on the same bin edges.
# NOTE(review): relies on `mask2` from an earlier cell; make sure it still holds the hour filter.
counts, bins, _ = plt.hist(pen5.weight * 1000 / 0.83, bins = 50, density = True, alpha = 0.5, color = 'green')
plt.hist(df.estimated_weight_g[mask2], bins = bins, density = True, alpha = 0.5, color = 'red')
# Means of both samples.
np.mean(pen5.weight * 1000 / 0.83), np.mean(df.estimated_weight_g[mask2])

In [None]:
def get_weight_for_fov(degrees, density, df, target_count=3000, max_iterations=20000):
    """Simulate which fish a camera sees and return the weights of the visible ones.

    Each iteration draws ``int(10 * density)`` fish (with replacement) from ``df``
    and places each at a uniform random position along a line of length 10 and at
    a uniform random depth in [0, 2). The camera sits at position 5. Fish are
    processed nearest first; a fish is recorded when it lies entirely inside the
    field of view at its depth and no nearer fish casts a shadow over it.

    Args:
        degrees: Full field-of-view angle in degrees.
        density: Fish per unit length; the per-iteration sample size is
            ``int(total_length * density)``.
        df: DataFrame with ``length`` and ``weight`` columns to sample from.
        target_count: Stop once at least this many weights were collected.
        max_iterations: Upper bound on the number of sampling iterations.

    Returns:
        Tuple ``(mean_weight, weights)``: ``weights`` is the list of recorded
        weights and ``mean_weight`` is ``np.mean(weights)`` (NaN when nothing was
        recorded).

    Raises:
        ValueError: If ``density`` is too small to draw at least one fish.
    """
    fov = degrees * np.pi / 180
    params_depth = 2
    camera_location = 5
    total_length = 10

    num_samples = int(total_length * density)
    if num_samples < 1:
        # Previously this surfaced as an opaque IndexError on an empty array.
        raise ValueError(
            'density=%r yields no samples for total_length=%r' % (density, total_length)
        )

    # Half-width of the visible band per unit of depth.
    half_fov_tan = np.tan(fov / 2)

    all_weights = []
    count = 0

    while len(all_weights) < target_count and count < max_iterations:
        count = count + 1
        if count % 1000 == 0:
            print(count)

        results = df.sample(n = num_samples, replace = True)

        # Columns: [location, length, depth, weight], one row per sampled fish.
        locations = np.random.uniform(0, total_length, num_samples)
        depths = np.random.uniform(0, params_depth, num_samples)
        fish = np.column_stack([
            locations,
            results['length'].to_numpy(),
            depths,
            results['weight'].to_numpy(),
        ])
        #x.append([location, row.length, depth, row.estimated_weight_g])

        # Nearest fish first, so every possible occluder is seen before what it hides.
        fish = fish[np.argsort(fish[:, 2])]

        all_segments = []

        for row in fish:
            start = row[0]
            end = row[0] + row[1]
            curr_depth = row[2]

            band = half_fov_tan * curr_depth
            lower_bound = camera_location - band
            upper_bound = camera_location + band

            if not ((start > lower_bound) and (end < upper_bound)):
                # Not fully visible: never recorded, but kept as a potential occluder when
                # one of its ends lies inside the field of view.
                # NOTE(review): a fish spanning the whole field of view (both ends outside)
                # is not kept as an occluder — confirm whether that is intended.
                if (lower_bound < start < upper_bound) or (lower_bound < end < upper_bound):
                    all_segments.append(row)
                continue

            is_occluded = False

            for seg in all_segments:
                # Project the nearer fish `seg` along camera rays onto the current depth.
                # The previous code projected the candidate's own endpoints (row[0], row[1]),
                # so the occluder's position never influenced the outcome.
                scale = curr_depth / seg[2]
                shadow_lower = camera_location + (seg[0] - camera_location) * scale
                shadow_upper = camera_location + ((seg[0] + seg[1]) - camera_location) * scale

                if not (end < shadow_lower or start > shadow_upper):
                    is_occluded = True
                    break

            if not is_occluded:
                all_weights.append(row[3])

            all_segments.append(row)

    return np.mean(all_weights), all_weights


In [None]:
# Sweep the field of view from 10 to 170 degrees in steps of 10, recording the mean and raw
# simulated weights for each angle.
fovs = []
weights = []
raw_weights = []

for degree in np.arange(10, 180, 10):
    # NOTE(review): get_weight_for_fov as defined in this notebook also requires `density` and
    # `df`; this one-argument call looks like it predates that signature — confirm the values.
    avg_weight, raw_weight = get_weight_for_fov(degree)
    fovs.append(degree)
    weights.append(avg_weight)
    raw_weights.append(raw_weight)
    print(degree, avg_weight, len(raw_weight))

In [None]:
# Sweep density from 0.25 to 4.75 in steps of 0.25 at a fixed 55 degree field of view.
# The densities are stored in `fovs`, overwriting the results of the angle sweep.
fovs = []
weights = []
raw_weights = []

for density in np.arange(0.25, 5, 0.25):
    # NOTE(review): get_weight_for_fov as defined in this notebook also requires `df`;
    # confirm which frame this sweep should sample from.
    avg_weight, raw_weight = get_weight_for_fov(55, density)
    fovs.append(density)
    weights.append(avg_weight)
    raw_weights.append(raw_weight)
    print(density, avg_weight, len(raw_weight))

In [None]:
# Print stored sweep results labelled with densities 8, 10, ..., 18.
# NOTE(review): these labels differ from the 0.25..4.75 grid of the density sweep cell;
# confirm which run populated `weights` and `raw_weights`.
for index, density in enumerate(np.arange(8, 20, 2)):
    print(density, weights[index], len(raw_weights[index]))

In [None]:
plt.figure(figsize=(20, 10))
# Red: all estimated weights; its bin edges are reused for the simulated sample in blue.
counts, bins, _ = plt.hist(df.estimated_weight_g, density = True, alpha = 0.5, color = 'red', bins = 20)
counts2, bins, _ = plt.hist(raw_weights[3], density = True, alpha = 0.5, color = 'blue', bins = bins)

# Per-bin ratio of the simulated histogram to the estimated one.
# NOTE(review): each count vector is divided by the OTHER histogram's sum; confirm this
# cross-normalisation is intended. Empty bins in `counts` produce division by zero.
(np.array(counts2) / np.sum(counts)) / (np.array(counts) / np.sum(counts2))

In [None]:
# NOTE(review): `stats` is already imported from scipy in the first cell; this re-import is redundant.
import scipy.stats as stats

# Fit normal distributions to the estimated weights and to the fourth simulated sample.
mean, std = stats.norm.fit(df.estimated_weight_g)
mean2, std2 = stats.norm.fit(raw_weights[3])

# Only the fit of the estimated weights is printed; mean2/std2 are not displayed here.
print(mean, std)

# stats.probplot(raw_weights[3], plot = plt)

In [None]:
# Probability plot of the estimated weights (scipy's probplot compares against a normal by default).
stats.probplot(df.estimated_weight_g, plot = plt)

In [None]:
# Fit a normal distribution to each of the first four simulated samples and print (mean, std).
for i in range(4):
    mean, std = stats.norm.fit(raw_weights[i])
    print(mean, std)

In [None]:
# Ratio of two hard-coded values (presumably means copied from earlier output — TODO confirm which).
5632.671352371154 / 5483.3882774105505

In [None]:
# Ratio of two hard-coded values (presumably standard deviations copied from earlier output — TODO confirm).
1098.5482565591922 / 1025.0008784912347

In [None]:
# Histogram of the simulated sample stored at index 1 of raw_weights.
plt.hist(raw_weights[1])

In [None]:
# Histogram of the simulated sample stored at index 2 of raw_weights.
plt.hist(raw_weights[2])

In [None]:
# Histogram of the simulated sample stored at index 3 of raw_weights.
plt.hist(raw_weights[3])

In [None]:
# Histogram of the simulated sample stored at index 4 of raw_weights.
plt.hist(raw_weights[4])

In [None]:
# Mean simulated weights relative to the mean estimated weight, plotted against x = 1..4.
# NOTE(review): this only plots if `weights` holds exactly four entries, unlike the sweeps in
# this notebook — confirm which run this cell was written for.
plt.plot(np.arange(1, 5, 1), weights / np.mean(df.estimated_weight_g))

In [None]:
# Ratio of two hard-coded round numbers (presumably approximate mean weights — TODO confirm).
5500 / 5700

In [None]:
# NOTE(review): in the visible notebook `all_weights` is only assigned inside
# get_weight_for_fov, so at cell scope this relies on leftover kernel state — confirm the source.
np.mean(all_weights), np.mean(df.estimated_weight_g)

In [None]:
# Ratio of the mean of `all_weights` to the mean estimated weight.
# NOTE(review): `all_weights` is not assigned at cell scope in the visible notebook — confirm the source.
np.mean(all_weights) / np.mean(df.estimated_weight_g)

In [None]:
# Mean simulated weight against the swept parameter stored in `fovs`
# (angles or densities, depending on which sweep cell ran last).
plt.plot(fovs, weights)

In [None]:
# Same sweep, with the mean simulated weight expressed relative to the mean estimated weight.
plt.plot(fovs, np.array(weights) / np.mean(df.estimated_weight_g))

In [None]:
# Keep a second name for the current sweep results. This is an alias, not a copy: later
# appends to this list object show up under both names.
weights2 = weights