In [None]:
import math
import json
import os
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta, time
from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

# Build the data-warehouse accessor from the JSON credentials file whose path is
# given by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable.
# The context manager closes the file handle as soon as the credentials are
# parsed, instead of leaving an open handle for the garbage collector.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Load the capture records from CSV and order them by their `captured_at` value.
duplicates = (
    pd.read_csv('vikane_pen_5_data_duplicate.csv')
    .sort_values('captured_at')
    .copy(deep=True)
)

# Index the frame by the parsed capture timestamp; the raw `captured_at`
# column stays in place (later cells compare it as a string).
duplicates.index = pd.to_datetime(duplicates['captured_at'])

# Derive the calendar date (as a string) and the hour of day from the index.
# No timezone conversion is applied here, so `hour` is whatever the parsed
# timestamps carry.
dates = duplicates.index.date.astype(str)
duplicates = duplicates.assign(date=dates, hour=duplicates.index.hour)

In [None]:
# Load the ground-truth weights table; the cells in this notebook only read
# its `weight` column.
ground_truth = pd.read_csv('blom_vikane_singleweights.csv')

In [None]:
# Average ground-truth weight, in the units stored in the CSV.
ground_truth['weight'].mean()

In [None]:
# Fraction of ground-truth rows whose weight lies in the closed interval [3, 4].
weight_in_3_to_4 = ground_truth['weight'].between(3, 4)
int(weight_in_3_to_4.sum()) / len(ground_truth)

In [None]:
# Smallest estimated length (mm) across all capture records.
duplicates['estimated_length_mm'].min()

In [None]:
# Show the last rows of the frame (it was sorted by `captured_at` when loaded).
duplicates.tail()

In [None]:
# Histogram of capture records by hour of day, using matplotlib's default binning.
plt.hist(duplicates['hour'])

In [None]:
import ast

thetas, phis = [], []

# For every capture, measure the direction of the ANAL_FIN -> PECTORAL_FIN
# vector in world coordinates and express it as two angles in degrees.
for _captured_at, record in duplicates.iterrows():
    # Both columns hold Python-literal strings; parse them back into objects.
    annotation = ast.literal_eval(record.annotation)
    camera_metadata = ast.literal_eval(record.camera_metadata)

    world_keypoints = pixel2world(annotation['leftCrop'], annotation['rightCrop'], camera_metadata)

    body_axis = world_keypoints['PECTORAL_FIN'] - world_keypoints['ANAL_FIN']
    ux, uy, uz = body_axis / np.linalg.norm(body_axis)

    # atan(y / x) * sign(y) equals |atan(y / x)| carrying the sign of x
    # (and is 0 when y == 0).
    thetas.append(math.degrees(math.atan(uy / ux) * np.sign(uy)))
    # 90 deg minus the polar angle from the z axis, i.e. the angle above the x-y plane.
    phis.append(90 - math.degrees(math.acos(uz)))

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(thetas, phis, color='orange', label='Normal')
# An earlier version also scattered a second series (thetas2 / phis2) in blue,
# labelled 'Negative'; those arrays are not built in the cells shown here.
ax.set_xlabel('Theta degree')
ax.set_ylabel('Phi degree')
ax.legend()

# Keep the angles alongside the records for later filtering.
duplicates['theta'] = thetas
duplicates['phi'] = phis


In [None]:
# Preview the first rows of the ground-truth table.
ground_truth.head()

In [None]:
# Rescale the ground-truth weights: multiply by 1000, then divide by 0.83.
# NOTE(review): presumably kg -> g followed by a 0.83 yield correction so the
# values are comparable with `estimated_weight_g` — confirm the factor's origin.
gt_weights = ground_truth['weight'].mul(1000).div(0.83)

In [None]:
from scipy.stats import t, norm

# ---- Row selection ----------------------------------------------------------
# mask0: `captured_at` strings that sort strictly between the two date strings.
captured = duplicates['captured_at']
mask0 = captured.gt('2020-08-24') & captured.lt('2020-08-26')

# mask1: additionally require 5 < hour < 17.
# (An earlier variant filtered on |theta| < 10 and |phi| < 10 instead.)
hour_of_day = duplicates['hour'].abs()
mask1 = mask0 & hour_of_day.gt(5) & hour_of_day.lt(17)

# mask2: of those, only the rows with is_duplicate == 0.
mask2 = mask1 & duplicates['is_duplicate'].eq(0)

print(mask0.sum(), mask1.sum(), mask2.sum())

dist1 = duplicates[mask1]
dist2 = duplicates[mask2]

# ---- Normal fits ------------------------------------------------------------
# Student-t fits (scipy.stats.t) were tried earlier and are currently disabled.
mean, std = norm.fit(dist1['estimated_weight_g'])
mean2, std2 = norm.fit(dist2['estimated_weight_g'])
gt_mean, gt_std = norm.fit(gt_weights)

print(len(dist1), len(dist2))

# ---- Figure 1: histograms with fitted / extrapolated curves -------------------
hist_style = dict(alpha=0.5, density=True, bins=30)

plt.figure(figsize=(20, 10))
plt.hist(dist1['estimated_weight_g'], color='blue', **hist_style)
plt.hist(gt_weights, color='red', **hist_style)

# Evaluate every curve on a common grid spanning the current x-axis limits.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 1000)

y = norm.pdf(x, mean, std)      # fit to all selected captures (dist1)
y2 = norm.pdf(x, mean2, std2)   # fit to the non-duplicate subset (dist2); not drawn
plt.plot(x, y)

# Extrapolated curve: the dist1 pdf pushed 15x further away from the dist2 pdf,
# drawn on a grid shifted by 7.5x the gap between the two sample means.
mean_gap = np.mean(dist1['estimated_weight_g']) - np.mean(dist2['estimated_weight_g'])
new_x = x - 7.5 * mean_gap
plt.plot(new_x, y + 15 * (y - y2), color='red', linestyle='-')

# Normal fit to the ground-truth weights, drawn as a thick black line.
gt_x = np.linspace(xmin, xmax, 1000)
gt_y = norm.pdf(gt_x, gt_mean, gt_std)
plt.plot(gt_x, gt_y, color='black', linewidth=4)

# ---- Figure 2: the same two histograms without any curves ---------------------
plt.figure(figsize=(20, 10))
plt.hist(dist1['estimated_weight_g'], color='blue', **hist_style)
plt.hist(gt_weights, color='red', **hist_style)

In [None]:
plt.figure(figsize=(20, 10))

# Invisible (alpha = 0) histograms: drawn only to obtain the two densities on
# one shared set of 30 bin edges (the edges come from dist1 and are reused for dist2).
density, bins, _ = plt.hist(dist1['estimated_weight_g'], alpha = 0, density = True, bins = 30)
density2, bins, _ = plt.hist(dist2['estimated_weight_g'], bins = bins, alpha = 0, density = True)

bin_width = bins[1] - bins[0]

# Extrapolate the dist1 density 10x further away from the dist2 density and
# clip bins that would become negative.
new_density = density + 10 * (density - density2)
new_density[new_density < 0] = 0

# Representative weight of each bin = mean of the dist1 samples that the
# histogram placed in that bin.
#
# The samples are binned with np.histogram on the same edges so the assignment
# matches `density` exactly (bins are half-open [start, end), the last one is
# closed). A hand-written `(start, end]` test disagrees with that convention:
# it drops the minimum sample (which always sits on bins[0]) from the first
# bin and yields NaN for any bin left empty, and that NaN then propagates
# into the bar positions and the weighted mean computed from them.
weights_all = np.asarray(dist1['estimated_weight_g'], dtype=float)
bin_counts, _ = np.histogram(weights_all, bins=bins)
bin_sums, _ = np.histogram(weights_all, bins=bins, weights=weights_all)
bin_centres = (bins[:-1] + bins[1:]) / 2
# Empty bins fall back to the geometric bin centre instead of NaN.
bins_adj = np.where(bin_counts > 0, bin_sums / np.maximum(bin_counts, 1), bin_centres)

# Shift the bin positions by 5x the gap between the two sample means.
new_bins_adj = bins_adj - 5 * (np.mean(dist1['estimated_weight_g']) - np.mean(dist2['estimated_weight_g']))

# Adjusted estimate (blue bars) against the ground-truth histogram (red).
plt.bar(new_bins_adj, new_density, color = 'blue', alpha = 0.5, width = bin_width)
gt_density, gt_bins, _ = plt.hist(gt_weights, bins = bins, color = 'red', alpha = 0.5, density = True)

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Rescale the adjusted per-bin densities so that they sum to 1.
new_density_adj = new_density / np.sum(new_density)

# Per bucket (lower, upper]: adjusted share minus ground-truth share, then both shares.
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (new_bins_adj > lower) & (new_bins_adj <= upper)
    mask2 = (gt_weights > lower) & (gt_weights <= upper)

    adjusted_share = np.sum(new_density_adj[mask1])
    gt_share = sum(mask2) / len(mask2)
    print('%i: %0.3f, %0.3f vs %0.3f' % (lower, adjusted_share - gt_share, adjusted_share, gt_share))

# Mean of the adjusted distribution and its relative error against the
# ground-truth mean.
gt_average = np.mean(gt_weights)
result = np.sum(new_bins_adj * new_density_adj)
print(result, gt_average)
print((result - gt_average) / gt_average)

In [None]:
# Extrapolated pdf on the plotting grid: the dist1 fit pushed 15x further away
# from the dist2 fit, negative values replaced by 0, then rescaled so that the
# grid samples sum to 1.
extrapolated = y + 15 * (y - y2)
_new_pdf = np.where(extrapolated < 0, 0, extrapolated)
new_pdf = _new_pdf / np.sum(_new_pdf)

# Grid shifted by 7.5x the gap between the two sample means.
mean_gap = np.mean(dist1['estimated_weight_g']) - np.mean(dist2['estimated_weight_g'])
new_x = x - 7.5 * mean_gap

# Mean of the extrapolated distribution, printed next to the raw dist1 mean and
# the ground-truth mean, followed by its relative error against ground truth.
result = np.sum(new_x * new_pdf)
gt_average = np.mean(gt_weights)
print(result, np.mean(dist1['estimated_weight_g']), gt_average)
print((result - gt_average) / gt_average)

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Per bucket (lower, upper]: probability mass of the extrapolated pdf against
# the share of ground-truth weights, printed as difference, pdf share, gt share.
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (new_x > lower) & (new_x <= upper)
    mask2 = (gt_weights > lower) & (gt_weights <= upper)

    pdf_share = np.sum(new_pdf[mask1])
    gt_share = sum(mask2) / len(mask2)
    print('%i: %0.3f, %0.3f vs %0.3f' % (lower, pdf_share - gt_share, pdf_share, gt_share))
    

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Per bucket (lower, upper]: share of dist1 estimates against the share of
# ground-truth weights, printed as difference, dist1 share, gt share.
estimated = dist1['estimated_weight_g']
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (estimated > lower) & (estimated <= upper)
    mask2 = (gt_weights > lower) & (gt_weights <= upper)

    estimated_share = sum(mask1) / len(mask1)
    gt_share = sum(mask2) / len(mask2)
    print('%i: %0.2f, %0.2f vs %0.2f' % (lower, estimated_share - gt_share, estimated_share, gt_share))

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Per bucket (lower, upper]: share of dist2 (non-duplicate) estimates against
# the share of ground-truth weights, printed as difference, dist2 share, gt share.
estimated = dist2['estimated_weight_g']
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (estimated > lower) & (estimated <= upper)
    mask2 = (gt_weights > lower) & (gt_weights <= upper)

    estimated_share = sum(mask1) / len(mask1)
    gt_share = sum(mask2) / len(mask2)
    print('%i: %0.2f, %0.2f vs %0.2f' % (lower, estimated_share - gt_share, estimated_share, gt_share))

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Per bucket (lower, upper]: 20x the gap between the dist1 and dist2 shares,
# followed by the two shares themselves.
all_estimates = dist1['estimated_weight_g']
unique_estimates = dist2['estimated_weight_g']
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (all_estimates > lower) & (all_estimates <= upper)
    mask2 = (unique_estimates > lower) & (unique_estimates <= upper)
    # Ground-truth mask: only needed by the disabled dist1-vs-ground-truth
    # variant of the print below.
    mask_gt = (gt_weights > lower) & (gt_weights <= upper)

    all_share = sum(mask1) / len(mask1)
    unique_share = sum(mask2) / len(mask2)
    print('%i: %0.3f, %0.3f vs %0.3f' % (lower, 20 * (all_share - unique_share), all_share, unique_share))
    
    

In [None]:
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Per bucket (lower, upper]: extrapolate the dist1 share 10x further away from
# the dist2 share and compare it with the ground-truth share. Printed as
# difference, extrapolated share, gt share.
all_estimates = dist1['estimated_weight_g']
unique_estimates = dist2['estimated_weight_g']
for lower, upper in zip(buckets[:-1], buckets[1:]):
    mask1 = (all_estimates > lower) & (all_estimates <= upper)
    mask2 = (unique_estimates > lower) & (unique_estimates <= upper)
    mask_gt = (gt_weights > lower) & (gt_weights <= upper)

    all_share = sum(mask1) / len(mask1)
    unique_share = sum(mask2) / len(mask2)
    gt_share = sum(mask_gt) / len(mask_gt)
    correction = 10 * (all_share - unique_share)
    print('%i: %0.2f, %0.2f vs %0.2f' % (lower, all_share - gt_share + correction, all_share + correction, gt_share))

In [None]:
# Sample mean of dist1, the fitted normal mean for dist1, sample mean of dist2,
# and the ground-truth mean, side by side.
print(
    dist1['estimated_weight_g'].mean(),
    mean,
    dist2['estimated_weight_g'].mean(),
    gt_weights.mean(),
)

In [None]:
# Relative error of the mean estimated weight against the ground-truth mean,
# first for all selected captures (dist1), then for the non-duplicates (dist2).
gt_average = np.mean(gt_weights)
for sample in (dist1, dist2):
    print((np.mean(sample['estimated_weight_g']) - gt_average) / gt_average)