In [None]:
import math
import json
import os
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta, time
from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

# Build the data-warehouse accessor from the JSON credentials file whose path
# is given by the DATA_WAREHOUSE_SQL_CREDENTIALS environment variable.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed (the previous bare open() call never closed it).
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Load the duplicate-annotation CSV and order the rows by their captured_at
# value (sorted on the column exactly as read from the file).
duplicates = (
    pd.read_csv('slapoya_p1_duplicate.csv')
    .sort_values(by='captured_at')
    .copy(deep=True)
)

# Index the rows by the parsed capture timestamp; the captured_at column
# itself is left untouched.
duplicates.index = pd.to_datetime(duplicates['captured_at'])

# Derive calendar-date strings and the hour of day from the new index.
dates = duplicates.index.date.astype(str)
duplicates = duplicates.assign(date=dates, hour=duplicates.index.hour)

In [None]:
# Compare the hour-of-day distribution of rows flagged as duplicates against
# all remaining rows.
mask = (duplicates['is_duplicate'] == 1)

# One bin per clock hour, centred on the integer values 0..23. Explicit edges
# are used instead of bins=24 because an integer bin count derives the edges
# from each subset's own min/max, so the two overlaid histograms would not
# share bins whenever one subset does not span the full 0-23 range.
hour_bin_edges = np.arange(25) - 0.5

plt.figure(figsize=(20, 10))

plt.hist(duplicates.loc[mask, 'hour'], bins=hour_bin_edges, alpha=0.5,
         color='blue', density=True, label='is_duplicate == 1')
plt.hist(duplicates.loc[~mask, 'hour'], bins=hour_bin_edges, alpha=0.5,
         color='red', density=True, label='is_duplicate != 1')

plt.xlabel('hour of captured_at')
plt.ylabel('density')
plt.legend()

In [None]:
from scipy.stats import t, norm

WEIGHT_COL = 'estimated_weight_g'

# --- Row selection -----------------------------------------------------------
# mask0: captured_at strictly between the two date-string literals.
# mask1: mask0 restricted to hours strictly between 5 and 17.
# mask2: mask1 restricted to rows with is_duplicate == 0.
captured_at = duplicates['captured_at']
hour_abs = duplicates['hour'].abs()  # abs() carried over from the original expression

mask0 = captured_at.gt('2020-10-26') & captured_at.lt('2020-10-30')
mask1 = mask0 & hour_abs.gt(5) & hour_abs.lt(17)
mask2 = mask1 & duplicates['is_duplicate'].eq(0)

print(mask0.sum(), mask1.sum(), mask2.sum())

dist1 = duplicates.loc[mask1]
dist2 = duplicates.loc[mask2]

weights_all = dist1[WEIGHT_COL]
weights_nodup = dist2[WEIGHT_COL]

# --- Gaussian fits -----------------------------------------------------------
# Student-t fits and a ground-truth (gt_weights) overlay were disabled in the
# original cell and remain so here; gt_weights is not defined in the visible
# cells.
mean, std = norm.fit(weights_all)
mean2, std2 = norm.fit(weights_nodup)

print(len(dist1), len(dist2))

# --- Figure 1: both samples with fitted / extrapolated curves ----------------
hist_kwargs = dict(alpha=0.5, density=True, bins=30)

fig_fit, ax_fit = plt.subplots(figsize=(20, 10))
ax_fit.hist(weights_all, color='blue', **hist_kwargs)
ax_fit.hist(weights_nodup, color='red', **hist_kwargs)

# Evaluate the fitted densities across the x-range chosen by the histograms.
xmin, xmax = ax_fit.get_xlim()
x = np.linspace(xmin, xmax, 1000)

y = norm.pdf(x, mean, std)
y2 = norm.pdf(x, mean2, std2)  # only used in the extrapolated curve below
ax_fit.plot(x, y)

# Extrapolated curve: x is shifted by 5x the difference of the sample means and
# the density is pushed 10x further along (y - y2).
# NOTE(review): the factors 5 and 10 are unexplained in the source - confirm intent.
new_x = x - 5 * (weights_all.mean() - weights_nodup.mean())
ax_fit.plot(new_x, y + 10 * (y - y2), color='red', linestyle='-')

# --- Figure 2: non-duplicate sample on its own -------------------------------
fig_nodup, ax_nodup = plt.subplots(figsize=(20, 10))
ax_nodup.hist(weights_nodup, color='blue', **hist_kwargs)

In [None]:
# Show the two fitted Gaussian means side by side: the hour-filtered sample
# (mean) and the same sample restricted to is_duplicate == 0 (mean2).
fitted_means = (mean, mean2)
print(*fitted_means)