In [None]:
import json
import os
import math
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils

from scipy import stats
import statsmodels.api as sm

plt.rcParams['font.size'] = 18

In [None]:
# Build the data-warehouse accessor from the JSON credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The with-block closes the
# file as soon as it has been parsed instead of leaving the handle open.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# In-kernel cache of prepared frames, nested as queryCache[pen_id][start_date][end_date].
# Re-running this cell empties the cache.
queryCache = {}

In [None]:
# Pen and date window to analyse.
pen_id = 153
# Previously analysed windows, kept for reference:
# df_start_date = '2020-10-06'
# df_end_date = '2020-10-28'
# df_start_date = '2020-11-16'
# df_end_date = '2020-11-19'
df_start_date = '2020-11-18'
df_end_date = '2020-11-21'

# NOTE: this cell calls get_angles, equation_plane, get_weight and add_angles,
# which are defined in later cells; on a fresh kernel run those cells first.

# Reuse the frame built earlier in this kernel for the same (pen, start, end) key.
cached_df = queryCache.get(pen_id, {}).get(df_start_date, {}).get(df_end_date)
if cached_df is not None:
    df = cached_df
else:
    # NOTE(review): the meaning of the 0.01 argument is not visible here
    # (presumably a score threshold) -- confirm against extract_biomass_data.
    df = extract_biomass_data(pen_id, df_start_date, df_end_date, 0.01)
    # df = extract_biomass_data(pen_id, '2020-08-24', '2020-09-03', 0.99)

    df.date = pd.to_datetime(df.date)

    depths = []
    lengths = []
    lengths_adj = []
    lengths_adj2 = []
    coplanarity = []
    coangle = []

    for _, row in df.iterrows():
        ann, cm = row.annotation, row.camera_metadata
        wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
        # Depth is taken as the median of the second world coordinate over all keypoints.
        depth = np.median([wkp[1] for wkp in wkps.values()])
        # Vector from TAIL_NOTCH to UPPER_LIP; its norm is the raw length.
        vector = wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']

        # Midpoint of DORSAL_FIN and PELVIC_FIN, used as a body reference point.
        centroid = .5 * (wkps['DORSAL_FIN'] + wkps['PELVIC_FIN'])
        # Distance between the (theta, phi) pairs of the lip->centroid and
        # centroid->tail segments.
        angle = np.linalg.norm(np.array(get_angles(wkps['UPPER_LIP'], centroid)) - np.array(get_angles(centroid, wkps['TAIL_NOTCH'])))
        # Unit vectors from the centroid towards the lip (a) and the tail notch (b).
        a = (wkps['UPPER_LIP'] - centroid) / np.linalg.norm(wkps['UPPER_LIP'] - centroid)
        b = (wkps['TAIL_NOTCH'] - centroid) / np.linalg.norm(wkps['TAIL_NOTCH'] - centroid)

        # NOTE(review): this is an element-wise product followed by a norm, not a
        # dot-product projection onto a / b -- confirm that is what is intended.
        lengths_adj.append(np.linalg.norm(vector * a))
        lengths_adj2.append(np.linalg.norm(vector * b))

        depths.append(depth)
        lengths.append(np.linalg.norm(vector))
        coplanarity.append(equation_plane(wkps['TAIL_NOTCH'], wkps['DORSAL_FIN'], wkps['PELVIC_FIN'], wkps['UPPER_LIP']))
        coangle.append(angle)
    df['depth'] = depths
    df['length'] = lengths
    df['length_adj'] = lengths_adj
    df['length_adj2'] = lengths_adj2
    df['coplanarity'] = coplanarity
    df['coangle'] = coangle

    # Length-based weight estimates for the raw and the two adjusted lengths.
    df['estimated_weight'] = get_weight(df.length)
    df['estimated_weight_adj'] = get_weight(df.length_adj)
    df['estimated_weight_adj2'] = get_weight(df.length_adj2)
    df['weight_diff'] = df['estimated_weight'] - df['estimated_weight_g']
    df['length_diff'] = df['length'] - df['length_adj']
    df['length_diff2'] = df['length_adj'] - df['length_adj2']

    # Insert into the nested cache without discarding other windows already
    # cached for this pen.
    queryCache.setdefault(pen_id, {}).setdefault(df_start_date, {})[df_end_date] = df

# Adds the 'theta' and 'phi' columns in place (recomputed on every run of this cell).
add_angles(df)

In [None]:
# Keep rows with akpd_score above 0.95 and hour between 7 and 15 inclusive.
df1 = df[(df.akpd_score > 0.95) & (df.hour >= 7) & (df.hour <= 15)]

# x: estimated_weight_g (a column already present on df when it is loaded);
# y: the length-based estimate produced by get_weight.
plt.scatter(df1.estimated_weight_g, df1.estimated_weight)

In [None]:
# Distribution of estimated_weight_g within the df1 subset.
plt.hist(df1.estimated_weight_g)

In [None]:
# Mean estimated_weight_g of the df1 subset.
np.mean(df1.estimated_weight_g)

In [None]:
# Show the first row of df (handy for checking which columns are available).
df.iloc[0]

In [None]:
def add_angles(df1):
    """Add 'theta' and 'phi' columns (in degrees) to ``df1`` in place.

    For every row the keypoints are converted to world coordinates with
    ``pixel2world`` and the angles of the vector PECTORAL_FIN - ANAL_FIN are
    computed with ``get_angles``, so both places share one angle definition.

    Args:
        df1: DataFrame whose rows expose ``annotation`` (with 'leftCrop' and
            'rightCrop' entries) and ``camera_metadata``.

    Returns:
        None. ``df1`` is mutated: columns 'theta' and 'phi' are set/overwritten.
    """
    thetas = []
    phis = []

    for _, row in df1.iterrows():
        ann1, cm1 = row.annotation, row.camera_metadata
        wkps1 = pixel2world(ann1['leftCrop'], ann1['rightCrop'], cm1)

        # get_angles(kp1, kp2) works on kp1 - kp2, i.e. PECTORAL_FIN - ANAL_FIN here.
        dtheta, dphi = get_angles(wkps1['PECTORAL_FIN'], wkps1['ANAL_FIN'])
        thetas.append(dtheta)
        phis.append(dphi)

    df1['theta'] = thetas
    df1['phi'] = phis

In [None]:
def get_weight(length):
    """Return ``(length * 23.6068) ** 3``.

    Works element-wise on anything supporting ``*`` and ``**`` (scalars,
    numpy arrays, pandas Series).
    NOTE(review): the origin of the 23.6068 factor is not visible in this
    notebook -- presumably a fitted length-to-weight coefficient; confirm.
    """
    scaled_length = length * 23.6068
    return scaled_length ** 3

In [None]:
# Spot-check the length-to-weight mapping at lengths 0.65 and 0.7.
get_weight(0.65), get_weight(0.7)

In [None]:
# Earlier filter variants, kept for reference:
# df2 = df[(df.hour >= 7)]
# df2 = df[(df.hour >= 7) & (df.length_diff < .1) & (df.length_diff2 < .05) & ((df.length - df.length_adj) / df.length < .2)]
# df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2) / df.length < .05) & ((df.length - df.length_adj) / df.length < .1)]
# df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2) / df.length < .05) & (df.length - df.length_adj < .1)]
# Active filter: hour >= 7, the two adjusted lengths agree to within 0.01, and the
# raw length exceeds the mean of the adjusted lengths by less than 0.01.
df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2) < .01) & (df.length - .5 * (df.length_adj + df.length_adj2) < .01)]

# Fraction of all rows that pass the filter.
print(len(df2) / len(df))

# Means over the filtered rows, in order: estimated_weight_g, estimated_weight_adj,
# estimated_weight_adj2, row-wise max of the two adjusted estimates, estimated_weight.
np.mean(df2.estimated_weight_g), np.mean(df2.estimated_weight_adj), np.mean(df2.estimated_weight_adj2), np.mean(np.maximum(df2.estimated_weight_adj, df2.estimated_weight_adj2)), np.mean(df2.estimated_weight)

In [None]:
# Relative tolerance on the disagreement between the two adjusted lengths.
limit = 0.05
# Rebinds df2: hour >= 7, relative disagreement below `limit`, and length below 1.
df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2) / df.length < limit) & (df.length < 1) ]
# plt.scatter(df2.estimated_weight_g, np.maximum(df2.estimated_weight_adj, df2.estimated_weight_adj2))
plt.scatter(df2.estimated_weight_g, df2.estimated_weight)

In [None]:
# Sweep a threshold and record, per value, the retained fraction of rows and the
# mean of the row-wise max of the two adjusted weight estimates.
limits = []
pcts = []
avg_weights = []

# for limit in np.arange(0.01, 0.2, 0.01):
for limit in np.arange(1, 1.5, .05):
#     df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2)  / df.length < limit) & ((df.length - .5 * (df.length_adj + df.length_adj2)) / df.length < limit)]
#     df2 = df[(df.hour >= 7) & ((df.length - .5 * (df.length_adj + df.length_adj2)) / df.length_adj < limit)]
#     df2 = df[(df.hour >= 7) & (np.abs(df.length_diff2) / df.length < .1) & (df.length < limit)]
    # NOTE(review): the active filter does not reference `limit`, so every
    # iteration yields the same df2 and both plotted curves are flat.
    df2 = df[(df.hour >= 7)]
    limits.append(limit)
    pcts.append(len(df2) / len(df))
#     avg_weights.append(np.mean(df2.estimated_weight))
#     avg_weights.append(np.mean(df2.estimated_weight_g))
    avg_weights.append(np.mean(np.maximum(df2.estimated_weight_adj, df2.estimated_weight_adj2)))
    
fig, ax1 = plt.subplots()

# Second y-axis sharing the same x-axis, used for the retained fraction.
ax2 = ax1.twinx()

ax1.plot(limits, avg_weights)
ax2.plot(limits, pcts, color = 'red')


In [None]:
# Distribution of the length-based weight estimate in the current df2.
plt.hist(df2.estimated_weight, bins = 30)

In [None]:
# Distribution of estimated_weight_g in the current df2.
plt.hist(df2.estimated_weight_g, bins = 30)

In [None]:
# Distribution of the row-wise max of the two adjusted weight estimates in df2.
plt.hist(np.maximum(df2.estimated_weight_adj, df2.estimated_weight_adj2), bins = 30)

In [None]:
# Density histogram of estimated_weight_g; `count` (per-bin densities) and `bins`
# (bin edges) are read as globals by get_symmetry and the cells that follow.
count, bins, _ = plt.hist(df2.estimated_weight_g, density = True, bins = 30)
# count, bins, _ = plt.hist(df2.estimated_weight, density = True, bins = 30)

# cdf = np.cumsum(count)
# plt.plot(bins[1:], cdf)

def get_symmetry(i, l):
    """Correlate the histogram on either side of index ``i``.

    Reads the module-level ``count`` array. The ``l`` entries immediately
    before index ``i`` are compared with the ``l`` entries starting at ``i``
    taken in reverse order.

    Returns:
        The 2x2 matrix from ``np.corrcoef``; element [0, 1] is the correlation
        between the two windows.
    """
    left_window = count[i - l:i]
    mirrored_right_window = count[i:i + l][::-1]

    return np.corrcoef(left_window, mirrored_right_window)

# Keep the ten (centre index, half-width, correlation) triples with the highest
# correlation found over all tested centres and half-widths.
top_results = []

for l in np.arange(5, 15):  # half-widths of 5..14 bins
    for i in np.arange(l, len(count) - l):  # centres with l bins available on both sides
        symm = get_symmetry(i, l)[0, 1]  # off-diagonal entry: correlation of the two windows

        result = (i, l, symm)

        if len(top_results) < 10:
            top_results.append(result)
        else:
            # Find the weakest stored entry and replace it if this one is strictly better.
            val, idx = min((val[2], idx) for (idx, val) in enumerate(top_results))
            if symm > val:
                top_results[idx] = result

In [None]:
# Order candidates from highest to lowest correlation.
top_results.sort(key=lambda x: -x[2])

for result in top_results:
    print(result)

# Best candidate: centre bin index and half-width (in bins).
reflection_idx, reflection_idx_length = top_results[0][0], top_results[0][1]

# Translate bin indices into weights via the histogram bin edges.
# NOTE(review): get_symmetry splits the windows between bins i-1 and i, which
# would correspond to edge bins[i]; this uses bins[i + 1] -- confirm the offset.
reflection_point = bins[reflection_idx + 1]
reflection_length = bins[reflection_idx + 1 + reflection_idx_length] - reflection_point

lower_point = reflection_point - reflection_length
upper_point = reflection_point + reflection_length

d1 = df2['estimated_weight_g']
# d1 = df2['estimated_weight']

# Synthetic sample: all values below upper_point, plus the values below lower_point
# mirrored about reflection_point (upper_point + lower_point == 2 * reflection_point).
d = np.concatenate([d1[d1 < upper_point], upper_point + lower_point - d1[d1 < lower_point]])
# Mean of the reflected sample, mean of the original sample, and the reflection point.
np.mean(d), np.mean(d1), reflection_point

In [None]:
# Rows of df2 with length below 2, heaviest length-based estimate first.
df3 = df2[(df2.length < 2)].sort_values('estimated_weight', ascending = False)
pd.set_option('display.max_rows', 100)
# NOTE: this expression is not the last statement of the cell, so Jupyter does not
# render it; move it to the end (or wrap in display()) to see the table.
df3[['estimated_weight', 'estimated_weight_g', 'length', 'akpd_score']].head(100)

# Subset of df3 with akpd_score above 0.01.
df4 = df3[(df3.akpd_score > 0.01)]

In [None]:
# Regress length_diff on the absolute coplanarity residual.
x, y = np.abs(df4.coplanarity), df4.length_diff

plt.scatter(x, y)

X = x
X = sm.add_constant(X)  # adds the intercept column
model = sm.OLS(y, X)
results = model.fit()
# Fitted line over the scatter.
plt.plot(x, results.predict(X), color = 'red')

results.summary()

In [None]:
# plt.scatter(df3.estimated_weight_g, df3.estimated_weight)


# Regress (estimated_weight_g - estimated_weight) on the signed coplanarity residual.
plt.scatter(df4.coplanarity, df4.estimated_weight_g - df4.estimated_weight)

x = df4.coplanarity
X = x
X = sm.add_constant(X)  # adds the intercept column
y = df4.estimated_weight_g - df4.estimated_weight
model = sm.OLS(y, X)
results = model.fit()
plt.plot(x, results.predict(X), color = 'red')

# Mean of each weight column over df4, for comparison.
print(np.mean(df4.estimated_weight_g), np.mean(get_weight(df4.length)))
results.summary()

In [None]:
# df4 ordered by weight_diff (estimated_weight - estimated_weight_g), smallest first.
df5 = df4.sort_values('weight_diff', ascending = True)
# NOTE: a bare expression in the middle of a cell is not rendered by Jupyter.
df5
# Rows where the two weight estimates differ by less than 10 in absolute value.
df6 = df5[np.abs(df5['weight_diff']) < 10]
df6

In [None]:
def display_crops(left_image_f, right_image_f, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crop images stacked vertically, optionally with keypoints.

    Args:
        left_image_f: path of the left crop image, read with ``plt.imread``.
        right_image_f: path of the right crop image.
        ann: mapping with 'leftCrop' and 'rightCrop' entries, each an iterable of
            items carrying 'keypointType', 'xCrop' and 'yCrop'.
        overlay_keypoints: when true, draw every keypoint as a red dot.
        show_labels: when true (and overlaying), write the keypoint type next to its dot.
    """
    fig, axes = plt.subplots(2, 1, figsize=(20, 20))
    images = [plt.imread(left_image_f), plt.imread(right_image_f)]
    for axis, image in zip(axes, images):
        axis.imshow(image)

    # One {keypointType: [x, y]} mapping per crop, in (left, right) order.
    crop_annotations = (ann['leftCrop'], ann['rightCrop'])
    keypoints_per_crop = [
        {item['keypointType']: [item['xCrop'], item['yCrop']] for item in crop_ann}
        for crop_ann in crop_annotations
    ]

    if overlay_keypoints:
        for axis, keypoints in zip(axes, keypoints_per_crop):
            for body_part, (x_crop, y_crop) in keypoints.items():
                axis.scatter([x_crop], [y_crop], color='red', s=10)
                if show_labels:
                    axis.annotate(body_part, (x_crop, y_crop), color='red')
    plt.show()

In [None]:
# Create the S3 helper from the JSON credentials file named by AWS_CREDENTIALS;
# the with-block closes the file once it has been parsed.
with open(os.environ['AWS_CREDENTIALS']) as credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(credentials_file))

# Inspect the first row of df6 (rows whose two weight estimates nearly agree).
row = df6.iloc[0]
print(row.akpd_score)
print(row.estimated_weight_g)
print(row.estimated_weight)
print(row.length)
print(row.coplanarity)

# Fetch both crops and recompute the world keypoints; `row` and `wkps` are
# reused as globals by the inspection cells further down.
left_crop_url, right_crop_url = row.left_crop_url, row.right_crop_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
ann, cm = row.annotation, row.camera_metadata
wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, ann, True, True)

In [None]:
# Create the S3 helper from the JSON credentials file named by AWS_CREDENTIALS;
# the with-block closes the file once it has been parsed.
with open(os.environ['AWS_CREDENTIALS']) as credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(credentials_file))

# Inspect the third row of df5 (df4 sorted by ascending weight_diff).
row = df5.iloc[2]
print(row.akpd_score)
print(row.estimated_weight_g)
print(row.estimated_weight)
print(row.length)
print(row.coplanarity)

# Fetch both crops and recompute the world keypoints; `row` and `wkps` are
# reused as globals by the inspection cells further down.
left_crop_url, right_crop_url = row.left_crop_url, row.right_crop_url
left_crop_f, _, _ = s3.download_from_url(left_crop_url)
right_crop_f, _, _ = s3.download_from_url(right_crop_url)
ann, cm = row.annotation, row.camera_metadata
wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)


display_crops(left_crop_f, right_crop_f, ann, True, True)

In [None]:
# Mean of all keypoint coordinate vectors in wkps (averaged over keypoints, axis 0).
np.mean(np.array([wkps[keypoint] for keypoint in wkps]), 0)

In [None]:
# Side-by-side: three candidate reference points (midpoints of keypoint pairs),
# the row's stored depth, and the mean of all keypoints.
.5 * (wkps['DORSAL_FIN'] + wkps['PELVIC_FIN']), .5 * (wkps['ADIPOSE_FIN'] + wkps['PECTORAL_FIN']), .5 * (wkps['DORSAL_FIN'] + wkps['PECTORAL_FIN']), row.depth, np.mean(np.array([wkps[keypoint] for keypoint in wkps]), 0)

In [None]:
# Print the world coordinates (two decimals) of a chosen sequence of keypoints.
ordered_wkps = ['UPPER_LIP', 'EYE', 'DORSAL_FIN', 'ADIPOSE_FIN', 'TAIL_NOTCH']

for keypoint in ordered_wkps:
    print('%s: %0.2f, %0.2f, %0.2f' % (keypoint, wkps[keypoint][0], wkps[keypoint][1], wkps[keypoint][2]))

In [None]:
# Print the world coordinates (two decimals) of every keypoint in wkps.
for keypoint in wkps:
    print('%s: %0.2f, %0.2f, %0.2f' % (keypoint, wkps[keypoint][0], wkps[keypoint][1], wkps[keypoint][2]))

In [None]:
# Same dump as the previous cell; its output depends on which row last set `wkps`.
for keypoint in wkps:
    print('%s: %0.2f, %0.2f, %0.2f' % (keypoint, wkps[keypoint][0], wkps[keypoint][1], wkps[keypoint][2]))

In [None]:
# Print the world coordinates (two decimals) of a second keypoint sequence.
ordered_wkps = ['UPPER_LIP', 'EYE', 'PECTORAL_FIN', 'PELVIC_FIN', 'ANAL_FIN', 'TAIL_NOTCH']

for keypoint in ordered_wkps:
    print('%s: %0.2f, %0.2f, %0.2f' % (keypoint, wkps[keypoint][0], wkps[keypoint][1], wkps[keypoint][2]))

In [None]:
# Repeat of the earlier five-keypoint dump (same keypoint list).
ordered_wkps = ['UPPER_LIP', 'EYE', 'DORSAL_FIN', 'ADIPOSE_FIN', 'TAIL_NOTCH']

for keypoint in ordered_wkps:
    print('%s: %0.2f, %0.2f, %0.2f' % (keypoint, wkps[keypoint][0], wkps[keypoint][1], wkps[keypoint][2]))

In [None]:
# Reference point for the currently inspected row: midpoint of DORSAL_FIN and PELVIC_FIN.
centroid = .5 * (wkps['DORSAL_FIN'] + wkps['PELVIC_FIN'])

In [None]:
# Angles of the vector (keypoint - centroid) for every keypoint.
for keypoint in wkps:
    print(keypoint, get_angles(wkps[keypoint], centroid))

In [None]:
# Angles of the opposite vector (centroid - keypoint) for every keypoint.
for keypoint in wkps:
    print(keypoint, get_angles(centroid, wkps[keypoint]))

In [None]:
# Keypoint names in the order of interest. The list literal is closed here so the
# cell parses; the trailing empty placeholder entry is dropped.
ordered_keypoints = ['TAIL_NOTCH', 'ADIPOSE_FIN', 'UPPER_LIP']
# Mean of all keypoint coordinate vectors in wkps (averaged over keypoints, axis 0).
np.mean(np.array([wkps[keypoint] for keypoint in wkps]), 0)

In [None]:
# Recompute, for the inspected row, the quantities the loading cell stores per row.
centroid = .5 * (wkps['DORSAL_FIN'] + wkps['PELVIC_FIN'])
# NOTE: this value is computed but discarded (it is not the last expression of the cell).
np.linalg.norm(np.array(get_angles(wkps['UPPER_LIP'], centroid)) - np.array(get_angles(centroid, wkps['TAIL_NOTCH'])))

# Unit vector from the centroid towards UPPER_LIP.
a = (wkps['UPPER_LIP'] - centroid) / np.linalg.norm(wkps['UPPER_LIP'] - centroid)

# Adjusted length (element-wise product, then norm) next to the stored raw length.
np.linalg.norm((wkps['UPPER_LIP'] - wkps['TAIL_NOTCH']) * a), row.length


# Alternative centroid definitions that were tried:
# centroid = .5 * (wkps['ADIPOSE_FIN'] + wkps['PECTORAL_FIN'])
# centroid = .5 * (wkps['DORSAL_FIN'] + wkps['PECTORAL_FIN'])
# centroid = np.mean(np.array([wkps[keypoint] for keypoint in wkps]), 0)


In [None]:
# (theta, phi) in degrees of the vector UPPER_LIP - TAIL_NOTCH.
get_angles(wkps['UPPER_LIP'], wkps['TAIL_NOTCH'])

In [None]:
# (theta, phi) in degrees of the vector EYE - TAIL_NOTCH.
get_angles(wkps['EYE'], wkps['TAIL_NOTCH'])

In [None]:
# (theta, phi) in degrees of the vector ANAL_FIN - TAIL_NOTCH.
get_angles(wkps['ANAL_FIN'], wkps['TAIL_NOTCH'])

In [None]:
# (theta, phi) in degrees of the vector ADIPOSE_FIN - TAIL_NOTCH.
get_angles(wkps['ADIPOSE_FIN'], wkps['TAIL_NOTCH'])

In [None]:
# (theta, phi) in degrees of the vector PECTORAL_FIN - PELVIC_FIN.
get_angles(wkps['PECTORAL_FIN'], wkps['PELVIC_FIN'])

In [None]:
# (theta, phi) in degrees of the vector PELVIC_FIN - ANAL_FIN.
get_angles(wkps['PELVIC_FIN'], wkps['ANAL_FIN'])

In [None]:
# (theta, phi) in degrees of the vector DORSAL_FIN - ADIPOSE_FIN.
get_angles(wkps['DORSAL_FIN'], wkps['ADIPOSE_FIN'])

In [None]:
def equation_plane(p1, p2, p3, p4):
    """Evaluate at ``p4`` the equation of the plane through ``p1``, ``p2``, ``p3``.

    The plane is a*x + b*y + c*z + d = 0, where (a, b, c) is the cross product
    of (p2 - p1) and (p3 - p1) and d makes p1 satisfy the equation.

    Args:
        p1, p2, p3: indexable 3-D points that define the plane.
        p4: indexable 3-D point to test.

    Returns:
        a*p4[0] + b*p4[1] + c*p4[2] + d. Zero when p4 lies in the plane. The
        normal is not normalised, so the magnitude is not a geometric distance.
    """
    # Edge vectors from p1 to p2 (u) and from p1 to p3 (v).
    ux, uy, uz = (p2[k] - p1[k] for k in range(3))
    vx, vy, vz = (p3[k] - p1[k] for k in range(3))

    # Components of the plane normal u x v.
    a = uy * vz - vy * uz
    b = vx * uz - ux * vz
    c = ux * vy - uy * vx
    d = (- a * p1[0] - b * p1[1] - c * p1[2])

    return a * p4[0] + b * p4[1] + c * p4[2] + d

In [None]:
def get_angles(kp1, kp2):
    """Return two angles, in degrees, describing the direction of ``kp1 - kp2``.

    With (x, y, z) the unit vector along kp1 - kp2:
      * theta = atan(y / x) * sign(y)
      * phi   = 90 - acos(z)

    Args:
        kp1, kp2: 3-component numpy arrays.

    Returns:
        Tuple ``(theta_degrees, phi_degrees)``.
    """
    direction = kp1 - kp2
    unit = direction / np.linalg.norm(direction)
    ux, uy, uz = unit

    theta_rad = np.sign(uy) * math.atan(uy / ux)
    phi_deg = 90 - math.degrees(math.acos(uz))

    return math.degrees(theta_rad), phi_deg

In [None]:
# Plane through TAIL_NOTCH, DORSAL_FIN, PELVIC_FIN evaluated at UPPER_LIP.
equation_plane(wkps['TAIL_NOTCH'], wkps['DORSAL_FIN'], wkps['PELVIC_FIN'], wkps['UPPER_LIP'])

In [None]:
# Plane through TAIL_NOTCH, ADIPOSE_FIN, ANAL_FIN evaluated at UPPER_LIP.
equation_plane(wkps['TAIL_NOTCH'], wkps['ADIPOSE_FIN'], wkps['ANAL_FIN'], wkps['UPPER_LIP'])

In [None]:
# Plane through ADIPOSE_FIN, DORSAL_FIN, ANAL_FIN evaluated at PELVIC_FIN.
equation_plane(wkps['ADIPOSE_FIN'], wkps['DORSAL_FIN'], wkps['ANAL_FIN'], wkps['PELVIC_FIN'])

In [None]:
# Plane through DORSAL_FIN, PECTORAL_FIN, UPPER_LIP evaluated at EYE.
equation_plane(wkps['DORSAL_FIN'], wkps['PECTORAL_FIN'], wkps['UPPER_LIP'], wkps['EYE'])

In [None]:
# NOTE: not the last expression of the cell, so Jupyter does not render it.
df.head()

# Heavy (> 7000) and light (< 5000) subsets by estimated_weight_g.
df_7000 = df[df['estimated_weight_g'] > 7000]
df_5000 = df[df['estimated_weight_g'] < 5000]
# Mean depth of the heavy subset.
print(np.mean(df_7000['depth']))

In [None]:
# Rebinds df2 to a single day.
# NOTE(review): 2020-10-27 lies outside the window currently selected in the
# loading cell (2020-11-18 to 2020-11-21), so df2 is presumably empty unless that
# window is changed -- confirm.
df2 = df[df['date'] == '2020-10-27']
# Subsets of that day by akpd_score threshold and by depth.
df95 = df2[df2['akpd_score'] > 0.95]
df99 = df2[df2['akpd_score'] > 0.99]
df1_5 = df2[df2['depth'] > 1.5]

# Number of rows deeper than 1.5 versus all rows of that day.
print(len(df1_5), len(df2))
#plt.hist(df2['hour'])

In [None]:


# 100 bucket centres evenly spaced from 4000 to 7000 (units of estimated_weight_g).
buckets = np.linspace(4000, 7000, 100)

# Will hold one fitted parameter tuple per bucket.
results = []

def adj_weight(x):
    """Return ``x`` raised to the power 2/3 (element-wise for arrays and Series)."""
    exponent = 2 / 3
    return x ** exponent

# For each bucket centre, fit a Weibull distribution to the depths of the rows whose
# estimated_weight_g lies within +/- 1000 of the centre (windows overlap heavily).
for bucket in buckets:
    min_bucket = bucket - 1000
    max_bucket = bucket + 1000
    mask = (df.estimated_weight_g > min_bucket) & (df.estimated_weight_g < max_bucket)
#     mask = (adj_weight(df.estimated_weight_g) > adj_weight(min_bucket)) & (adj_weight(df.estimated_weight_g) < adj_weight(max_bucket))
    # floc pins the location parameter to 0.7; fit returns (shape, loc, scale).
    res = stats.weibull_min.fit(df[mask].depth, floc = 0.7)
    results.append(res)
    
# One row per bucket, columns (shape, loc, scale).
results = np.array(results)

In [None]:
# Linear models of the fitted Weibull parameters as a function of bucket weight:
# column 0 (shape) -> m0, column 2 (scale) -> m2.
Y0 = results[:,0]
Y2 = results[:,2]
X = buckets
X = sm.add_constant(X)  # design matrix rows are [1, weight]
model0 = sm.OLS(Y0,X)
model2 = sm.OLS(Y2,X)
m0 = model0.fit()
m2 = model2.fit()
# OLSresults = model.fit()
# OLSresults.summary()



In [None]:
def get_prob(depth, weight):
    """Weibull density of ``depth`` using parameters predicted from ``weight``.

    The shape and scale parameters come from the module-level regressions ``m0``
    and ``m2`` evaluated at the design row ``[1, weight]``; the location is the
    constant 0.7 used when the per-bucket distributions were fitted.

    Returns:
        The value of ``stats.weibull_min.pdf``; because the predicted parameters
        are length-1 arrays, so is the result.
    """
    design_row = [1, weight]
    shape = m0.predict(design_row)
    scale = m2.predict(design_row)

    return stats.weibull_min.pdf(depth, shape, 0.7, scale)

# All weights, unfiltered (used for the plain mean later).
weights = df.estimated_weight_g

# Weights and modelled depth densities of the rows that are kept.
weights_weight = []
weights_prob = []

for i, row in df.iterrows():
    prob = get_prob(row['depth'], row['estimated_weight_g'])
    
    # Rows with a modelled density below 0.01 are printed and left out.
    if prob < 0.01:
        print(row['depth'], row['estimated_weight_g'])
    else:
        weights_weight.append(row['estimated_weight_g'])
        weights_prob.append(prob[0])  # prob is a length-1 array
    
weights_weight = np.array(weights_weight)
weights_prob = np.array(weights_prob)

In [None]:
# w1: plain mean over all rows (including those dropped for low density).
w1 = np.mean(weights)
# w2: mean over the kept rows, each weighted by 1 / modelled density.
w2 = np.sum(weights_weight / weights_prob) / np.sum(1 / weights_prob)

print(w1, w2)
# Relative difference of the weighted mean from the plain mean.
print((w1 - w2) / w1)

In [None]:
# NOTE(review): `weights2` is not defined anywhere in the visible notebook, so this
# raises NameError on a fresh kernel -- presumably `weights` or `weights_weight`
# was meant; confirm and rename.
min(weights2)

In [None]:
# Fitted Weibull parameters per bucket, each drawn against weight**(2/3) and against
# raw weight: first the shape (column 0), then the scale (column 2).
# NOTE: expects `buckets` to still be the linspace the fits were run on; a later
# cell rebinds `buckets` to a plain list.
plt.figure(figsize=(20, 5))
plt.plot(adj_weight(buckets), results[:,0])
plt.figure(figsize=(20, 5))
plt.plot(buckets, results[:,0])
plt.figure(figsize=(20, 5))
plt.plot(adj_weight(buckets), results[:,2])
plt.figure(figsize=(20, 5))
plt.plot(buckets, results[:,2])

In [None]:
# Two weight bands: 4000-6000 and 6000-8000 (exclusive bounds).
mask1 = (df.estimated_weight_g > 4000) & (df.estimated_weight_g < 6000)
mask2 = (df.estimated_weight_g > 6000) & (df.estimated_weight_g < 8000)

# NOTE: rebinds d1, which an earlier cell used for a weight Series.
d1 = df[mask1]
d2 = df[mask2]

plt.figure(figsize=(20, 10))
x = np.linspace(0, 3, 5000)
# Unconstrained Weibull fits to depth for each band, drawn as density curves ...
plt.plot(x, stats.weibull_min.pdf(x, *stats.weibull_min.fit(d1['depth'])))
plt.plot(x, stats.weibull_min.pdf(x, *stats.weibull_min.fit(d2['depth'])))
# plt.plot(x, stats.weibull_min.pdf(x, *stats.weibull_min.fit(df_5000['depth'])))
# plt.plot(x, stats.weibull_min.pdf(x, *stats.weibull_min.fit(df_7000['depth'])))
# plt.hist(df['depth'], bins = 30, alpha = 0.5, density = True)
# plt.hist(df_5000['depth'], bins = 30, alpha = 0.5, density = True)
# plt.hist(df_7000['depth'], bins = 30, alpha = 0.5, density = True)
# ... over the empirical depth densities of the same bands.
plt.hist(d1['depth'], bins = 30, alpha = 0.5, density = True)
plt.hist(d2['depth'], bins = 30, alpha = 0.5, density = True)

In [None]:
# Fit with the location fixed at 0.68 versus a fully free fit, for the first band.
print(stats.weibull_min.fit(d1['depth'], floc=0.68))
print(stats.weibull_min.fit(d1['depth']))

In [None]:
# Depth densities of heavy (> 7000) and light (< 5000) rows, overlaid.
plt.hist(df[df.estimated_weight_g > 7000]['depth'], bins = 30, alpha = 0.5, density = True)
plt.hist(df[df.estimated_weight_g < 5000]['depth'], bins = 30, alpha = 0.5, density = True)

In [None]:
# Probability plot of depth against a Weibull distribution whose parameters are
# fitted to the same data. Earlier and alternative variants are kept as comments.
#res = stats.probplot(df.depth, plot=plt)
#res = stats.probplot(df.depth, dist=stats.chi2, sparams=(50, ), plot=plt)
# res = stats.probplot(df.depth, dist=stats.weibull_min, sparams=(2, 0, 1.49), plot=plt)
res = stats.probplot(df.depth, dist=stats.weibull_min, sparams=stats.weibull_min.fit(df['depth']), plot=plt)
# res = stats.probplot(df_5000.depth, dist=stats.weibull_min, sparams=(2, ), plot=plt)
# res = stats.probplot(df_7000.depth, dist=stats.weibull_min, sparams=(2, ), plot=plt)

In [None]:
# Weight density of the single-day subset (blue) versus its rows deeper than 1.5
# (red), drawn on the same bin edges.
plt.figure(figsize=(20, 10))
# NOTE: rebinds `bins`, which the reflection cell also uses.
density, bins, _ = plt.hist(df2.estimated_weight_g, bins = 30, alpha = 0.5, density = True, color = 'blue')
plt.hist(df1_5.estimated_weight_g, bins = bins, alpha = 0.5, density = True, color = 'red')
#plt.hist(df99.estimated_weight_g, bins = bins, alpha = 0.5, density = True, color = 'red')

In [None]:
print(np.mean(df2.estimated_weight_g))

# NOTE: rebinds `buckets` (previously the linspace used for the Weibull fits) and
# `mask1`; cells that rely on the earlier values must be re-run after their source cells.
buckets = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]

# Share of df2 rows whose weight falls in each (lower, upper] 1000-wide bucket.
for i in range(len(buckets) - 1):
    mask1 = (df2['estimated_weight_g'] > buckets[i]) & (df2['estimated_weight_g'] <= buckets[i + 1])
    
    print('%i: %0.2f' % (buckets[i], sum(mask1) / len(mask1)))

In [None]:
# Probability plot of df2's estimated_weight_g against probplot's default distribution.
res = stats.probplot(df2.estimated_weight_g, plot=plt)