In [None]:
import json, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['axes.facecolor'] = 'white'
import matplotlib.colors
import statsmodels.api as sm

import pytz 

from datetime import timedelta, datetime

from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

# Notebook-level diverging colormap for the scatter plots.
# `pyplot.get_cmap` is used because `matplotlib.cm.get_cmap` (reached via
# `plt.cm.get_cmap`) was deprecated in Matplotlib 3.7 and removed in 3.9.
cm = plt.get_cmap('RdYlBu')

In [None]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# In-memory cache mapping a fully formatted SQL string to the DataFrame it
# returned. Re-running this cell resets the cache to empty.
queryCache = {}

In [None]:
# Load BATI crop annotations joined with biomass computations for one pen and
# date window, then derive time, depth and crop-metadata columns on `df`.

# Read the credentials through a context manager so the file handle is closed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

pen_id = 60
start_date = '2020-06-01'
end_date = '2020-06-30'
akpd_filter = 0.0

# The query text doubles as the cache key below, so it is built entirely from
# the constants above.
# NOTE(review): the last placeholder (x.group_id) is filled with pen_id --
# confirm that group_id is really meant to equal the pen id.
query = """
    SELECT * FROM (
      (SELECT * FROM prod.crop_annotation cas
      INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
      WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
      AND cas.annotation_state_id = 3
      AND cas.pen_id=%i) a
    RIGHT JOIN 
      (SELECT left_crop_url, estimated_weight_g, akpd_score FROM prod.biomass_computations
      WHERE prod.biomass_computations.captured_at >= '%s'
      AND prod.biomass_computations.captured_at <= '%s'
      AND prod.biomass_computations.akpd_score > %0.4f) bc 
    ON 
      (a.left_crop_url=bc.left_crop_url)
    ) x
    WHERE x.captured_at >= '%s'
    AND x.captured_at <= '%s'
    AND x.pen_id = %i
    AND x.group_id = '%i';
""" % (pen_id, start_date, end_date, akpd_filter, start_date, end_date, pen_id, pen_id)


# Hit the database only when this exact query has not been fetched before;
# copies keep the cached frame isolated from the mutations below.
if query in queryCache:
    df = queryCache[query].copy()
else:
    df = rds_access_utils.extract_from_database(query)
    queryCache[query] = df.copy()

# Order chronologically and index by capture timestamp.
df = df.sort_values('captured_at').copy(deep=True)
df.index = pd.to_datetime(df.captured_at)
dates = df.index.date.astype(str)
df['date'] = dates
df['hour'] = df.index.hour

# Make sure the column exists even when the query result does not provide it.
if 'estimated_k_factor' not in df.columns:
    df['estimated_k_factor'] = 0.0

# Per-row depth: median of the index-1 component of the world keypoints
# returned by pixel2world.
depths = []
for idx, row in df.iterrows():
    # Deliberately NOT named `cm`: that name is the notebook-level colormap
    # used by the plotting cells, and rebinding it here breaks every plot.
    ann, camera_metadata = row.annotation, row.camera_metadata
    wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], camera_metadata)
    depth = np.median([wkp[1] for wkp in wkps.values()])
    depths.append(depth)
df['depth'] = depths


def _metadata_field(metadata, key):
    """Return metadata[key], or None when the metadata or the key is absent."""
    return metadata.get(key) if metadata else None


def _category_field(metadata, key):
    """Return metadata['category'][key], or None when any level is absent."""
    category = metadata.get('category') if metadata else None
    return category.get(key) if category else None


# Flatten the left-crop metadata into one column per score.
df['quality_score'] = df.left_crop_metadata.apply(lambda x: _metadata_field(x, 'quality_score'))
df['mean_luminance'] = df.left_crop_metadata.apply(lambda x: _metadata_field(x, 'mean_luminance'))
df['edge_detection_score'] = df.left_crop_metadata.apply(lambda x: _metadata_field(x, 'edge_detection_score'))
df['mean_green_luminance'] = df.left_crop_metadata.apply(lambda x: _metadata_field(x, 'mean_green_luminance'))
# The confidences live one level deeper, under 'category'; a missing
# 'category' entry now yields None instead of raising.
df['objectConfidence'] = df.left_crop_metadata.apply(lambda x: _category_field(x, 'objectConfidence'))
df['categoryConfidence'] = df.left_crop_metadata.apply(lambda x: _category_field(x, 'categoryConfidence'))
df['skip_classifier_score'] = df.left_crop_metadata.apply(lambda x: _metadata_field(x, 'skip_classifier_score'))

df 

In [None]:
# Show only the rows whose service_id equals 1.
df.loc[df['service_id'] == 1]

In [None]:
# AKPD score (x) vs mean luminance (y), coloured by estimated_weight_g, with an
# OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['akpd_score'], df_mask['mean_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_luminance']
X = df_mask['akpd_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['akpd_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Mean Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('AKPD Score')
axes.set_ylabel('Mean Luminance')

plt.colorbar(sc)

In [None]:
# Quality score (x) vs mean luminance (y), coloured by estimated_weight_g, with
# an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['quality_score'], df_mask['mean_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_luminance']
X = df_mask['quality_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['quality_score'], results.predict(), color='red')

axes.set_title('Quality Score vs Mean Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Quality Score')
axes.set_ylabel('Mean Luminance')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs mean luminance (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['mean_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_luminance']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Skip Classifier Score vs Mean Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Mean Luminance')

plt.colorbar(sc)

In [None]:
# Depth (x) vs mean luminance (y), coloured by estimated_weight_g, with an OLS
# trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['depth'], df_mask['mean_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_luminance']
X = df_mask['depth']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['depth'], results.predict(), color='red')

axes.set_title('Depth vs Mean Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Depth')
axes.set_ylabel('Mean Luminance')

plt.colorbar(sc)

In [None]:
# AKPD score (x) vs edge detection score (y), coloured by estimated_weight_g,
# with an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['akpd_score'], df_mask['edge_detection_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['edge_detection_score']
X = df_mask['akpd_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['akpd_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Edge Detection Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('AKPD Score')
axes.set_ylabel('Edge Detection Score')

plt.colorbar(sc)

In [None]:
# Quality score (x) vs edge detection score (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['quality_score'], df_mask['edge_detection_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['edge_detection_score']
X = df_mask['quality_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['quality_score'], results.predict(), color='red')

axes.set_title('Quality Score vs Edge Detection Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Quality Score')
axes.set_ylabel('Edge Detection Score')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs edge detection score (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['edge_detection_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['edge_detection_score']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

# The trend line had been disabled while it used the Y column
# (edge_detection_score) as its x values; it is drawn against the regressor
# here so it lines up with the scatter's x-axis.
axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Skip Classifier Score vs Edge Detection Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Edge Detection Score')

plt.colorbar(sc)

In [None]:
# Depth (x) vs edge detection score (y), coloured by estimated_weight_g, with
# an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['depth'], df_mask['edge_detection_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['edge_detection_score']
X = df_mask['depth']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['depth'], results.predict(), color='red')

axes.set_title('Depth vs Edge Detection Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Depth')
axes.set_ylabel('Edge Detection Score')

plt.colorbar(sc)

In [None]:
# AKPD score (x) vs mean green luminance (y), coloured by estimated_weight_g,
# with an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['akpd_score'], df_mask['mean_green_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_green_luminance']
X = df_mask['akpd_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['akpd_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Mean Green Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('AKPD Score')
axes.set_ylabel('Mean Green Luminance')

plt.colorbar(sc)

In [None]:
# Quality score (x) vs mean green luminance (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['quality_score'], df_mask['mean_green_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_green_luminance']
X = df_mask['quality_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['quality_score'], results.predict(), color='red')

axes.set_title('Quality Score vs Mean Green Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Quality Score')
axes.set_ylabel('Mean Green Luminance')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs mean green luminance (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['mean_green_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_green_luminance']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Skip Classifier Score vs Mean Green Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Mean Green Luminance')

plt.colorbar(sc)

In [None]:
# Depth (x) vs mean green luminance (y), coloured by estimated_weight_g, with
# an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['depth'], df_mask['mean_green_luminance'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['mean_green_luminance']
X = df_mask['depth']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['depth'], results.predict(), color='red')

axes.set_title('Depth vs Mean Green Luminance (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Depth')
axes.set_ylabel('Mean Green Luminance')

plt.colorbar(sc)

In [None]:
# AKPD score (x) vs object confidence (y), coloured by estimated_weight_g, with
# an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['akpd_score'], df_mask['objectConfidence'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['objectConfidence']
X = df_mask['akpd_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['akpd_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Object Confidence (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('AKPD Score')
axes.set_ylabel('Object Confidence')

plt.colorbar(sc)

In [None]:
# Quality score (x) vs object confidence (y), coloured by estimated_weight_g,
# with an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['quality_score'], df_mask['objectConfidence'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['objectConfidence']
X = df_mask['quality_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['quality_score'], results.predict(), color='red')

axes.set_title('Quality Score vs Object Confidence (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Quality Score')
axes.set_ylabel('Object Confidence')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs object confidence (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['objectConfidence'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['objectConfidence']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Skip Classifier Score vs Object Confidence (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Object Confidence')

plt.colorbar(sc)

In [None]:
# Depth (x) vs object confidence (y), coloured by estimated_weight_g, with an
# OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['depth'], df_mask['objectConfidence'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['objectConfidence']
X = df_mask['depth']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['depth'], results.predict(), color='red')

axes.set_title('Depth vs Object Confidence (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Depth')
axes.set_ylabel('Object Confidence')

plt.colorbar(sc)

In [None]:
# Inspect the raw left-crop metadata of the first row. `.iloc` replaces
# `.ix`, which was removed in pandas 1.0; on this non-integer index `.ix[0]`
# was a positional lookup, so `.iloc[0]` selects the same row.
df.iloc[0]['left_crop_metadata']

In [None]:
# AKPD score (x) vs quality score (y), coloured by estimated_weight_g, with an
# OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['akpd_score'], df_mask['quality_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['quality_score']
X = df_mask['akpd_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['akpd_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Quality Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('AKPD Score')
axes.set_ylabel('Quality Score')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs AKPD score (y), coloured by estimated_weight_g,
# with an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.98.
mask = df['akpd_score'] > 0.98

df_mask = df[mask]

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['akpd_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['akpd_score']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('AKPD Score vs Skip Classifier Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('AKPD Score')

plt.colorbar(sc)

In [None]:
# Depth (x) vs AKPD score (y), coloured by estimated_weight_g, with an OLS
# trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# Restrict to rows with an AKPD score above 0.9.
mask = df['akpd_score'] > 0.9

df_mask = df[mask]

# Name the colormap explicitly rather than relying on the notebook-level
# `cm`, which other cells may rebind to something that is not a colormap.
sc = axes.scatter(df_mask['depth'], df_mask['akpd_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['akpd_score']
X = df_mask['depth']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['depth'], results.predict(), color='red')

axes.set_title('AKPD Score vs Depth (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Depth')
axes.set_ylabel('AKPD Score')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs quality score (y), coloured by
# estimated_weight_g, with an OLS trend line; the title reports R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Name the colormap explicitly rather than relying on the notebook-level
# `cm`, which other cells may rebind to something that is not a colormap.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['quality_score'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['quality_score']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Quality Score vs Skip Classifier Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Quality Score')

plt.colorbar(sc)

In [None]:
# Skip classifier score (x) vs depth (y), coloured by estimated_weight_g, with
# an OLS trend line; the title reports the fit's R-squared.
fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))
# mask = df['akpd_score'] > 0.98

# No filtering here: every row is plotted.
df_mask = df

# Keep the scatter handle so the colorbar is built from THIS figure, and name
# the colormap explicitly rather than relying on the notebook-level `cm`,
# which other cells may rebind.
sc = axes.scatter(df_mask['skip_classifier_score'], df_mask['depth'], c = df_mask['estimated_weight_g'], vmax = 6000, cmap = 'RdYlBu')

# Ordinary least squares of Y on X with an intercept term.
Y = df_mask['depth']
X = df_mask['skip_classifier_score']
X = sm.tools.tools.add_constant(X)
model = sm.OLS(Y,X)
results = model.fit()

axes.plot(df_mask['skip_classifier_score'], results.predict(), color='red')

axes.set_title('Depth vs Skip Classifier Score (%0.3f)' % (results.rsquared, ))
axes.set_xlabel('Skip Classifier Score')
axes.set_ylabel('Depth')

plt.colorbar(sc)

In [None]:
# Rows with an AKPD score below 0.9 and a skip classifier score above 0.2.
mask = df['akpd_score'].lt(.9) & df['skip_classifier_score'].gt(.2)
df.loc[mask]

In [None]:
# Show up to ten sample crops for each AKPD / skip-classifier score quadrant.
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` is deprecated.
from IPython.display import Image, display

# Earlier threshold experiments, kept for reference:
# masks = {
#     'good_akpd_good_quality': (df['akpd_score'] > .995) & (df['quality_score'] > .35),
#     'bad_akpd_good_quality': (df['akpd_score'] < .92) & (df['quality_score'] > .3),
#     'good_akpd_bad_quality': (df['akpd_score'] > .995) & (df['quality_score'] < .2),
#     'bad_akpd_bad_quality': (df['akpd_score'] < .92) & (df['quality_score'] < .2)
# }

# masks = {
#     'good_akpd_good_quality': (df['akpd_score'] > .995) & (df['quality_score'] > .33),
#     'bad_akpd_good_quality': (df['akpd_score'] < .9) & (df['quality_score'] > .33),
#     'good_akpd_bad_quality': (df['akpd_score'] > .995) & (df['quality_score'] < .15),
#     'bad_akpd_bad_quality': (df['akpd_score'] < .92) & (df['quality_score'] < .15)
# }

# masks = {
#     'good_akpd_good_quality': (df['akpd_score'] > .995) & (df['quality_score'] > .33),
#     'bad_akpd_good_quality': (df['akpd_score'] < .9) & (df['quality_score'] > .33),
#     'good_akpd_bad_quality': (df['akpd_score'] > .995) & (df['quality_score'] < .06),
#     'bad_akpd_bad_quality': (df['akpd_score'] < .9) & (df['quality_score'] < .06)
# }

# Boolean row selectors, keyed by a label that is printed above each group.
masks = {
    'good_akpd_good_skip': (df['akpd_score'] > .99) & (df['skip_classifier_score'] > .99),
    'bad_akpd_good_skip': (df['akpd_score'] < .9) & (df['skip_classifier_score'] > .99),
    'good_akpd_bad_skip': (df['akpd_score'] > .99) & (df['skip_classifier_score'] < .01),
    'bad_akpd_bad_skip': (df['akpd_score'] < .9) & (df['skip_classifier_score'] < .01)
}

for mask_name, mask in masks.items():
    print(mask_name)

    df_mask = df[mask]

    # Positional slice of the first ten rows (fewer if the group is smaller).
    # This replaces `.ix[i]`, which was removed in pandas 1.0.
    for url in df_mask['url_key'].iloc[:10]:
        display(Image(url, width=300))