In [None]:
import json
import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from research.utils.image_utils import Picture
pd.set_option('display.max_colwidth', 500)

<h1> Define lice counting crop input </h1>

In [None]:
# Read the warehouse credentials from the JSON file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle as soon as the credentials are parsed.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds = RDSAccessUtils(json.load(credentials_file))

# Crop pairs for pen 95 between 2020-06-20 and 2020-06-30, restricted to
# service_id 1, rows where both crop URLs are present, and annotation states
# 1 or 8.
query = """
    SELECT
        pen_id,captured_at,left_crop_url,right_crop_url,left_crop_metadata,right_crop_metadata,camera_metadata
    FROM
        prod.crop_annotation
    WHERE 
        pen_id=95
    AND captured_at BETWEEN '2020-06-20' AND '2020-06-30' AND service_id=1 
    AND left_crop_url IS NOT NULL AND right_crop_url IS NOT NULL
    AND (annotation_state_id=1 OR annotation_state_id=8);
"""

df = rds.extract_from_database(query)

In [None]:
# Write the queried crop rows to CSV.
# presumably this file is the input for the pipeline run that produces the
# `*_lice_crop_bm_output.csv` read in the next section — confirm.
# NOTE(review): the index name is set here, but `index=False` below means the
# index is not written to the file — confirm whether the name is needed.
df.index.name = 'index'
f = '/root/data/alok/biomass_estimation/playground/pen_id_95_2020-06-20_2020-06-30_lice_crop_input.csv'
df.to_csv(f, index=False)

<h1> Define biomass pipeline result </h1>

In [None]:
# Load the `*_lice_crop_bm_output.csv` file; every later cell reads
# `akpd_score` (and the crop URLs / annotation) from this frame.
f = '/root/data/alok/biomass_estimation/playground/pen_id_95_2020-06-20_2020-06-30_lice_crop_bm_output.csv'
df = pd.read_csv(f)

# Re-load the crop input CSV written in the previous section.
# NOTE(review): `odf` is rebound from a database query in the
# "Show status quo result" section and does not appear to be read before
# that — this load may be unnecessary; confirm.
f = '/root/data/alok/biomass_estimation/playground/pen_id_95_2020-06-20_2020-06-30_lice_crop_input.csv'
odf = pd.read_csv(f)

In [None]:
# (rows, columns) of the subset whose akpd_score exceeds 0.9.
df[df.akpd_score > 0.9].shape

In [None]:
# Count how many rows survive each AKPD score cutoff, sweeping the cutoff from
# 0.9 upward in steps of 0.01 (the stop value 0.999 is exclusive).
akpd_cutoffs = np.arange(0.9, 0.999, 0.01)
sample_sizes = [
    df[df.akpd_score > akpd_cutoff].shape[0]
    for akpd_cutoff in akpd_cutoffs
]

# Label the axes the same way as the comparison plot further down so the two
# figures can be read side by side.
plt.plot(akpd_cutoffs, sample_sizes)
plt.xlabel('AKPD score cutoff')
plt.ylabel('Sample Size')
plt.grid()
plt.show()
    

In [None]:
# Left crop URLs of the rows whose akpd_score is below 0.9.
df[df.akpd_score < 0.9].left_crop_url

In [None]:
# Read the AWS credentials from the JSON file named by the AWS_CREDENTIALS
# environment variable; the context manager closes the file handle promptly.
with open(os.environ['AWS_CREDENTIALS']) as aws_credentials_file:
    s3_access_utils = S3AccessUtils('/root/data', json.load(aws_credentials_file))

def display_crops(left_crop_url, right_crop_url, ann, overlay_keypoints=True, show_labels=False):
    """Show the left and right crops stacked vertically, optionally with keypoints.

    Args:
        left_crop_url: image URL of the left crop, handed to ``Picture``.
        right_crop_url: image URL of the right crop, handed to ``Picture``.
        ann: mapping with ``'leftCrop'`` and ``'rightCrop'`` entries, each an
            iterable of dicts carrying ``'keypointType'``, ``'xCrop'`` and
            ``'yCrop'``. Only read when ``overlay_keypoints`` is true.
        overlay_keypoints: draw every keypoint as a small red dot on its crop.
        show_labels: also annotate each dot with its keypoint type; has no
            effect unless ``overlay_keypoints`` is true.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # (annotation key, image URL) per crop, in top-to-bottom display order.
    crop_sides = (('leftCrop', left_crop_url), ('rightCrop', right_crop_url))

    pictures = [
        Picture(s3_access_utils=s3_access_utils, image_url=crop_url)
        for _, crop_url in crop_sides
    ]
    for picture in pictures:
        picture.enhance()
    images = [picture.get_image() for picture in pictures]

    _, axes = plt.subplots(2, 1, figsize=(20, 20))
    for ax, image in zip(axes, images):
        ax.imshow(image)

    if overlay_keypoints:
        for ax, (side_key, _) in zip(axes, crop_sides):
            # Keyed by keypoint type, so a repeated type keeps its last entry.
            keypoints = {
                item['keypointType']: (item['xCrop'], item['yCrop'])
                for item in ann[side_key]
            }
            for body_part, (x, y) in keypoints.items():
                ax.scatter([x], [y], color='red', s=1)
                if show_labels:
                    ax.annotate(body_part, (x, y), color='red')

    plt.show()

In [None]:
# Pick one low-confidence row (akpd_score below 0.9) by position and render
# its crop pair with the keypoints parsed from the row's JSON annotation.
idx = 74
low_score_rows = df[df.akpd_score < 0.9]
row = low_score_rows.iloc[idx]
row_annotation = json.loads(row.annotation)
display_crops(row.left_crop_url, row.right_crop_url, row_annotation)

In [None]:
# Display every field of the row selected in the previous cell.
row

<h1> Show status quo result </h1>

In [None]:
# Status-quo comparison set: all columns of prod.biomass_computations for
# pen 95 with captured_at between '2020-06-20' and '2020-06-30'.
# NOTE(review): if captured_at is a timestamp, the upper bound is compared
# against midnight of 2020-06-30, so most of that day would be excluded —
# confirm this is intended.
query = """
    SELECT
        *
    FROM
        prod.biomass_computations
    WHERE 
        pen_id=95
    AND captured_at BETWEEN '2020-06-20' AND '2020-06-30'
"""

# This rebinds `odf`, which an earlier cell loaded from the crop input CSV.
odf = rds.extract_from_database(query)

In [None]:
# Sweep the AKPD score cutoff and count the surviving rows in each data set:
# `odf` (database results) versus `df` (pipeline output loaded from CSV).
akpd_cutoffs = np.arange(0.9, 0.999, 0.01)
sample_sizes = [odf[odf.akpd_score > cutoff].shape[0] for cutoff in akpd_cutoffs]
new_sample_sizes = [df[df.akpd_score > cutoff].shape[0] for cutoff in akpd_cutoffs]

# One curve per data set, drawn in the same order as before.
curves = (
    (sample_sizes, 'blue', 'with biomass cropper'),
    (new_sample_sizes, 'red', 'with lice cropper'),
)
for counts, curve_color, curve_label in curves:
    plt.plot(akpd_cutoffs, counts, color=curve_color, label=curve_label)

plt.xlabel('AKPD score cutoff')
plt.ylabel('Sample Size')
plt.title('Aplavika (95), 2020-06-20 - 2020-06-30')
plt.legend()
plt.grid()
plt.show()
    

In [None]:
# Overlay the AKPD score distributions of the two data sets.
# Both histograms share one set of 20 bin edges computed over the combined
# scores, so bars line up and can be compared directly. Translucent bars keep
# the histogram drawn first visible underneath the second one.
all_scores = pd.concat([df.akpd_score, odf.akpd_score]).dropna()
bin_edges = np.histogram_bin_edges(all_scores, bins=20)

plt.figure(figsize=(20, 10))
plt.hist(df.akpd_score, bins=bin_edges, color='red', alpha=0.5, label='with lice cropper')
plt.hist(odf.akpd_score, bins=bin_edges, color='blue', alpha=0.5, label='with biomass cropper')
plt.xlabel('AKPD score')
plt.ylabel('Count')
plt.legend()
plt.grid()
plt.show()