In [None]:
import json, os
import cv2
import torch
from multiprocessing import Pool, Manager
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.akpd import AKPD
from aquabyte.template_matching import find_matches_and_homography
from aquabyte.biomass_estimator import NormalizeCentered2D, NormalizedStabilityTransform, ToTensor, Network
from aquabyte.data_loader import KeypointsDataset, NormalizeCentered2D, ToTensor, BODY_PARTS
from torch.utils.data import Dataset, DataLoader
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from aquabyte.akpd_scorer import generate_confidence_score
from keras.models import load_model
import boto3
import pandas as pd
import numpy as np
import plotly.express as px
import time
from matplotlib import pyplot as plt

from collections import defaultdict
import datetime as dt
import json
import numpy as np
from sklearn.linear_model import LinearRegression
from collections import defaultdict



In [None]:
# Extract the analysis dataframe: BATI crop annotations for one pen joined with
# their biomass computations over a fixed capture window.
s3_access_utils = S3AccessUtils('/root/data')

# Read the warehouse credentials through a context manager so the file handle is
# closed deterministically instead of being left open for the kernel's lifetime.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

pen_id, group_id = 88, '88'
captured_from, captured_to = '2020-02-10', '2020-03-10'
min_akpd_score = 0.9

# The query is rendered from the constants above so the pen, capture window and
# score threshold are each defined in exactly one place.
# NOTE: this is plain string formatting, not a parameterized query -- only ever
# feed it trusted, notebook-defined constants.
query = """
    SELECT * FROM (
      (SELECT * FROM prod.crop_annotation cas
      INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
      WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
      AND cas.annotation_state_id = 3
      AND cas.pen_id={pen_id}) a
    RIGHT JOIN 
      (SELECT left_crop_url, estimated_weight_g, akpd_score FROM prod.biomass_computations
      WHERE prod.biomass_computations.captured_at between '{captured_from}' and '{captured_to}'
      AND prod.biomass_computations.akpd_score > {min_akpd_score}) bc 
    ON 
      (a.left_crop_url=bc.left_crop_url)
    ) x
    WHERE x.captured_at between '{captured_from}' and '{captured_to}'
    AND x.pen_id = {pen_id}
    AND x.group_id = '{group_id}';
""".format(
    pen_id=pen_id,
    group_id=group_id,
    captured_from=captured_from,
    captured_to=captured_to,
    min_akpd_score=min_akpd_score,
)

df = rds_access_utils.extract_from_database(query)
df = df.sort_values('captured_at')
# The SQL already applies this threshold; the filter is repeated client-side and
# the result is deep-copied so later column assignments do not touch a view.
df = df[df.akpd_score > min_akpd_score].copy(deep=True)
# Index by capture time (sorted above) so later cells can slice by date strings.
df.index = pd.to_datetime(df.captured_at)
df['hour'] = df.index.hour


<h1> Add in Length / K-Factor Analysis </h1>

In [None]:
class KeypointsDataset(Dataset):
    """Keypoints dataset backed by a pandas DataFrame.

    Base version of the dataset used to map 3D keypoints to a target value.
    Each item is built from one DataFrame row; the label is taken from the
    row's ``length`` field and the input is either the raw keypoints (when a
    transform is supplied) or the 3D world keypoints obtained during
    triangulation (when it is not).
    """

    def __init__(self, df, transform=None):
        # df: one row per sample; transform: optional callable applied to the
        # raw sample dict before it is returned.
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        # Without a transform, hand back the precomputed world keypoints as-is.
        if not self.transform:
            return {
                'kp_input': record.world_keypoints,
                'label': record.length,
                'stereo_pair_id': record.id,
            }

        # With a transform, assemble the raw sample and let the transform
        # produce the final item.
        raw_sample = {
            'keypoints': record.keypoints,
            'cm': record.camera_metadata,
            'stereo_pair_id': record.id,
        }
        # The label is optional here: it is only attached when the row
        # actually carries a 'length' field.
        if 'length' in record.keys():
            raw_sample['label'] = record.length
        return self.transform(raw_sample)
    
class NormalizedStabilityTransform(object):
    """
        Transforms world keypoints into a more stable coordinate system - this will lead to better
        training / convergence.

        Expects a sample dict with keys 'modified_kps' (holding 'leftCrop' and 'rightCrop'
        entries), 'cm' and 'stereo_pair_id'. 'label' and 'single_point_inference' are optional
        and are passed through as None when absent, so unlabeled inference samples are accepted.

        Returns a dict with 'kp_input' (body part -> 3-element list), 'label', 'stereo_pair_id'
        and 'single_point_inference'.
    """

    def __call__(self, sample):
        modified_kps, stereo_pair_id, cm = \
            sample['modified_kps'], sample['stereo_pair_id'], sample['cm']
        # Labels only exist for training data; inference samples carry none.
        label = sample.get('label')

        modified_wkps = pixel2world(modified_kps['leftCrop'], modified_kps['rightCrop'], cm)

        stabilized_coordinates = {}
        for bp in BODY_PARTS:
            wkp = modified_wkps[bp]
            # Every component is divided by wkp[1] (presumably the depth axis -- TODO confirm),
            # and the third component is a scaled inverse of wkp[1] itself.
            stabilized_coordinates[bp] = [
                0.5 * wkp[0] / wkp[1],
                0.5 * wkp[2] / wkp[1],
                0.5 * 0.1 / wkp[1],
            ]

        # The label is passed through unchanged (no normalization is applied to it).
        return {
            'kp_input': stabilized_coordinates,
            'label': label,
            'stereo_pair_id': stereo_pair_id,
            'single_point_inference': sample.get('single_point_inference')
        }

In [None]:
# Fully-connected regression network applied to the flattened keypoint tensor.
import torch
from torch import nn


class Network(nn.Module):
    """Multi-layer perceptron: 24 -> 256 -> 128 -> 64 -> 1.

    A ReLU follows each of the three hidden layers; the output layer is linear.
    """

    def __init__(self):
        super().__init__()
        # Hidden stack (attribute names are part of the saved-model layout).
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        # Single-value regression head and the shared activation.
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Collapse everything after the batch dimension into one feature axis.
        activations = x.view(x.size(0), -1)
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            activations = self.relu(hidden_layer(activations))
        return self.output(activations)
    

        



In [None]:
# Load the trained K-factor predictor and build the preprocessing transforms.
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints from a trusted location.
# NOTE(review): presumably the checkpoint is a pickled whole module, which would
# require the Network class to be defined before this cell runs -- confirm.
network = torch.load('/root/data/alok/biomass_estimation/playground/kf_predictor.pb')
# The model is only used for prediction in this notebook, so switch it to
# evaluation mode (disables training-only behaviour such as dropout, if any).
network.eval()
normalize_centered_2D_transform = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

In [None]:
def generate_kf(row_id, akpd_keypoints, cm):
    """Predict the K-factor for a single stereo pair.

    The keypoints are pushed through the module-level preprocessing pipeline
    (normalize_centered_2D_transform -> normalized_stability_transform ->
    to_tensor_transform) and the resulting 'kp_input' tensor is fed to the
    module-level `network`.

    Args:
        row_id: identifier stored in the sample as 'stereo_pair_id'.
        akpd_keypoints: keypoint annotation stored in the sample as 'keypoints'.
        cm: camera metadata stored in the sample as 'cm'.

    Returns:
        The network output converted to a Python scalar via `.item()`.
    """
    input_sample = {
        'keypoints': akpd_keypoints,
        'cm': cm,
        'stereo_pair_id': row_id,
        'single_point_inference': True
    }

    normalized_centered_2D_kps = normalize_centered_2D_transform(input_sample)
    normalized_stability_kps = normalized_stability_transform(normalized_centered_2D_kps)
    tensorized_kps = to_tensor_transform(normalized_stability_kps)

    # Pure inference: skip autograd bookkeeping, the numeric result is unchanged.
    with torch.no_grad():
        kf_prediction = network(tensorized_kps['kp_input']).item()

    return kf_prediction


In [None]:
# Run the K-factor predictor over every row of df, collecting one prediction per
# row in iteration order and printing the running row count every 100 rows.
kfs, args = [], []
count = 0
for row_id, row in df.iterrows():
    # The dataframe index value is what gets passed along as the row identifier.
    kfs.append(generate_kf(row_id, row.annotation, row.camera_metadata))

    if not count % 100:
        print(count)
    count += 1

In [None]:
# Store the predictions as the 'kf' column. A plain list is assigned by position,
# so this assumes `kfs` holds exactly one entry per row of df, in df's row order.
df['kf'] = kfs


In [None]:
# Distribution of the predicted K-factors across all rows (20 bins).
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df.kf, bins=20)
ax.set_title('K Factor Distribution')
ax.grid()
plt.show()

In [None]:
# Fraction of rows whose predicted K-factor exceeds 2.5.
# Uses the 'kf' column created earlier in this notebook; the previous 'k_factor'
# name is not assigned anywhere in the notebook.
df[df.kf > 2.5].shape[0] / df.shape[0]

In [None]:
# Predicted K-factor against predicted weight, one point per row.
# Reads the 'kf' column created earlier in this notebook ('k_factor' is never assigned).
plt.scatter(df.estimated_weight_g, df.kf)
plt.xlabel('Predicted weight (g)')
plt.ylabel('Predicted K-factor')
plt.title('K-Factor vs. Weight (per fish)')
plt.show()

In [None]:
# Mean predicted K-factor per 500 g weight bin between 500 g and 8500 g.
# `weights` holds the bin edges; `mean_kfs[i]` is the mean for [weights[i], weights[i + 1]).
weights = np.arange(500, 9000, 500)
mean_kfs = []
for lo, hi in zip(weights[:-1], weights[1:]):
    # Half-open bins: a weight sitting exactly on an edge lands in exactly one bin
    # instead of being excluded from both of its neighbours.
    mask = (df.estimated_weight_g >= lo) & (df.estimated_weight_g < hi)
    # Reads the 'kf' column created earlier in this notebook ('k_factor' is never
    # assigned). Empty bins yield NaN.
    mean_kfs.append(df[mask].kf.mean())
    

In [None]:
# Bar chart of the per-bin mean K-factor; each bar is labelled with the lower
# edge of its weight bin.
bin_lower_edges = weights[:-1]
x_pos = np.arange(len(bin_lower_edges))

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(x_pos, mean_kfs, align='edge', width=0.9)
ax.set_xticks(x_pos)
ax.set_xticklabels(bin_lower_edges)
ax.set_title('K-Factor vs. Weight')
ax.set_xlabel('Predicted weight (g)')
ax.set_ylabel('Predicted K-factor')
ax.grid()
plt.show()

In [None]:
# Keep rows strictly below the given K-factor percentile. At 100 the cutoff is the
# maximum, so only the row(s) holding the largest K-factor are dropped.
kf_cutoff = np.percentile(df.kf, 100)
tdf = df[df.kf < kf_cutoff].copy(deep=True)

# Slice the date window once (label-based slice on the datetime index) and reuse it.
start_date, end_date = '2020-02-10', '2020-02-20'
window = tdf[start_date:end_date]

fig, ax = plt.subplots(figsize=(20, 10))
points = ax.scatter(window.index, window.estimated_weight_g, c=window.kf, cmap='viridis')
fig.colorbar(points, ax=ax)
ax.grid()
ax.set_xlabel('Date')
ax.set_ylabel('Estimated Weight (g)')
ax.set_title('Weight Predictions vs. Time, color-coded by K-Factor')
plt.show()