In [None]:
import json, os
import cv2
import torch
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.akpd import AKPD
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.template_matching import find_matches_and_homography
from aquabyte.data_loader import KeypointsDataset, NormalizeCentered2D, ToTensor, BODY_PARTS
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import random
import torch
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [None]:
from mpl_toolkits import mplot3d
%matplotlib inline
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 

import matplotlib.pyplot as plt

In [None]:
# Body-part names used as keys into the per-fish keypoint dictionaries.
ADIPOSE_FIN = 'ADIPOSE_FIN'
ANAL_FIN = 'ANAL_FIN'
DORSAL_FIN = 'DORSAL_FIN'
EYE = 'EYE'  # plain string; a trailing comma here would turn EYE into the tuple ('EYE',)
PECTORAL_FIN = 'PECTORAL_FIN'
PELVIC_FIN = 'PELVIC_FIN'
TAIL_NOTCH = 'TAIL_NOTCH'
UPPER_LIP = 'UPPER_LIP'

In [None]:

class NormalizedStabilityTransform(object):
    """
        Re-expresses each body part's world keypoint as ratios against its second
        world coordinate and rescales the label by 1e-4. The intent stated by the
        original author is a more stable coordinate system for training / convergence.
    """

    def __call__(self, sample):
        """Build the transformed sample.

        Reads 'modified_kps' (with 'leftCrop' / 'rightCrop'), 'label',
        'stereo_pair_id' and 'cm' from ``sample``; 'single_point_inference' is
        optional and passed through (None when absent).

        Returns a dict with keys 'kp_input', 'label', 'stereo_pair_id' and
        'single_point_inference'.
        """
        modified_kps = sample['modified_kps']
        label = sample['label']
        stereo_pair_id = sample['stereo_pair_id']
        cm = sample['cm']

        modified_wkps = pixel2world(modified_kps['leftCrop'], modified_kps['rightCrop'], cm)

        def _stabilize(wkp):
            # Components 0 and 2 are divided by component 1; the third entry is a
            # constant (0.5 * 0.1) over component 1.
            return [0.5 * wkp[0] / wkp[1], 0.5 * wkp[2] / wkp[1], 0.5 * 0.1 / wkp[1]]

        stabilized_coordinates = {bp: _stabilize(modified_wkps[bp]) for bp in BODY_PARTS}

        # A falsy label (None, 0) yields None rather than a scaled value.
        if label:
            normalized_label = label * 1e-4
        else:
            normalized_label = None

        return {
            'kp_input': stabilized_coordinates,
            'label': normalized_label,
            'stereo_pair_id': stereo_pair_id,
            'single_point_inference': sample.get('single_point_inference')
        }

In [None]:
from sqlalchemy import create_engine


In [None]:
# load gtsf data
# Joins research.fish_metadata to keypoint_annotations on the left image URL, keeps
# rows whose annotation has both 'leftCrop' and 'rightCrop' keypoints and is_qa = false,
# ordered by capture time, and materialises the result as the DataFrame df_gtsf.
#
# NOTE(review): prod_db_connection is not defined anywhere in the visible notebook, so
# this cell fails on a fresh kernel. Presumably it was created with sqlalchemy's
# create_engine (imported in the previous cell) or via the commented-out
# RDSAccessUtils line below -- confirm and define it before this cell.
# NOTE(review): `select *` over a join may return duplicate column names (e.g. an id
# column from both tables) -- verify that row.id further down is a scalar.

#rds_access_utils = RDSAccessUtils(json.load(open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS'])))
query = """
    select * from research.fish_metadata a left join keypoint_annotations b
    on a.left_url = b.left_image_url 
    where b.keypoints -> 'leftCrop' is not null
    and b.keypoints -> 'rightCrop' is not null
    and b.is_qa = false
    order by b.captured_at
"""

results = prod_db_connection.execute(query)
df_gtsf = pd.DataFrame(results.fetchall())
# Column labels are taken from the result set's keys.
df_gtsf.columns = results.keys()



In [None]:
df_gtsf.head()

In [None]:
df_gtsf['data_collection_type_id'].unique()

# load pre_trained model

In [None]:
class Network(nn.Module):
    """Fully connected regressor: 24 -> 256 -> 128 -> 64 -> 1 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept stable so saved checkpoints
        # and seeded initialisation stay compatible.
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten everything after the first dimension and run the MLP.

        Returns a tensor with one output value per row of the first dimension.
        """
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.output(hidden)

# Load the trained biomass model from disk. The loaded object is later called directly
# on keypoint tensors, so this is presumably a whole pickled Network instance -- which
# requires the Network class above to be defined before loading.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints from a
# trusted source. The path is absolute and machine-specific -- consider a configurable
# data directory.
biomass_network = torch.load('/root/data/alok/biomass_estimation/results/neural_network/2019-11-08T00:13:09/nn_epoch_798.pb')

In [None]:
# amg = AccuracyMetricsGenerator()
# amg.generate_accuracy_metrics(y_pred, df_gtsf['weight'], None)

In [None]:
# NOTE(review): `df` is only created in a later cell, so this cell fails on a fresh
# kernel; it also renders the entire frame rather than a preview.
df

In [None]:
# NOTE(review): get_MPE and y_pred are both defined in later cells; on a fresh kernel
# this cell raises NameError. Move it below those definitions.
get_MPE(y_pred, df_gtsf['weight'])

In [None]:
# NOTE(review): get_MAPE and y_pred are both defined in later cells; on a fresh kernel
# this cell raises NameError. Move it below those definitions.
get_MAPE(y_pred, df_gtsf['weight'])


In [None]:
def get_MPE(y_predict, y_train):
    """Mean percentage error: mean((truth - prediction) / truth).

    Args:
        y_predict: predicted values (list, array or Series).
        y_train: ground-truth values of the same length.

    Returns:
        The mean signed relative error as a float; positive means the predictions
        are below the ground truth on average.

    Inputs are compared element-by-element in positional order (any pandas index is
    ignored), so plain lists and Series with different indexes both work. NaN terms
    are skipped, matching what pandas' mean did for Series inputs.
    """
    truth = np.asarray(y_train, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    # A zero ground truth yields inf/NaN rather than an exception; silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        relative_error = (truth - predicted) / truth
    return np.nanmean(relative_error)

def get_MAPE(y_predict, y_train):
    """Mean absolute percentage error: mean(|truth - prediction| / |truth|).

    Args:
        y_predict: predicted values (list, array or Series).
        y_train: ground-truth values of the same length.

    Returns:
        The mean absolute relative error as a float.

    Inputs are compared element-by-element in positional order (any pandas index is
    ignored), so plain lists and Series with different indexes both work. NaN terms
    are skipped, matching what pandas' mean did for Series inputs.
    """
    truth = np.asarray(y_train, dtype=float)
    predicted = np.asarray(y_predict, dtype=float)
    # A zero ground truth yields inf/NaN rather than an exception; silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        relative_error = np.absolute((truth - predicted) / truth)
    return np.nanmean(relative_error)

# Unit test

In [None]:
# Single-row smoke test of the keypoint transform pipeline.
# The transforms are instantiated here so that this cell does not depend on a later
# cell having been run first.
normalize_centered_2D_transform = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()

# NOTE(review): hard-coded row position; raises IndexError if the query returns
# fewer rows than this.
row = df_gtsf.iloc[13752]
input_sample = {
    'keypoints': row.keypoints,
    'cm': row.camera_metadata,
    'stereo_pair_id': row.id,
    'single_point_inference': True
}
nomralized_centered_2D_kps = normalize_centered_2D_transform(input_sample)

normalized_stability_kps = normalized_stability_transform(nomralized_centered_2D_kps)
# Dict of body part -> stabilized 3-component keypoint, used by the geometry helpers.
normalized_3D_kps = normalized_stability_kps['kp_input']

In [None]:
def get_cos(v1, v2):
    """Cosine of the angle between two vectors, clamped to [-1, 1].

    Args:
        v1, v2: array-likes of equal length.

    Returns:
        dot(v1, v2) / (|v1| * |v2|) limited to the closed interval [-1, 1].
        A zero-length vector still produces NaN.

    Floating-point rounding can push the raw ratio marginally outside [-1, 1] for
    (anti)parallel vectors, which would make np.arccos return NaN; clamping avoids that.
    """
    cosine = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    return np.clip(cosine, -1.0, 1.0)

In [None]:
normalized_3D_kps

## Orientation

In [None]:

def get_orientation(normalized_3D_kps):
    """Angle in degrees between the unit x vector and the UPPER_LIP -> TAIL_NOTCH
    vector after its third component has been zeroed.

    Args:
        normalized_3D_kps: mapping of body part name to a 3-component keypoint.
    """
    tail = np.array(normalized_3D_kps[TAIL_NOTCH])
    lip = np.array(normalized_3D_kps[UPPER_LIP])
    # Keep only the first two components of the lip-to-tail vector.
    in_plane = (tail - lip) * np.array([1, 1, 0])
    x_axis = np.array([1, 0, 0])
    return np.degrees(np.arccos(get_cos(x_axis, in_plane)))


In [None]:
get_orientation(normalized_3D_kps)


## Curvature

In [None]:
def get_curvature(normalized_3D_kps):
    """Angle in degrees at DORSAL_FIN between the directions to UPPER_LIP and to
    TAIL_NOTCH, measured after zeroing the third component of both vectors.

    Args:
        normalized_3D_kps: mapping of body part name to a 3-component keypoint.
    """
    keep_first_two = np.array([1, 1, 0])

    lip_to_dorsal = np.array(normalized_3D_kps[DORSAL_FIN]) - np.array(normalized_3D_kps[UPPER_LIP])
    tail_to_dorsal = np.array(normalized_3D_kps[DORSAL_FIN]) - np.array(normalized_3D_kps[TAIL_NOTCH])

    cos_alpha = get_cos(lip_to_dorsal * keep_first_two, tail_to_dorsal * keep_first_two)
    return np.degrees(np.arccos(cos_alpha))



# lean

In [None]:
def get_lean(normalized_3d_kps):
    """Angle in degrees between the unit x vector and the UPPER_LIP -> TAIL_NOTCH
    vector after its second component has been zeroed.

    Args:
        normalized_3d_kps: mapping of body part name to a 3-component keypoint.
    """
    # Read the argument, not the notebook-level `normalized_3D_kps` global (capital D),
    # which made the result depend on whatever the kernel last assigned.
    tail_lip = np.array(normalized_3d_kps[TAIL_NOTCH]) - np.array(normalized_3d_kps[UPPER_LIP])
    tail_lip_xz_proj = tail_lip * np.array([1, 0, 1])
    x_hat = np.array([1, 0, 0])
    cos_beta = get_cos(tail_lip_xz_proj, x_hat)
    return np.degrees(np.arccos(cos_beta))

In [None]:
def get_size(weight, medium_min=2000, large_min=6000):
    """Bucket a weight into 'small', 'medium' or 'large'.

    Args:
        weight: the weight to classify (same unit as the thresholds).
        medium_min: weights strictly below this are 'small'.
        large_min: weights strictly below this (and not small) are 'medium';
            everything else is 'large'.

    Returns:
        One of the strings 'small', 'medium', 'large'.
    """
    if weight < medium_min:
        return 'small'
    if weight < large_min:
        return 'medium'
    return 'large'

In [None]:
# Run every GTSF row through the keypoint transforms and the biomass network, and
# collect per-row geometry features, predictions and errors into `df`.
normalize_centered_2D_transform = NormalizeCentered2D()
normalized_stability_transform = NormalizedStabilityTransform()
to_tensor_transform = ToTensor()

# Rows are accumulated in a list and turned into a DataFrame once: appending to a
# DataFrame inside the loop is quadratic and DataFrame.append no longer exists in
# pandas 2.x.
records = []
y_pred = []

# Inference only, so no autograd bookkeeping is needed.
with torch.no_grad():
    for idx, row in df_gtsf.iterrows():
        input_sample = {
            'keypoints': row.keypoints,
            'cm': row.camera_metadata,
            'stereo_pair_id': row.id,
            'single_point_inference': True
        }
        nomralized_centered_2D_kps = normalize_centered_2D_transform(input_sample)

        normalized_stability_kps = normalized_stability_transform(nomralized_centered_2D_kps)
        tensorized_kps = to_tensor_transform(normalized_stability_kps)
        normalized_3D_kps = normalized_stability_kps['kp_input']
        # The network output is multiplied by 1e4 -- presumably undoing the 1e-4 label
        # scaling applied in NormalizedStabilityTransform.
        weight_prediction = biomass_network(tensorized_kps['kp_input']).item() * 1e4
        ornt = get_orientation(normalized_3D_kps)
        curv = get_curvature(normalized_3D_kps)
        lean = get_lean(normalized_3D_kps)

        # One prediction per df_gtsf row, in row order.
        y_pred.append(weight_prediction)
        records.append({'fish_id': row['fish_id']
                       ,'input_sample': input_sample
                       ,'weight': row['weight']
                       ,"orientation": ornt
                       ,"curvature": curv
                       ,"lean": lean
                       ,"predicted_weight": weight_prediction})

df = pd.DataFrame(records, columns=["fish_id"
                                    ,"weight"
                                    ,"input_sample"
                                    ,"orientation"
                                    ,"curvature"
                                    ,"lean"
                                    ,"predicted_weight"
                                    ,"size"
                                   ])

df['weight'] = df['weight'].astype(int)
# Positive error means the model under-predicted the ground-truth weight.
df['error'] = df['weight'] - df['predicted_weight']
df['size'] = df['weight'].apply(get_size)

In [None]:
df.shape

In [None]:
df.head(10)

In [None]:
sum(df['size'] == 'small') / df.shape[0]

In [None]:
sum(df['size'] == 'medium') / df.shape[0]

In [None]:
sum(df['size'] == 'large') / df.shape[0]

In [None]:
_ = plt.hist(df['weight'], bins=10)  # arguments are passed to np.histogram
#_ = plt.hist(orientation, bins=20)
plt.title("Histogram of GTSF weight")
plt.xlabel('weight (g)')
plt.show()

In [None]:
plt.title('error plot')

plt.axvline(x=2000, color='r', linestyle='--')
plt.axvline(x=6000, color='r', linestyle='--')
plt.scatter('weight', 'error', data = df, marker='.')
plt.xlabel('ground truth weight (g)')
plt.ylabel('error (g)')
plt.show()

In [None]:
df[df.error == df.error.max()]

In [None]:
df[df.error == df.error.min()]

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(10, 5))
fig.suptitle('Error plots')
ax1.plot('weight', 'error', 
         data=df, linestyle='none', marker='.')
ax2.plot('predicted_weight', 'weight', 
         data=df, linestyle='none', marker='.')
plt.show()

In [None]:
fig, axes = plt.subplots(1, 3,figsize=(20, 5), sharey=True)
fig.suptitle('Error plots')

axes[0].plot('orientation', 'error', 
         data=df, linestyle='none', marker='.')
axes[0].set(xlabel='orientation', ylabel='error')

axes[1].plot('curvature', 'error', 
         data=df, linestyle='none', marker='.')
axes[1].set(xlabel='curvature')

axes[2].plot('lean', 'error', 
         data=df, linestyle='none', marker='.')
axes[2].set(xlabel='lean')

plt.show()

In [None]:
# Orientation vs error with the single lowest-orientation row left out.
# idxmin() returns that row's index *label*, which is what DataFrame.drop expects;
# np.argmin on a Series returns a label or a position depending on the pandas version.
plt.plot('orientation', 'error',
         data=df.drop([df['orientation'].idxmin()], axis=0), linestyle='none', marker='.')
plt.title('Orientation vs error')
plt.xlabel('orientation (degree)')
plt.ylabel('error')
plt.show()

In [None]:
plt.plot('curvature', 'error', 
         data=df, linestyle='none', marker='.')
plt.show()

In [None]:
plt.plot('lean', 'error', 
         data=df, linestyle='none', marker='.')
plt.show()

In [None]:
df['lean'].describe()

In [None]:
_ = plt.hist(df['curvature'], bins=10, range = (100, 180))  # arguments are passed to np.histogram
#_ = plt.hist(orientation, bins=20)
plt.title("Histogram")

plt.show()

# regression model

In [None]:
# Prepare the design matrix for the error regression: size (one-hot) + orientation.
df = df.dropna()  # keypoints nan leads to orientation nan
# Keep only rows whose orientation is at least 90 degrees.
df = df.drop(df[df['orientation'] < 90].index)
# Work on an explicit copy so the column assignment below does not write into a
# slice of df (SettingWithCopyWarning).
X = df[['size', 'orientation']].copy()
X['size'] = X['size'].astype('category')
# NOTE(review): every size level gets a dummy column, which is collinear with the
# regression intercept -- interpret the individual size coefficients with care.
X = pd.get_dummies(X, columns=['size'], prefix=['size'])


In [None]:
X.head(5)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    df.error,
                                                    test_size=0.2, 
                                                    random_state=0)

In [None]:
regressor = LinearRegression()  
regressor.fit(X_train, y_train)


In [None]:
print(regressor.score(X_test, y_test))

In [None]:
regressor.coef_ 

The model overpredicts when the fish is more aligned with the frame.

The model underpredicts for large fish.

In [None]:
get_MPE(df[df['size'] == 'small']['predicted_weight'],
        df[df['size'] == 'small']['weight'])

In [None]:
get_MPE(df[df['size'] == 'medium']['predicted_weight'],
        df[df['size'] == 'medium']['weight'])

In [None]:
get_MPE(df[df['size'] == 'large']['predicted_weight'],
        df[df['size'] == 'large']['weight'])

In [None]:
df[df['size'] == 'large'].shape[0]

In [None]:
df[df['size'] == 'medium'].shape[0]

In [None]:
df[df['size'] == 'small'].shape[0]

In [None]:
# Fish id of the 'small' row with the lowest (most negative) error.
# idxmin() gives the row's index label and .loc looks it up by label, so the result
# does not depend on whether np.argmin returns a position or a label in the
# installed pandas version.
ind = df.loc[df['size'] == 'small', 'error'].idxmin()
f_id = df.loc[ind, 'fish_id']

df.loc[df['fish_id'] == f_id].shape

In [None]:
# df_gtsf.loc[df_gtsf['fish_id'] == f_id]['id']

In [None]:
# df_gtsf.loc[df_gtsf['id'] == 606388]['keypoints'].values

In [None]:
val, cnt = np.unique(df['fish_id'], return_counts=True)


In [None]:
f_ids = val[cnt > 20]

In [None]:
_ = plt.hist(cnt, bins=10)  # arguments are passed to np.histogram
#_ = plt.hist(orientation, bins=300)
plt.title("Histogram")

plt.show()

In [None]:
# Orientation vs error for up to nine fish, one panel per fish.
figure, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 20), sharex=True)

# axes.flat walks the grid row by row; zip stops at the shorter sequence, so having
# fewer than nine qualifying fish leaves trailing panels empty instead of raising
# IndexError.
for ax, f_id in zip(axes.flat, f_ids):
    temp = df.loc[df['fish_id'] == f_id]
    ax.plot('orientation', 'error',
            data=temp, linestyle='none', marker='.')
    ax.set_title('weight (g): {} '.format(temp['weight'].iloc[0]))
    ax.set(ylabel='error')

#plt.xlabel('orientation')
figure.tight_layout()


In [None]:
f_ids

In [None]:
# get rows for the fish that have most records
f_id = val[np.argmax(cnt)]
# Alternative fish ids that were inspected by hand:
#f_id = '190710-ce6f49e0-eb12-4655-985b-0fdd82ab519a'
#f_id = '190717-d2573589-6b5b-4f19-a0b9-4714432b5209'
#f_id = '190725-56a4b6d5-edfb-4fc2-9f81-cb152901da40'

temp = df.loc[df['fish_id'] == f_id]
print('weight of the fish', temp['weight'].iloc[0])
# NOTE(review): fixed y-range; points outside [-100, 600] are not shown.
plt.ylim(-100, 600)
plt.plot('orientation', 'error',
         data=temp, linestyle='none', marker='.')
plt.xlabel("orientation")
# The y axis shows the 'error' column; it was previously labelled with an empty string.
plt.ylabel("error")
plt.show()


In [None]:
# Fit error ~ orientation separately for every fish in f_ids (rows with |error| >= 800
# are excluded) and average the fitted slopes.
# NOTE(review): this rebinds X, X_train, X_test, y_train, y_test and regressor, which
# the pooled regression above also uses -- re-running those cells afterwards will see
# the last per-fish model.
coff = []
for f_id in f_ids:
    temp = df.loc[(df['fish_id'] == f_id) & (np.absolute(df['error']) < 800)]
    X = temp[['orientation']]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        temp.error,
        test_size=0.2,
        random_state=0,
    )
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    coff.append(regressor.coef_)
np.mean(coff)