Download prod data

In [None]:
import os
import pandas as pd
from aquabyte.data_access_utils import DataAccessUtils

In [None]:
# Point the credential environment variables at local JSON files.
# NOTE(review): presumably read by DataAccessUtils; these are user-specific
# absolute paths and need adjusting on any other machine.
os.environ.update({
    'AWS_CREDENTIALS': '/root/thomas/aws_credentials.json',
    'PROD_SQL_CREDENTIALS': '/root/thomas/sqlcredentials.json',
})

In [None]:
# Data-access helper used below to run SQL queries.
# NOTE(review): presumably picks up the credential paths from os.environ — confirm.
datacess = DataAccessUtils()

In [None]:
# Every row of keypoint_annotations for pen 7.
query = 'select * from keypoint_annotations where pen_id=7'

In [None]:
# Run the query; the result is used as a pandas DataFrame in the cells below.
original_df = datacess.extract_from_database(query)
# Quick sanity check on the number of rows / columns returned.
print(original_df.shape)

In [None]:
# df = df[df['keypoints'].notnull()]
# print(df.shape)

Create new columns (left/right keypoints and world keypoint coordinates)

In [None]:
from aquabyte.optics import convert_to_world_point, depth_from_disp, pixel2world, euclidean_distance
import numpy as np

In [None]:
# Select the rows that were not skipped and that actually carry keypoints.
has_keypoints = original_df.keypoints.notnull()
mask = (original_df.is_skipped == False) & has_keypoints

# Placeholder columns: the first group is object-typed because each cell will
# hold a per-row Python object; the second group stays numeric (NaN-filled).
object_columns = ['left_keypoints', 'right_keypoints', 'world_keypoint_coordinates']
numeric_columns = ['predicted_biomass_linear', 'predicted_biomass_blender', 'max_y_coordinate_deviation']
for col in object_columns:
    original_df[col] = np.nan
    original_df[col] = original_df[col].astype(object)
for col in numeric_columns:
    original_df[col] = np.nan


# Fill the object columns one selected row at a time.
for idx, row in original_df[mask].iterrows():
    annotation = row.keypoints
    left_crop = annotation['leftCrop']
    right_crop = annotation['rightCrop']

    # The image size is written into the row's camera metadata (in place)
    # before it is handed to pixel2world.
    metadata = row.camera_metadata
    metadata['pixelCountHeight'] = 3000
    metadata['pixelCountWidth'] = 4096
    world_coordinates = pixel2world(left_crop, right_crop, metadata)

    # Store the per-row results back into the dataframe.
    original_df.at[idx, 'left_keypoints'] = left_crop
    original_df.at[idx, 'right_keypoints'] = right_crop
    original_df.at[idx, 'world_keypoint_coordinates'] = world_coordinates

In [None]:
# original_df.to_csv('/root/data/bati/bremnes_data.csv')

Train model

In [None]:
import os
# Set to an empty string before keras is imported in the next cell.
# NOTE(review): presumably to hide all GPUs so training runs on CPU — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import layers, optimizers
from keras.models import Model

In [None]:
# Load the labelled dataset and shuffle its rows so that the positional
# train/validation cut made later is a random split.
# The shuffle is seeded so the split (and every metric that depends on it)
# is the same after Restart Kernel -> Run All.
SHUFFLE_SEED = 0
dataframe = pd.read_csv('/root/data/alok/biomass_estimation/df.csv')
dataframe = dataframe.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [None]:
# Fixed order in which the body keypoints are stacked into the model input;
# eight entries, matching the (8, 3) input shape of the network.
keypoints_order = [
    "TAIL_NOTCH",
    "ADIPOSE_FIN",
    "UPPER_LIP",
    "ANAL_FIN",
    "PELVIC_FIN",
    "EYE",
    "PECTORAL_FIN",
    "DORSAL_FIN",
]

Load data

In [None]:
# Build the model input: for every row, the world coordinates of each keypoint
# listed in `keypoints_order`, in that order.
#
# Each cell of `world_keypoints` is a string holding a Python dict literal.
# NOTE(security): eval() executes whatever the CSV cell contains; only run this
# on a trusted file. If the cells are plain literals, ast.literal_eval is a
# safer drop-in replacement.
X = []
for wkp in dataframe['world_keypoints']:
    # Parse the cell once per row instead of once per keypoint.
    world_keypoints = eval(wkp)
    X.append([world_keypoints[kp] for kp in keypoints_order])

In [None]:
# Regression target: the `weight` column as an (N, 1) column vector.
Y = np.array(dataframe['weight'])[:, np.newaxis]

In [None]:
# Distribution of the target weights.
plt.hist(Y)
plt.show()

In [None]:
# Convert the nested lists to an array; the network below expects (N, 8, 3).
X = np.array(X)
X.shape

In [None]:
# Min / max / median of each of the three coordinate axes, over all keypoints.
for axis in range(3):
    axis_values = X[..., axis]
    print(np.min(axis_values), np.max(axis_values), np.median(axis_values))

Create Train / Val

In [None]:
# Index at which the (already shuffled) data is cut: first 80% train, rest validation.
N = len(X)
cutoff = int(0.8 * N)

In [None]:
# Positional split at `cutoff`: rows before it train, rows from it on validate.
x_train, x_val = X[:cutoff], X[cutoff:]
y_train, y_val = Y[:cutoff], Y[cutoff:]

In [None]:
# plt.hist(X[...,1])
# plt.xlim([-2, 10])
# plt.show()

Create model

In [None]:
# Fully-connected regressor: 8 keypoints x 3 coordinates in, one scalar out.
inp = layers.Input(shape=(8, 3))
x = layers.Flatten()(inp)
# ReLU hidden stack, widest layers first.
for units in (200, 200, 100, 100, 100, 50):
    x = layers.Dense(units, activation='relu')(x)
# Single linear unit for the regression output.
x = layers.Dense(1, activation='linear')(x)
model = Model(inputs=[inp], outputs=[x])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

Train

In [None]:
# Adam optimizer with learning rate 1e-3, trained on mean absolute error.
# `optimizers.Adam` is the canonical class name; the lowercase `adam` alias
# only exists in old Keras releases.
model.compile(optimizer=optimizers.Adam(lr=1e-3),
              loss='mean_absolute_error')

In [None]:
# model.compile(optimizer=optimizers.adam(lr=1e-3),
#               loss='mean_squared_error')

In [None]:
# Train on the training split and track the loss on the validation split.
# validation_data is passed as an (x, y) tuple, the form documented by Keras;
# a list is not unpacked the same way by every Keras version.
history = model.fit(x_train,
                    y_train,
                    validation_data=(x_val, y_val),
                    epochs=1000)

In [None]:
# Training loss and validation loss per epoch, on the same axes.
h = history.history
for curve in ('loss', 'val_loss'):
    plt.plot(h[curve])
plt.show()

Forward pass (predict and evaluate)

In [None]:
from scipy.stats import gaussian_kde

In [None]:
# Predictions on the validation split.
y_pred = model.predict(x_val)

In [None]:
# Predictions on the training split (plotted alongside validation below).
y_pred_train = model.predict(x_train)

In [None]:
# Gaussian kernel density estimates of predicted and true validation weights;
# squeeze() drops the trailing length-1 axis so each input is 1-D.
kde_pred = gaussian_kde(y_pred.squeeze())
kde_val = gaussian_kde(y_val.squeeze())

In [None]:
# Evaluate both density estimates on the same integer grid 0..7999 and overlay them.
weight_grid = range(0, 8000, 1)
for density in (kde_pred, kde_val):
    plt.plot(density(weight_grid))
plt.show()

In [None]:
# Predicted vs. true weight: training points in red, validation points in the
# default colour, and the black diagonal marking a perfect prediction.
plt.scatter(y_train, y_pred_train, c='r')
plt.scatter(y_val, y_pred)
plt.plot([0, 8000], [0, 8000], 'k')
# plt.ylim([0, 8000])
# plt.xlim([0, 8000])
# plt.axis('square')
plt.show()

In [None]:
# Mean absolute error on the validation split.
np.mean(np.abs(y_pred - y_val))

In [None]:
# Absolute difference between mean predicted and mean true validation weight,
# as a percentage of the mean true weight.
np.abs((np.mean(y_pred) - np.mean(y_val)) / np.mean(y_val))*100

Now let's retrain on the full dataset (no validation split) - only for ~400 epochs though (the cell below uses 450), since training longer overfits

In [None]:
# Fresh, untrained network for the final fit on all data:
# 8 keypoints x 3 coordinates in, one scalar out.
inp = layers.Input(shape=(8, 3))
x = layers.Flatten()(inp)
# ReLU hidden stack, widest layers first.
for units in (200, 200, 100, 100, 100, 50):
    x = layers.Dense(units, activation='relu')(x)
# Single linear unit for the regression output.
x = layers.Dense(1, activation='linear')(x)
model = Model(inputs=[inp], outputs=[x])

In [None]:
# Adam optimizer with learning rate 1e-3, trained on mean absolute error.
# `optimizers.Adam` is the canonical class name; the lowercase `adam` alias
# only exists in old Keras releases.
model.compile(optimizer=optimizers.Adam(lr=1e-3),
              loss='mean_absolute_error')

In [None]:
# Fit on the full dataset (train + validation rows) with no validation monitoring.
history = model.fit(X, 
                    Y, 
                    epochs=450)

In [None]:
# Training loss per epoch for the full-data fit.
h = history.history
plt.plot(h['loss'])
plt.show()

Now prediction on bremnes data

In [None]:
# Load the production annotations with world keypoint coordinates.
# NOTE(review): the only cell in this notebook that writes this path is a
# commented-out `original_df.to_csv(...)`, so the file must already exist on
# disk for a fresh Run All to succeed.
bremnes_df = pd.read_csv('/root/data/bati/bremnes_data.csv')

In [None]:
# Build the model input for the production data: for every row that has world
# keypoint coordinates, the coordinates of each keypoint in `keypoints_order`.
#
# Rows without coordinates are skipped, so X_prod (and any prediction made
# from it) is NOT row-aligned with bremnes_df.
#
# NOTE(security): eval() executes whatever the CSV cell contains; only run this
# on a trusted file. If the cells are plain literals, ast.literal_eval is a
# safer drop-in replacement.
X_prod = []
for wkp in bremnes_df['world_keypoint_coordinates']:
    if pd.isnull(wkp):
        # Missing cell: no world coordinates were stored for this row.
        continue
    # Parse the cell once per row instead of once per keypoint.
    world_keypoints = eval(wkp)
    # Each coordinate triple is reversed before being fed to the model.
    # NOTE(review): presumably to match the axis order of the training data — confirm.
    X_prod.append([world_keypoints[kp][::-1] for kp in keypoints_order])

In [None]:
# Predict with the model retrained on the full dataset.
y_pred_prod = model.predict(np.array(X_prod))

In [None]:
# plt.hist(np.array(X_prod)[..., 0].flatten())
# plt.show()

In [None]:
# plt.hist(np.array(X)[..., 0].flatten())
# plt.show()

In [None]:
# Mean predicted value over all production rows that had keypoints.
np.mean(y_pred_prod)

In [None]:
# Distribution of the production predictions, x-axis limited to 0..10000.
plt.hist(y_pred_prod, bins=50)
plt.xlim([0, 10000])
plt.show()