In [None]:
import random
from research.weight_estimation.gtsf_data.gtsf_dataset import GTSFDataset
from research.weight_estimation.gtsf_data.gtsf_augmentation import GTSFAugmentation

import os
import math
import numpy as np
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import load_model
import tensorflow.keras as keras
from datetime import datetime
import tensorflow as tf
from matplotlib import pyplot as plt

<h1> Load GTSF Data </h1>

In [None]:
# Location of the AKPD scorer model file handed to GTSFDataset.
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'

# First and second positional arguments of GTSFDataset
# (presumably the start / end of the capture window -- confirm against GTSFDataset).
gtsf_start_date, gtsf_end_date = '2019-03-01', '2020-02-10'

gtsf_dataset = GTSFDataset(gtsf_start_date, gtsf_end_date, akpd_scorer_url)
df = gtsf_dataset.get_prepared_dataset()



In [None]:
df.head(3)

<h1> Perform Augmentation </h1>

In [None]:
# Row filters applied before augmentation; all three must hold for a row to be kept.
captured_before_cutoff = df.captured_at < '2019-09-20'
within_depth_limit = df.median_depth < 1.0
high_akpd_score = df.akpd_score > 0.9
df = df[captured_before_cutoff & within_depth_limit & high_akpd_score]

# Augmentation settings, passed positionally to generate_augmented_dataset.
y_bounds = (0.5, 3.0)
max_jitter_std = 10
trials = 40

gtsf_augmentation = GTSFAugmentation(df)
augmented_df = gtsf_augmentation.generate_augmented_dataset(
    y_bounds, max_jitter_std, trials, random_seed=0
)
print(augmented_df.shape)

In [None]:
augmented_df.head(3)

In [None]:
augmented_df.iloc[0]["original_ann"]

<h1> Create Train / Test Split </h1>

In [None]:

def generate_stabilized_input(augmented_df, mask=None):
    """Build the network inputs and scaled targets from an augmented dataframe.

    Each row's ``wkps`` entry is reshaped to 8 keypoints x 3 coordinates.
    Every coordinate triple ``(c0, c1, c2)`` is rewritten relative to its
    middle coordinate as ``(0.5 * c0 / c1, 0.5 * c2 / c1, 0.05 / c1)``
    (c1 is presumably the depth axis -- confirm against the wkps layout).

    Args:
        augmented_df: dataframe with a ``wkps`` column (24 values per row)
            and a ``weight`` column.
        mask: optional boolean row selector applied to ``augmented_df``;
            when ``None`` every row is used.

    Returns:
        Tuple ``(X_new, y)`` where ``X_new`` has shape ``(n_rows, 24)`` and
        ``y`` is the ``weight`` column multiplied by ``1e-4``.
    """
    rows = augmented_df if mask is None else augmented_df[mask]
    wkps = rows.wkps.values
    y = 1e-4 * rows.weight.values

    # Flatten the per-row arrays, then view them as (row, keypoint, coordinate).
    coords = np.concatenate(wkps).reshape(wkps.shape[0], 8, 3)
    divisor = coords[:, :, 1]

    stabilized = np.zeros(coords.shape)
    stabilized[:, :, 0] = 0.5 * coords[:, :, 0] / divisor
    stabilized[:, :, 1] = 0.5 * coords[:, :, 2] / divisor
    stabilized[:, :, 2] = 0.05 / divisor
    return stabilized.reshape(-1, 24), y

In [None]:

# Split by fish_id rather than by row, so that all augmented rows sharing a
# fish_id end up in the same partition.
fish_ids = augmented_df.fish_id.unique()
train_pct, val_pct, test_pct = 0.8, 0.1, 0.1

# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching numpy's global random state.
split_rng = np.random.RandomState(0)

# Partition sizes are drawn from a multinomial, so they sum to len(fish_ids)
# but only match the target percentages in expectation.
train_cnt, val_cnt, test_cnt = split_rng.multinomial(len(fish_ids), [train_pct, val_pct, test_pct])

# 0 = train, 1 = validation, 2 = test; shuffled so assignment is random per fish.
assignments = np.repeat([0, 1, 2], [train_cnt, val_cnt, test_cnt])
split_rng.shuffle(assignments)
train_fish_ids = fish_ids[assignments == 0]
val_fish_ids = fish_ids[assignments == 1]
test_fish_ids = fish_ids[assignments == 2]

train_mask = augmented_df.fish_id.isin(train_fish_ids)
val_mask = augmented_df.fish_id.isin(val_fish_ids)
test_mask = augmented_df.fish_id.isin(test_fish_ids)

X_train, y_train = generate_stabilized_input(augmented_df, train_mask)
X_val, y_val = generate_stabilized_input(augmented_df, val_mask)
X_test, y_test = generate_stabilized_input(augmented_df, test_mask)


<h1> Train Neural Network in Keras </h1>

In [None]:
X_train.shape

In [None]:
def get_abs_error_pct(y_pred, y_gt):
    """Return the mean absolute relative error of ``y_pred`` against ``y_gt``.

    The result is a fraction (0.1 means 10%), computed element-wise as
    ``|y_pred - y_gt| / |y_gt|`` and then averaged.
    """
    relative_error = (y_pred - y_gt) / y_gt
    return np.mean(np.abs(relative_error))

def get_pct_diff(y_pred, y_gt):
    """Return the signed relative difference between the two means.

    Computed as ``(mean(y_pred) - mean(y_gt)) / mean(y_gt)``; positive when
    the predictions are larger on average than the ground truth.
    """
    mean_pred = np.mean(y_pred)
    mean_gt = np.mean(y_gt)
    return (mean_pred - mean_gt) / mean_gt




In [None]:
# Baseline regressor: three fully-connected ReLU layers (256 -> 128 -> 64)
# feeding a single linear output, taking the 24 stabilized inputs.
model = Sequential()
model.add(Dense(256, input_dim=24, activation='relu', name='layer_1'))
model.add(Dense(128, activation='relu', name='layer_2'))
model.add(Dense(64, activation='relu', name='layer_3'))
model.add(Dense(1, activation='linear', name='output_layer'))

# Stop once val_loss has not improved for 10 consecutive epochs.
earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                              min_delta=0,
                                              patience=10,
                                              verbose=0,
                                              mode='auto')

# One timestamped TensorBoard run directory per execution of this cell.
# (The second, never-used TensorBoard callback was removed.)
log_dir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

optimizer = keras.optimizers.Adam(learning_rate=1e-4)
# Accuracy is not meaningful for a continuous target, so mean absolute
# error is tracked next to the MSE loss instead.
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    callbacks=[earlystopping, tensorboard_callback], batch_size=64, epochs=500)

# Predictions on the held-out test split, flattened to 1-D to line up with y_test.
y_pred = model.predict(X_test).squeeze()



In [None]:

# Report both test-set metrics for the current y_pred.
abs_error_pct = get_abs_error_pct(y_pred, y_test)
pct_diff = get_pct_diff(y_pred, y_test)
print("abs error percentage is {}".format(abs_error_pct))
print("Percentage difference between predicted average and ground truth average {}".format(pct_diff))

In [None]:
# The model is persisted as two files: architecture (JSON) and weights (HDF5).
architecture_path, weights_path = "model_original.json", "model_original.h5"

model_json = model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)
print("Saved model to disk")

In [None]:
# Candidate dropout rates, one list per Dropout layer (index 0 -> after layer_1,
# 1 -> after layer_2, 2 -> after layer_3). Kept as a plain nested list because
# the rows have different lengths: np.array() on ragged input is deprecated and
# raises ValueError on recent NumPy releases.
dropout_rate = [[0.1, 0.3, 0.5], [0.2, 0.3], [0.1, 0.2]]

In [None]:
model

In [None]:

# Grid search over dropout rates: one model is trained, evaluated on the test
# split and saved to disk for every (d1, d2, d3) combination.

# Result tables keyed by (d1, d2, d3). They must exist before the loop writes
# into them; without this the cell fails with NameError on a fresh kernel.
L1 = {}  # get_abs_error_pct on the test split
L2 = {}  # get_pct_diff on the test split

for d1 in dropout_rate[0]:
    for d2 in dropout_rate[1]:
        for d3 in dropout_rate[2]:
            # Tag used in the progress output and in the saved file names.
            s = "d1_{}_d2_{}_d3_{}".format(d1, d2, d3)
            print(s)

            # Same architecture as the baseline, with Dropout after each hidden layer.
            model = Sequential()
            model.add(Dense(256, input_dim=24, activation='relu', name='layer_1'))
            model.add(Dropout(d1))
            model.add(Dense(128, activation='relu', name='layer_2'))
            model.add(Dropout(d2))
            model.add(Dense(64, activation='relu', name='layer_3'))
            model.add(Dropout(d3))
            model.add(Dense(1, activation='linear', name='output_layer'))

            # Stop once val_loss has not improved for 10 consecutive epochs.
            callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       min_delta=0,
                                                       patience=10,
                                                       verbose=0,
                                                       mode='auto')]
            optimizer = keras.optimizers.Adam(learning_rate=1e-4)
            # Accuracy is not meaningful for a continuous target; track MAE instead.
            model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
            history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                                callbacks=callbacks, batch_size=64, epochs=500)
            y_pred = model.predict(X_test).squeeze()

            l1 = get_abs_error_pct(y_pred, y_test)
            l2 = get_pct_diff(y_pred, y_test)
            print("abs error percentage is {}".format(l1))
            print("Pct diff between predicted avg and ground truth avg {}".format(l2))

            L1[(d1, d2, d3)] = l1
            L2[(d1, d2, d3)] = l2

            # Serialize architecture to JSON and weights to HDF5, one pair per combination.
            model_json = model.to_json()
            with open("model_{}.json".format(s), "w") as json_file:
                json_file.write(model_json)
            model.save_weights("model_{}.h5".format(s))
            print("Saved model to disk")
            
            
            
            

In [None]:
L1

In [None]:
L2

In [None]:
# Continue training the current `model` with TensorBoard logging (graph plus
# weight histograms every 5 epochs) into a timestamped run directory.
log_dir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
logger = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    write_graph=True,
    histogram_freq=5
)

optimizer = keras.optimizers.Adam(learning_rate=1e-4)
# Accuracy is not meaningful for a continuous target; track MAE instead.
model.compile(optimizer=optimizer,
              loss='mean_squared_error',
              metrics=['mae'])

# y_train / y_val already carry the 1e-4 scaling applied in
# generate_stabilized_input, so they are passed through unchanged; scaling
# them again would train against targets on a different scale than y_test.
# Validation uses the validation split so the test split stays held out.
# The History object is kept so the following cells inspect this run.
history = model.fit(X_train,
                    y_train,
                    validation_data=(X_val, y_val),
                    batch_size=25,
                    epochs=100,
                    callbacks=[logger])

In [None]:
# Validation loss per epoch for the run currently stored in `history`.
fig, ax = plt.subplots()
ax.plot(history.history['val_loss'])
ax.set(title='Validation loss', xlabel='Epoch', ylabel='val_loss (MSE)')
plt.show()

In [None]:
history.history


In [None]:
# Predict on the test inputs with the current `model`; squeeze() removes
# length-1 axes from the prediction array.
y_pred = model.predict(X_test).squeeze()

In [None]:
y_pred

In [None]:

# Mean squared error between the test targets and the current predictions.
# NOTE(review): this is only meaningful if y_pred and y_test are on the same
# scale -- confirm the targets used for the most recent fit.
keras.losses.MSE( y_test, y_pred)

In [None]:
# Launch TensorBoard on localhost:6006, reading runs from ./logs.
# NOTE(review): as a `!` shell command this presumably blocks the cell until
# interrupted -- confirm before including it in a Run All.
!tensorboard --host localhost --port 6006 --logdir ./logs

In [None]:
tf.__version__