In [None]:
%load_ext autoreload
%autoreload 2

import ast
import json
from collections import defaultdict
from typing import Dict, Tuple

import numpy as np
import pandas as pd
from keras.models import load_model
from matplotlib import pyplot as plt

from research_lib.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from weight_estimation.dataset import prepare_gtsf_data, compute_akpd_score
from weight_estimation.train import train, augment, normalize, get_data_split, train_model
from weight_estimation.utils import CameraMetadata


<h1> Load canonical GTSF dataset (to be versioned) </h1>

In [None]:
def _parse_serialized_dict(text):
    """Decode a dict/list that was stringified into a CSV cell.

    Tries strict JSON first; if that fails, falls back to parsing the text as a
    Python literal (single-quoted keys, True/False/None). This replaces the
    previous quote-swapping trick, which broke on any value containing an
    apostrophe and could not read Python booleans/None.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # ast.literal_eval only evaluates literals, so it is safe on file contents.
        return ast.literal_eval(text)


augmented_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/augmented_df_depth_weight_balanced.csv')

# Both columns come back from the CSV as strings; turn them back into nested structures.
new_cms = [_parse_serialized_dict(cm) for cm in augmented_df['camera_metadata']]
new_anns = [_parse_serialized_dict(ann) for ann in augmented_df['annotation']]

augmented_df['annotation'] = new_anns
augmented_df['camera_metadata'] = new_cms


<h1> Alternatively, run the section below to create augmented GTSF dataset on the fly </h1>

In [None]:
from weight_estimation.utils import get_left_right_keypoint_arrs, convert_to_world_point_arr

def augment(df: pd.DataFrame, augmentation_config: Dict) -> pd.DataFrame:
    """Create depth-rescaled, jittered copies of every row's keypoint annotation.

    Rows are oversampled inversely to how populated their 1000-wide weight bin is:
    a bin gets int(5 / (bin_count / max_bin_count)) copies per row, so the most
    populated bin gets 5 and sparser bins get more. Weights at or above the last
    bin edge reuse the last bin's value.

    Args:
        df: frame with `keypoints`, `camera_metadata`, `weight`, `fish_id` and
            `k_factor` columns.
        augmentation_config: must provide `min_depth` and `max_depth`, the bounds
            of the uniform distribution the target depth is drawn from. Other
            keys (e.g. `max_jitter_std`, `trials`) are ignored by this function.

    Returns:
        DataFrame with columns `annotation`, `fish_id`, `weight`, `kf`,
        `camera_metadata`, one row per generated copy (empty if nothing was generated).
    """
    # NOTE(review): get_ann_from_keypoint_arrs is not imported anywhere in the visible
    # notebook -- confirm where it is expected to come from.
    counts, _ = np.histogram(df.weight, bins=np.arange(0, 10000, 1000))

    # Empty bins get zero trials instead of dividing by zero and casting inf to int.
    trial_values = np.zeros(len(counts), dtype=int)
    populated = counts > 0
    if populated.any():
        trial_values[populated] = (5.0 / (counts[populated] / np.max(counts))).astype(int)

    min_depth = augmentation_config['min_depth']
    max_depth = augmentation_config['max_depth']

    augmented_data = defaultdict(list)
    for _, row in df.iterrows():
        trials = trial_values[min(int(row.weight / 1000), len(trial_values) - 1)]
        if trials <= 0:
            continue

        camera_metadata = row.camera_metadata
        cm = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )

        # These depend only on the row, so compute them once rather than per trial.
        base_left, base_right = get_left_right_keypoint_arrs(row.keypoints)
        wkps = convert_to_world_point_arr(base_left, base_right, cm)
        original_depth = float(np.median(wkps[:, 1]))

        for _ in range(trials):
            depth = np.random.uniform(min_depth, max_depth)
            scaling_factor = original_depth / depth
            # Jitter scales with the apparent size of the fish in the image.
            jitter_std = 5 * scaling_factor

            # rescale (multiplication allocates new arrays, so the base arrays stay intact)
            X_left = base_left * scaling_factor
            X_right = base_right * scaling_factor

            # add jitter to column 0 of each keypoint array
            X_left[:, 0] += np.random.normal(0, jitter_std, X_left.shape[0])
            X_right[:, 0] += np.random.normal(0, jitter_std, X_right.shape[0])

            # reconstruct annotation
            ann = get_ann_from_keypoint_arrs(X_left, X_right)
            augmented_data['annotation'].append(ann)
            augmented_data['fish_id'].append(row.fish_id)
            augmented_data['weight'].append(row.weight)
            augmented_data['kf'].append(row.k_factor)
            augmented_data['camera_metadata'].append(row.camera_metadata)

    augmented_df = pd.DataFrame(augmented_data)
    return augmented_df

In [None]:
def get_jitter(kp, depth):
    """Return the depth-dependent jitter value for keypoint type `kp`.

    Each known keypoint has a linear model `slope * depth + intercept`.
    Unknown keypoint names yield None.
    """
    linear_models = {
        'ADIPOSE_FIN': (-0.38, 17.15),
        'ANAL_FIN': (-0.29, 13.74),
        'DORSAL_FIN': (-0.19, 19.81),
        'EYE': (-0.29, 13.17),
        'PECTORAL_FIN': (-0.28, 16.44),
        'PELVIC_FIN': (-0.36, 15.14),
        'TAIL_NOTCH': (-0.61, 17.65),
        'UPPER_LIP': (-0.66, 18.04),
    }
    coefficients = linear_models.get(kp)
    if coefficients is None:
        return None
    slope, intercept = coefficients
    return slope * depth + intercept

# Keypoint types used throughout the notebook, kept in alphabetical order.
core_body_parts = [
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'DORSAL_FIN',
    'EYE',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'TAIL_NOTCH',
    'UPPER_LIP',
]
    
def augment2(df: pd.DataFrame, augmentation_config: Dict) -> pd.DataFrame:
    """Variant of `augment` that uses a per-keypoint, depth-dependent jitter.

    Oversampling works as in `augment`: each row is copied
    int(5 / (bin_count / max_bin_count)) times according to its 1000-wide weight
    bin. The jitter standard deviation for each keypoint comes from
    `get_jitter(kp, depth)` evaluated at the newly drawn depth.

    Args:
        df: frame with `keypoints`, `camera_metadata`, `weight`, `fish_id` and
            `k_factor` columns.
        augmentation_config: must provide `min_depth` and `max_depth`; other keys
            are ignored by this function.

    Returns:
        DataFrame with columns `annotation`, `fish_id`, `weight`, `kf`,
        `camera_metadata`, one row per generated copy.
    """
    # NOTE(review): get_ann_from_keypoint_arrs is not imported anywhere in the visible
    # notebook -- confirm where it is expected to come from.
    # NOTE(review): assumes the keypoint arrays have one row per entry of
    # core_body_parts, in the same order, and that get_jitter knows every entry --
    # otherwise the jitter array will not line up. Confirm.
    counts, _ = np.histogram(df.weight, bins=np.arange(0, 10000, 1000))

    # Empty bins get zero trials instead of dividing by zero and casting inf to int.
    trial_values = np.zeros(len(counts), dtype=int)
    populated = counts > 0
    if populated.any():
        trial_values[populated] = (5.0 / (counts[populated] / np.max(counts))).astype(int)

    min_depth = augmentation_config['min_depth']
    max_depth = augmentation_config['max_depth']

    augmented_data = defaultdict(list)
    for _, row in df.iterrows():
        trials = trial_values[min(int(row.weight / 1000), len(trial_values) - 1)]
        if trials <= 0:
            continue

        camera_metadata = row.camera_metadata
        cm = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )

        # These depend only on the row, so compute them once rather than per trial.
        base_left, base_right = get_left_right_keypoint_arrs(row.keypoints)
        wkps = convert_to_world_point_arr(base_left, base_right, cm)
        original_depth = float(np.median(wkps[:, 1]))

        for _ in range(trials):
            depth = np.random.uniform(min_depth, max_depth)
            scaling_factor = original_depth / depth
            jitter_stds = np.array([get_jitter(kp, depth) for kp in core_body_parts])

            # rescale (multiplication allocates new arrays, so the base arrays stay intact)
            X_left = base_left * scaling_factor
            X_right = base_right * scaling_factor

            # add jitter to column 0 of each keypoint array
            X_left[:, 0] += np.random.normal(0, jitter_stds, X_left.shape[0])
            X_right[:, 0] += np.random.normal(0, jitter_stds, X_right.shape[0])

            # reconstruct annotation
            ann = get_ann_from_keypoint_arrs(X_left, X_right)
            augmented_data['annotation'].append(ann)
            augmented_data['fish_id'].append(row.fish_id)
            augmented_data['weight'].append(row.weight)
            augmented_data['kf'].append(row.k_factor)
            augmented_data['camera_metadata'].append(row.camera_metadata)

    augmented_df = pd.DataFrame(augmented_data)
    return augmented_df

In [None]:
# Fetch the AKPD scorer model file, then build one GTSF frame per date range.
s3 = S3AccessUtils('/root/data')
akpd_scorer_url = 'https://aquabyte-models.s3-us-west-1.amazonaws.com/keypoint-detection-scorer/akpd_scorer_model_TF.h5'
akpd_scorer_f, _, _ = s3.download_from_url(akpd_scorer_url)

gtsf_date_ranges = (
    ('2019-03-01', '2019-09-20'),
    ('2020-06-01', '2020-08-20'),
)
df1, df2 = (
    prepare_gtsf_data(start_date, end_date, akpd_scorer_f, 0.5, 1.0)
    for start_date, end_date in gtsf_date_ranges
)
df = pd.concat([df1, df2])

# Start here

In [None]:
def simulate_larger_fish(world_keypoints, weight, pct_inflation):
    """Scale every core keypoint by (1 + pct_inflation) and scale the weight to match.

    The weight is multiplied by (1 + pct_inflation) ** 3.0852 (an earlier variant
    used 2.88 as the exponent). A negative pct_inflation shrinks the fish.

    Returns:
        (scaled keypoints keyed by body part, scaled weight)
    """
    scale = 1.0 + pct_inflation
    scaled_keypoints = {part: scale * world_keypoints[part] for part in core_body_parts}
    scaled_weight = scale ** 3.0852 * weight
    return scaled_keypoints, scaled_weight
    
def simulate_larger_fish2(world_keypoints, weight, pct_inflation):
    """Scale every core keypoint by (1 + pct_inflation) and shift the weight by a length model.

    The weight change is the difference between a log-linear length->weight model,
    exp(9.5090 + 3.0852 * log(length)), evaluated at the scaled and the original
    UPPER_LIP-to-TAIL_NOTCH distance. That difference is added to the measured
    weight, so the fish's own deviation from the model is preserved.

    Returns:
        (scaled keypoints keyed by body part, adjusted weight)
    """
    # NOTE(review): 9.5090 and 3.0852 look like fitted regression coefficients --
    # confirm their source and the units they assume.
    scale = 1.0 + pct_inflation
    modified_world_keypoints = {
        body_part: scale * world_keypoints[body_part] for body_part in core_body_parts
    }

    length = np.linalg.norm(world_keypoints['UPPER_LIP'] - world_keypoints['TAIL_NOTCH'])
    modified_length = np.linalg.norm(
        modified_world_keypoints['UPPER_LIP'] - modified_world_keypoints['TAIL_NOTCH'])

    # An earlier length+width model was:
    #     np.exp(11.4894 + 1.6489 * np.log(length) + 1.3924 * np.log(width))
    # The width terms are no longer used, so they are not computed here.
    calc_weight = np.exp(9.5090 + 3.0852 * np.log(length))
    modified_calc_weight = np.exp(9.5090 + 3.0852 * np.log(modified_length))

    modified_weight = weight + (modified_calc_weight - calc_weight)

    return modified_world_keypoints, modified_weight

def get_ann_from_world_keypoints(world_keypoints, cm):
    """Project world keypoints back into a left/right pixel annotation.

    For each core body part the (x, y, z) world point is projected with `y` as the
    divisor, and the right-crop x coordinate is the left one minus the rounded
    disparity focalLengthPixel * baseline / y. `cm` is the camera-metadata dict.

    Returns:
        {'leftCrop': [...], 'rightCrop': [...]} with one item per body part.
    """
    left_crop, right_crop = [], []
    for body_part in core_body_parts:
        x, y, z = world_keypoints[body_part]
        focal_px = cm['focalLengthPixel']
        px_x = round(x * focal_px / y + cm['pixelCountWidth'] / 2.0)
        px_y = round(cm['pixelCountHeight'] / 2.0 - z * focal_px / y)
        disparity = round(focal_px * cm['baseline'] / y)

        left_crop.append({'keypointType': body_part, 'xFrame': px_x, 'yFrame': px_y})
        right_crop.append({'keypointType': body_part, 'xFrame': px_x - disparity, 'yFrame': px_y})

    return {'leftCrop': left_crop, 'rightCrop': right_crop}
    

In [None]:
from weight_estimation.body_parts import core_body_parts
from weight_estimation.dataset import prepare_gtsf_data, compute_akpd_score

from research.weight_estimation.keypoint_utils.optics import pixel2world

# Only fish in this weight band are used as seeds for the synthetic larger fish.
mask = (df.weight >= 7000) & (df.weight <= 9000)
# The inflation fraction is drawn uniformly from [min_pct_inflation, max_pct_inflation].
# min_pct_inflation was previously undefined here (NameError on a fresh kernel).
min_pct_inflation = 0.0
max_pct_inflation = 0.15

# Convert every annotation to world coordinates once and keep it on the frame.
world_keypoints = []
for idx, row in df.iterrows():
    ann, cm = row.keypoints, row.camera_metadata
    wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    world_keypoints.append(wkps)

df['world_keypoints'] = world_keypoints

# The scorer network was only defined in commented-out code before; load it from
# the model file downloaded in the data-preparation cell.
akpd_scorer_network = load_model(akpd_scorer_f)

modified_ann_list2 = []
modified_weight_list2 = []
modified_akpd_score_list2 = []
cm_list2 = []
for idx, row in df[mask].iterrows():
    # Alternative lower bound that was tried:
    #     min_pct_inflation = max(0, (8000.0 / row.weight) ** (1.0 / 3) - 1)
    pct_inflation = np.random.uniform(min_pct_inflation, max_pct_inflation)
    world_keypoints = row.world_keypoints
    cm = row.camera_metadata
    weight = row.weight
    # Use the drawn inflation; previously max_pct_inflation was passed, which made
    # the random draw above dead code and inflated every fish by the same amount.
    modified_world_keypoints, modified_weight = simulate_larger_fish2(world_keypoints, weight, pct_inflation)
    modified_ann = get_ann_from_world_keypoints(modified_world_keypoints, cm)

    modified_ann_list2.append(modified_ann)
    modified_weight_list2.append(modified_weight)
    modified_akpd_score_list2.append(compute_akpd_score(akpd_scorer_network, modified_ann, cm))
    cm_list2.append(cm)
    
    
    



In [None]:
# Histogram of the weights in df, labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.hist(df.weight)
ax.set(xlabel='weight', ylabel='count', title='Weight distribution (df)');

In [None]:
# AKPD score of each synthetic annotation against its simulated weight.
fig, ax = plt.subplots()
ax.scatter(modified_akpd_score_list2, modified_weight_list2)
ax.set(xlabel='AKPD score', ylabel='simulated weight', title='Synthetic fish: AKPD score vs. weight');

# w = modified_weight_list2.copy()

# for weight in df.weight:
#     w.append(weight)

# plt.hist(w)

In [None]:
from weight_estimation.body_parts import core_body_parts

from research.weight_estimation.keypoint_utils.optics import pixel2world

# Only fish in this weight band are used as seeds for synthetic fish.
mask = (df.weight >= 7000) & (df.weight <= 9000)
max_pct_inflation = 0.15

# Convert every annotation to world coordinates once and keep it on the frame.
world_keypoints = []
for idx, row in df.iterrows():
    ann, cm = row.keypoints, row.camera_metadata
    wkps = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    world_keypoints.append(wkps)

df['world_keypoints'] = world_keypoints

modified_ann_list = []
modified_weight_list = []
cm_list = []
for idx, row in df[mask].iterrows():
    pct_inflation = np.random.uniform(0, max_pct_inflation)
    world_keypoints = row.world_keypoints
    cm = row.camera_metadata
    weight = row.weight

    # Each seed fish yields one inflated and one deflated copy. The camera metadata
    # is appended for both, so the three lists stay aligned when zipped later
    # (previously cm_list got one entry per fish while the others got two).
    for signed_pct_inflation in (pct_inflation, -pct_inflation):
        modified_world_keypoints, modified_weight = simulate_larger_fish(
            world_keypoints, weight, signed_pct_inflation)
        modified_ann = get_ann_from_world_keypoints(modified_world_keypoints, cm)

        modified_ann_list.append(modified_ann)
        modified_weight_list.append(modified_weight)
        cm_list.append(cm)
    
    
    



In [None]:
from weight_estimation.body_parts import core_body_parts

from research.weight_estimation.keypoint_utils.optics import pixel2world

max_pct_inflation = 0.15
# Seed fish: weights within [7000, 9000], both ends included.
mask = df['weight'].between(7000, 9000)

# World-coordinate keypoints for every row, stored alongside the annotations.
world_keypoints = [
    pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
    for ann, cm in zip(df['keypoints'], df['camera_metadata'])
]
df['world_keypoints'] = world_keypoints

# One randomly inflated synthetic fish per seed fish.
modified_ann_list, modified_weight_list, cm_list = [], [], []
for idx, row in df[mask].iterrows():
    pct_inflation = np.random.uniform(0, max_pct_inflation)
    world_keypoints, cm, weight = row['world_keypoints'], row['camera_metadata'], row['weight']
    modified_world_keypoints, modified_weight = simulate_larger_fish(world_keypoints, weight, pct_inflation)
    modified_ann = get_ann_from_world_keypoints(modified_world_keypoints, cm)

    modified_ann_list += [modified_ann]
    modified_weight_list += [modified_weight]
    cm_list += [cm]
    
    
    



In [None]:
import uuid

# Real fish: keep only rows with a positive weight and the columns training needs.
mask = df.weight > 0
tdf_original = df.loc[mask, ['keypoints', 'fish_id', 'weight', 'k_factor', 'camera_metadata']].copy(deep=True)

# Synthetic fish from the AKPD-scored simulation; drop low-score or negative-weight ones.
annotation_list = []
fish_id_list = []
weight_list = []
kf_list = []
akpd_score_list = []
camera_metadata_list = []
for ann, weight, akpd_score, camera_metadata in zip(modified_ann_list2, modified_weight_list2, modified_akpd_score_list2, cm_list2):
    if akpd_score < 0.5 or weight < 0:
        continue

    annotation_list.append(ann)
    # Every synthetic fish gets its own ID so the by-fish split treats it as a separate fish.
    fish_id_list.append(uuid.uuid1())
    weight_list.append(weight)
    kf_list.append(1.0)
    akpd_score_list.append(akpd_score)
    camera_metadata_list.append(camera_metadata)

tdf_synthetic = pd.DataFrame({
    'keypoints': annotation_list,
    'fish_id': fish_id_list,
    'weight': weight_list,
    'k_factor': kf_list,
    'akpd_score': akpd_score_list,
    'camera_metadata': camera_metadata_list
})

# ignore_index gives the combined frame a fresh 0..n-1 index; without it the synthetic
# rows' index labels can collide with those of the original rows.
# Note: tdf_original has no akpd_score column, so those rows get NaN there.
tdf = pd.concat([tdf_original, tdf_synthetic], ignore_index=True)
    


In [None]:
# Histogram of the weights in tdf (real + synthetic fish), labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.hist(tdf.weight)
ax.set(xlabel='weight', ylabel='count', title='Weight distribution (tdf)');

In [None]:
import uuid

# Real fish: keep only rows with a positive weight and the columns training needs.
mask = df.weight > 0
tdf_original = df.loc[mask, ['keypoints', 'fish_id', 'weight', 'k_factor', 'camera_metadata']].copy(deep=True)

# Synthetic fish from the unscored simulation (no AKPD filtering here).
annotation_list = []
fish_id_list = []
weight_list = []
kf_list = []
camera_metadata_list = []
for ann, weight, camera_metadata in zip(modified_ann_list, modified_weight_list, cm_list):
    annotation_list.append(ann)
    # Every synthetic fish gets its own ID so the by-fish split treats it as a separate fish.
    fish_id_list.append(uuid.uuid1())
    weight_list.append(weight)
    kf_list.append(1.0)
    camera_metadata_list.append(camera_metadata)

tdf_synthetic = pd.DataFrame({
    'keypoints': annotation_list,
    'fish_id': fish_id_list,
    'weight': weight_list,
    'k_factor': kf_list,
    'camera_metadata': camera_metadata_list
})

# ignore_index gives the combined frame a fresh 0..n-1 index; without it the synthetic
# rows' index labels can collide with those of the original rows.
tdf = pd.concat([tdf_original, tdf_synthetic], ignore_index=True)
    


In [None]:
# Augment the combined real + synthetic frame with the fixed-jitter augmenter.
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 5,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

augmented_df3 = augment(tdf, augmentation_config)

In [None]:
# Augment the combined frame with the per-keypoint jitter augmenter.
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 5,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

augmented_df4 = augment2(tdf, augmentation_config)

In [None]:
# Second run of the per-keypoint jitter augmenter on the combined frame.
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 5,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

augmented_df5 = augment2(tdf, augmentation_config)

In [None]:
# Use the un-augmented frame (original + synthetic rows) directly as the training frame.
# NOTE(review): this binds augmented_df3 to the same object as tdf (no copy), so the
# column added below -- and any later column written to augmented_df3 -- also appears
# on tdf. It also replaces any augmented_df3 produced by an earlier augment(...) cell.
augmented_df3 = tdf

# Downstream cells read keypoints from an 'annotation' column, so mirror 'keypoints' into it.
augmented_df3['annotation'] = augmented_df3['keypoints']

In [None]:
# Median depth per row of augmented_df3, stored as a new 'depth' column.
# Maps CameraMetadata constructor arguments to the keys of the stored metadata dict.
camera_fields = {
    'focal_length': 'focalLength',
    'focal_length_pixel': 'focalLengthPixel',
    'baseline_m': 'baseline',
    'pixel_count_width': 'pixelCountWidth',
    'pixel_count_height': 'pixelCountHeight',
    'image_sensor_width': 'imageSensorWidth',
    'image_sensor_height': 'imageSensorHeight',
}

depths = []
for ann, camera_metadata in zip(augmented_df3['annotation'], augmented_df3['camera_metadata']):
    cm = CameraMetadata(**{field: camera_metadata[key] for field, key in camera_fields.items()})
    keypoint_arrs = get_left_right_keypoint_arrs(ann)
    X = convert_to_world_point_arr(*keypoint_arrs, cm)
    # Column 1 of the world-point array is the one the notebook treats as depth.
    depths.append(np.median(X[:, 1]))
augmented_df3['depth'] = depths

In [None]:
# Augment the combined real + synthetic frame (config carries max_jitter_std=10).
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 10,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

augmented_df2 = augment(tdf, augmentation_config)

In [None]:
# Median depth per row of augmented_df2, stored as a new 'depth' column.
# Maps CameraMetadata constructor arguments to the keys of the stored metadata dict.
camera_fields = {
    'focal_length': 'focalLength',
    'focal_length_pixel': 'focalLengthPixel',
    'baseline_m': 'baseline',
    'pixel_count_width': 'pixelCountWidth',
    'pixel_count_height': 'pixelCountHeight',
    'image_sensor_width': 'imageSensorWidth',
    'image_sensor_height': 'imageSensorHeight',
}

depths = []
for ann, camera_metadata in zip(augmented_df2['annotation'], augmented_df2['camera_metadata']):
    cm = CameraMetadata(**{field: camera_metadata[key] for field, key in camera_fields.items()})
    keypoint_arrs = get_left_right_keypoint_arrs(ann)
    X = convert_to_world_point_arr(*keypoint_arrs, cm)
    # Column 1 of the world-point array is the one the notebook treats as depth.
    depths.append(np.median(X[:, 1]))
augmented_df2['depth'] = depths

In [None]:
# Augment the real-fish frame only (no synthetic rows).
augmentation_config = {
    'trials': 10,
    'max_jitter_std': 10,
    'min_depth': 0.5,
    'max_depth': 2.5,
}

augmented_df = augment(df, augmentation_config)

In [None]:
# Median depth per row of augmented_df, stored as a new 'depth' column.
# Maps CameraMetadata constructor arguments to the keys of the stored metadata dict.
camera_fields = {
    'focal_length': 'focalLength',
    'focal_length_pixel': 'focalLengthPixel',
    'baseline_m': 'baseline',
    'pixel_count_width': 'pixelCountWidth',
    'pixel_count_height': 'pixelCountHeight',
    'image_sensor_width': 'imageSensorWidth',
    'image_sensor_height': 'imageSensorHeight',
}

depths = []
for ann, camera_metadata in zip(augmented_df['annotation'], augmented_df['camera_metadata']):
    cm = CameraMetadata(**{field: camera_metadata[key] for field, key in camera_fields.items()})
    keypoint_arrs = get_left_right_keypoint_arrs(ann)
    X = convert_to_world_point_arr(*keypoint_arrs, cm)
    # Column 1 of the world-point array is the one the notebook treats as depth.
    depths.append(np.median(X[:, 1]))
augmented_df['depth'] = depths

In [None]:
# Quick look at the first five rows of the real-fish frame.
df.head(n=5)

<h1> Train model </h1>

In [None]:
from collections import defaultdict
import json
import os
import random
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
from scipy.interpolate import interpn
from weight_estimation.utils import get_left_right_keypoint_arrs, \
    convert_to_nn_input, CameraMetadata
from weight_estimation.dataset import prepare_gtsf_data
from keras.layers import Input, Dense, Flatten
from keras.models import Model
import keras
from research_lib.utils.data_access_utils import S3AccessUtils
import torch
from torch import nn
from sklearn.linear_model import LinearRegression




class Network(nn.Module):
    """Network class defines neural-network architecture for both weight and k-factor estimation
    (currently both neural networks share identical architecture).

    Architecture: 24 inputs -> 256 -> 128 -> 64 (ReLU after each) -> 1 linear output.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(24, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward_intermediate(self, x):
        """Run the hidden layers only and return the final hidden-layer activations.

        The input is flattened to (batch, -1) first; the result has 64 features per
        sample and is what the output layer consumes.
        """
        x = x.view(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return x

    def forward(self, x):
        """Run inference on input keypoint tensor."""
        # Reuse the hidden-layer pass so the two entry points cannot drift apart.
        return self.output(self.forward_intermediate(x))
        



def normalize(anns: List, camera_metadatas: List) -> np.ndarray:
    """Convert annotations into flattened network inputs, one row per annotation.

    Each annotation is paired with its camera-metadata dict, passed through
    `convert_to_nn_input`, flattened, and converted to a numpy array.
    """
    def _flattened_input(ann, camera_metadata):
        camera = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )
        return torch.flatten(convert_to_nn_input(ann, camera)).numpy()

    return np.array([
        _flattened_input(ann, camera_metadata)
        for ann, camera_metadata in zip(anns, camera_metadatas)
    ])


def get_data_split(X: np.ndarray, y: np.ndarray, fish_ids: np.ndarray, train_pct: float,
                   val_pct: float) -> Tuple:
    """Split samples into train / val / test sets with no fish ID shared between sets.

    Fish IDs (not rows) are assigned to the three sets; the number of IDs per set
    is drawn from a multinomial with probabilities
    [train_pct, val_pct, 1 - train_pct - val_pct], then every row follows its fish.

    Args:
        X, y: per-row features and targets, indexable by a boolean mask.
        fish_ids: per-row fish identifier (hashable values).
        train_pct, val_pct: expected fractions of fish IDs in train and val.

    Returns:
        X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask
    """
    test_pct = 1.0 - train_pct - val_pct

    # dict.fromkeys de-duplicates in first-seen order. A set() would iterate in
    # hash order, which changes between kernel restarts for string IDs and made
    # the split irreproducible even with a fixed numpy seed.
    unique_fish_ids = np.array(list(dict.fromkeys(fish_ids)))
    train_cnt, val_cnt, test_cnt = np.random.multinomial(len(unique_fish_ids),
                                                         [train_pct, val_pct, test_pct])

    # 0 = train, 1 = val, 2 = test; shuffled so set membership is random per fish.
    assignments = np.array([0] * train_cnt + [1] * val_cnt + [2] * test_cnt)
    np.random.shuffle(assignments)
    train_fish_ids = unique_fish_ids[assignments == 0]
    val_fish_ids = unique_fish_ids[assignments == 1]
    test_fish_ids = unique_fish_ids[assignments == 2]

    train_mask = np.isin(fish_ids, train_fish_ids)
    val_mask = np.isin(fish_ids, val_fish_ids)
    test_mask = np.isin(fish_ids, test_fish_ids)

    X_train, y_train = X[train_mask], y[train_mask]
    X_val, y_val = X[val_mask], y[val_mask]
    X_test, y_test = X[test_mask], y[test_mask]

    return X_train, y_train, X_val, y_val, X_test, y_test, train_mask, val_mask, test_mask


def train_model(X_train, y_train, X_val, y_val, train_config):
    """Build and fit the Keras regression network (24 -> 256 -> 128 -> 64 -> 1).

    Args:
        X_train, y_train: training inputs (24 features per row) and targets.
        X_val, y_val: validation data monitored for early stopping.
        train_config: dict providing `epochs`, `batch_size`, `learning_rate`
            and `patience` (early-stopping patience on `val_loss`).

    Returns:
        The fitted Keras model.
    """
    inputs = Input(shape=(24,))
    x = Dense(256, activation='relu')(inputs)
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    pred = Dense(1)(x)
    model = Model(inputs, pred)

    epochs = train_config['epochs']
    batch_size = train_config['batch_size']
    lr = train_config['learning_rate']
    patience = train_config['patience']

    # NOTE(review): restore_best_weights is left at its default, so the returned model
    # carries the weights of the last epoch run, not the best val_loss -- confirm intended.
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                               min_delta=0,
                                               patience=patience,
                                               verbose=0,
                                               mode='auto')]

    optimizer = keras.optimizers.Adam(learning_rate=lr)
    # This is a regression problem: report mean absolute error rather than
    # classification accuracy, which is meaningless for continuous targets.
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=callbacks,
              batch_size=batch_size, epochs=epochs)

    return model


def convert_to_pytorch(model):
    """Copy the weights of the trained Keras model into a fresh PyTorch `Network`.

    `model.get_weights()` is read as consecutive (kernel, bias) pairs for fc1, fc2,
    fc3 and the output layer; each array is transposed before being installed.
    """
    pytorch_model = Network()
    weights = model.get_weights()

    target_layers = (pytorch_model.fc1, pytorch_model.fc2, pytorch_model.fc3, pytorch_model.output)
    for position, layer in enumerate(target_layers):
        kernel = weights[2 * position]
        bias = weights[2 * position + 1]
        layer.weight.data = torch.from_numpy(np.transpose(kernel))
        layer.bias.data = torch.from_numpy(np.transpose(bias))

    return pytorch_model


def apply_final_layer_ols(pytorch_model, X=None, y=None):
    """Refit the output layer by ordinary least squares on the last hidden activations.

    The hidden layers are left untouched; only `pytorch_model.output` is replaced by
    the OLS solution mapping `forward_intermediate(X)` to `y`. Modifies the model in
    place and returns None.

    Args:
        pytorch_model: a `Network` whose hidden layers are already trained.
        X: training inputs as a numpy array. Defaults to the notebook-global
            `X_train`, which is what this function always used before.
        y: training targets. Defaults to the notebook-global `y_train`.
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    X_ols = pytorch_model.forward_intermediate(torch.from_numpy(X).float()).detach().numpy()
    lr = LinearRegression().fit(X_ols, y)

    output_layer = pytorch_model.output
    # Cast to the layer's existing dtype so the refit parameters cannot end up in a
    # different precision from the rest of the network.
    output_layer.weight.data = torch.from_numpy(
        np.array(lr.coef_).reshape(1, -1)).to(output_layer.weight.dtype)
    output_layer.bias.data = torch.from_numpy(
        np.array([lr.intercept_])).to(output_layer.bias.dtype)


In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df3 with a 98 / 1 / 1 split by fish ID.
train_config = {
    'train_pct': 0.98,
    'val_pct': 0.01,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df3['annotation'].values.tolist()
cms = augmented_df3['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df3['weight'].values
fish_ids = augmented_df3['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df5 with a 98 / 1 / 1 split by fish ID.
train_config = {
    'train_pct': 0.98,
    'val_pct': 0.01,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df5['annotation'].values.tolist()
cms = augmented_df5['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df5['weight'].values
fish_ids = augmented_df5['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df4 with a 90 / 9 / 1 split by fish ID.
train_config = {
    'train_pct': 0.9,
    'val_pct': 0.09,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df4['annotation'].values.tolist()
cms = augmented_df4['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df4['weight'].values
fish_ids = augmented_df4['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df3 with a 90 / 9 / 1 split by fish ID.
train_config = {
    'train_pct': 0.9,
    'val_pct': 0.09,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df3['annotation'].values.tolist()
cms = augmented_df3['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df3['weight'].values
fish_ids = augmented_df3['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df2 with a 98 / 1 / 1 split by fish ID; the OLS refit is skipped here.
train_config = {
    'train_pct': 0.98,
    'val_pct': 0.01,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df2['annotation'].values.tolist()
cms = augmented_df2['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df2['weight'].values
fish_ids = augmented_df2['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
# apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Train on augmented_df2 with a 98 / 1 / 1 split by fish ID.
train_config = {
    'train_pct': 0.98,
    'val_pct': 0.01,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

anns = augmented_df2['annotation'].values.tolist()
cms = augmented_df2['camera_metadata'].values.tolist()
X = normalize(anns, cms)
# Targets are the weights scaled by 1e-4.
y = 1e-4 * augmented_df2['weight'].values
fish_ids = augmented_df2['fish_id'].values

(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
import tensorflow

# Seeding is intentionally left disabled in this cell:
# random.seed(0)
# np.random.seed(0)
# tensorflow.random.set_seed(0)

# Build the model input from the annotation / camera-metadata columns.
anns = augmented_df['annotation'].values.tolist()
cms = augmented_df['camera_metadata'].values.tolist()
X = normalize(anns, cms)

# Split fractions and optimisation settings; the whole dict is handed to train_model.
train_config = {
    'train_pct': 0.98,
    'val_pct': 0.01,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 2e-5,
    'patience': 30,
}

# Targets are the weight column scaled by 1e-4; fish ids go to the splitter
# (presumably so one fish never straddles two splits - confirm in get_data_split).
y = augmented_df['weight'].values * 1e-4
fish_ids = augmented_df['fish_id'].values
(X_train, y_train,
 X_val, y_val,
 X_test, y_test,
 train_mask, val_mask, test_mask) = get_data_split(
    X, y, fish_ids, train_config['train_pct'], train_config['val_pct'])

# Train with the TF/Keras trainer, convert to PyTorch, then run the
# final-layer OLS step (return value unused, so presumably in place - confirm).
tf_model = train_model(X_train, y_train, X_val, y_val, train_config)
pytorch_model = convert_to_pytorch(tf_model)
apply_final_layer_ols(pytorch_model)




In [None]:
# Record split membership on augmented_df3 as integer flag columns, using the
# masks left in the kernel by the last get_data_split call.
# NOTE(review): only meaningful if that split was computed from augmented_df3 - confirm which training cell ran last.
augmented_df3['is_train'] = train_mask.astype(int)
augmented_df3['is_val'] = val_mask.astype(int)
augmented_df3['is_test'] = test_mask.astype(int)


In [None]:
# Record split membership on augmented_df2 as integer flag columns, using the
# masks left in the kernel by the last get_data_split call.
# NOTE(review): only meaningful if that split was computed from augmented_df2 - confirm which training cell ran last.
augmented_df2['is_train'] = train_mask.astype(int)
augmented_df2['is_val'] = val_mask.astype(int)
augmented_df2['is_test'] = test_mask.astype(int)


In [None]:
# Record split membership on augmented_df as integer flag columns, using the
# masks left in the kernel by the last get_data_split call.
# NOTE(review): only meaningful if that split was computed from augmented_df - confirm which training cell ran last.
augmented_df['is_train'] = train_mask.astype(int)
augmented_df['is_val'] = val_mask.astype(int)
augmented_df['is_test'] = test_mask.astype(int)


<h1> Overall accuracy stats </h1>

In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
# Score every sample with the current pytorch_model, then print two numbers
# per split: the relative error of the mean prediction and the mean absolute
# relative error per sample. Both are fractions (nothing is multiplied by 100).
y_pred = pytorch_model(torch.from_numpy(X).float()).detach().numpy().squeeze()

# --- train split ---
print('Train stats')
train_errs = (y_pred[train_mask] - y_train) / y_train
print('Mean error pct: {}'.format(
    (y_pred[train_mask].mean() - np.mean(y_train)) / np.mean(y_train)))
print('Mean absolute error pct: {}'.format(np.absolute(train_errs).mean()))
print('=' * 20)

# --- validation split ---
print('Val stats')
val_errs = (y_pred[val_mask] - y_val) / y_val
print('Mean error pct: {}'.format(
    (y_pred[val_mask].mean() - np.mean(y_val)) / np.mean(y_val)))
print('Mean absolute error pct: {}'.format(np.absolute(val_errs).mean()))
print('=' * 20)

# --- test split ---
print('Test stats')
test_errs = (y_pred[test_mask] - y_test) / y_test
print('Mean error pct: {}'.format(
    (y_pred[test_mask].mean() - np.mean(y_test)) / np.mean(y_test)))
print('Mean absolute error pct: {}'.format(np.absolute(test_errs).mean()))


In [None]:
def generate_per_bucket_error(X, y, model=None, bucket_edges=None):
    """Tabulate relative prediction error per ground-truth weight bucket.

    Args:
        X: numpy array of model inputs, one row per sample (it is passed
            through ``torch.from_numpy``).
        y: ground-truth targets aligned with the rows of ``X``, in the same
            units as ``bucket_edges`` (the notebook passes ``1e-4 * weight``).
        model: callable applied to the float tensor built from ``X``; it must
            return one prediction per row. Defaults to the notebook-global
            ``pytorch_model`` so existing two-argument calls keep working.
        bucket_edges: ascending bucket boundaries; consecutive pairs form
            half-open buckets ``[low, high)``. Defaults to 0-9000 in steps of
            1000, scaled by 1e-4, so targets at or above the last edge are
            not reported.

    Returns:
        pandas.DataFrame with one row per bucket and the columns ``bucket``
        (label built from the edges multiplied by 1e4), ``mean_err`` (mean
        signed relative error) and ``mae`` (mean absolute relative error).
        Buckets that contain no samples hold NaN.
    """
    if model is None:
        model = pytorch_model
    if bucket_edges is None:
        bucket_edges = np.arange(0, 10000, 1000) * 1e-4

    y = np.asarray(y).reshape(-1)
    # reshape(-1) instead of squeeze(): squeeze() collapses a single-sample
    # prediction to a 0-d array, which cannot be indexed with a boolean mask.
    y_pred = model(torch.from_numpy(X).float()).detach().numpy().reshape(-1)

    bucket_strs, mean_errs, maes = [], [], []
    for low, high in zip(bucket_edges, bucket_edges[1:]):
        # int(...) keeps the labels integral whatever type round() returns
        # for numpy scalars.
        bucket_strs.append('{}-{}'.format(int(round(1e4 * low)), int(round(1e4 * high))))
        mask = (y >= low) & (y < high)
        if mask.any():
            rel_errs = (y_pred[mask] - y[mask]) / y[mask]
            mean_errs.append(np.mean(rel_errs))
            maes.append(np.mean(np.abs(rel_errs)))
        else:
            # Empty bucket: report NaN explicitly rather than letting
            # np.mean([]) emit a RuntimeWarning.
            mean_errs.append(np.nan)
            maes.append(np.nan)

    return pd.DataFrame({'bucket': bucket_strs, 'mean_err': mean_errs, 'mae': maes})

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

In [None]:
# Per-weight-bucket relative error for the train split, then the test split,
# using whichever pytorch_model is currently in the kernel.
print('Training dataset')
print('\n')
print(generate_per_bucket_error(X=X_train, y=y_train))
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_bucket_error(X=X_test, y=y_test))

<h1> Generate errors with respect to depth </h1>

In [None]:
def generate_per_depth_bucket_error(X, y, d, model=None, depth_edges=None):
    """Tabulate mean signed relative prediction error per depth bucket.

    Args:
        X: numpy array of model inputs, one row per sample (it is passed
            through ``torch.from_numpy``).
        y: ground-truth targets aligned with the rows of ``X``.
        d: per-sample depth values aligned with ``X`` and ``y``, in the same
            units as ``depth_edges``.
        model: callable applied to the float tensor built from ``X``; it must
            return one prediction per row. Defaults to the notebook-global
            ``pytorch_model`` so existing three-argument calls keep working.
        depth_edges: ascending bucket boundaries; consecutive pairs form
            half-open buckets ``[low, high)``. Defaults to
            ``np.arange(0.4, 2.6, 0.1)``.

    Returns:
        pandas.DataFrame with one row per bucket and the columns
        ``depth_bucket`` (label with edges rounded to 2 decimals) and
        ``mean_err`` (NaN-ignoring mean of ``(prediction - y) / y``).
        Buckets that contain no samples hold NaN.
    """
    if model is None:
        model = pytorch_model
    if depth_edges is None:
        depth_edges = np.arange(0.4, 2.6, 0.1)

    y = np.asarray(y).reshape(-1)
    d = np.asarray(d).reshape(-1)
    # reshape(-1) instead of squeeze(): squeeze() collapses a single-sample
    # prediction to a 0-d array, which cannot be indexed with a boolean mask.
    predictions = model(torch.from_numpy(X).float()).detach().numpy().reshape(-1)

    depth_buckets, mean_pct_errs = [], []
    for low_depth, high_depth in zip(depth_edges, depth_edges[1:]):
        depth_buckets.append('{}-{}'.format(round(low_depth, 2), round(high_depth, 2)))
        mask = (d >= low_depth) & (d < high_depth)
        if mask.any():
            mean_pct_errs.append(np.nanmean((predictions[mask] - y[mask]) / y[mask]))
        else:
            # Empty bucket: report NaN explicitly rather than letting
            # np.nanmean emit a RuntimeWarning on an empty slice.
            mean_pct_errs.append(np.nan)

    return pd.DataFrame({'depth_bucket': depth_buckets, 'mean_err': mean_pct_errs})

In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df3 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df3 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df3[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df3[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df3 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df3 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df3[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df3[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df2 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df2 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df2[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df2[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df2 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df2 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df2[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df2[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df3 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df3 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df3[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df3[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df3 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df3 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df3[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df3[test_mask].depth.values))


In [None]:
# Depth-bucketed relative error for the train and test splits; depths are read
# from augmented_df3 through the split masks.
# NOTE(review): assumes the masks, X_* and y_* in the kernel come from a split of augmented_df3 - confirm.
print('Training dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_train,
    y=y_train,
    d=augmented_df3[train_mask].depth.values))
print('\n')
print('=' * 20)
print('\n')
print('Testing dataset')
print('\n')
print(generate_per_depth_bucket_error(
    X=X_test,
    y=y_test,
    d=augmented_df3[test_mask].depth.values))


<h1> Save model (Can be loaded in backtesting notebook) </h1>

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton2.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton3.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton4.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton5.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton6.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton7.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton8.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton9.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton10.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton11.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton12.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton13.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton14.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton15.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton16.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton17.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton18.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton19.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton20.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton21.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton22.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton23.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton24.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a1.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a2.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a3.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a4.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a5.pb'
torch.save(pytorch_model.state_dict(), f)

In [None]:
# Write the state_dict of whichever pytorch_model is currently in the kernel to this path.
# NOTE(review): the file holds a torch state_dict despite the .pb extension.
f = '/root/data/alok/biomass_estimation/playground/output_model_bryton_a6.pb'
torch.save(pytorch_model.state_dict(), f)