In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from copy import copy

# Let pandas display up to 500 rows before truncating a rendered DataFrame.
pd.set_option('display.max_rows', 500)

In [None]:
# Read DB credentials from the JSON file named by the PROD_RESEARCH_SQL_CREDENTIALS
# environment variable; the context manager closes the file handle right away
# instead of leaving it open until garbage collection.
with open(os.environ['PROD_RESEARCH_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Fish metadata joined to keypoint annotations on the left image URL, keeping
# only rows that have keypoints and are not QA annotations.
query = """
    select * from research.fish_metadata a left join keypoint_annotations b
    on a.left_url = b.left_image_url 
    where b.keypoints is not null and b.is_qa = false;
"""
df = rds_access_utils.extract_from_database(query)

In [None]:
def get_world_keypoints(row):
    """Convert a row's stereo keypoints to world coordinates via ``pixel2world``.

    Returns ``pixel2world(leftCrop, rightCrop, camera_metadata)`` when both
    ``'leftCrop'`` and ``'rightCrop'`` are present in ``row.keypoints``;
    otherwise returns ``None``.
    """
    has_both_crops = 'leftCrop' in row.keypoints and 'rightCrop' in row.keypoints
    if not has_both_crops:
        return None
    return pixel2world(row.keypoints['leftCrop'], row.keypoints['rightCrop'], row.camera_metadata)
    
# Compute world keypoints row by row, then keep only the rows where both
# crops were available (get_world_keypoints returned something).
df['world_keypoints'] = df.apply(get_world_keypoints, axis=1)

df = df[df.world_keypoints.notnull()]

In [None]:
# Split by fish id rather than by row, so every annotation of a given fish
# falls on the same side of the train/held-out split.
# Seed here: this sampling runs before any other seeding in the notebook, so
# without it the split changes on every kernel restart.
# NOTE(review): unique() keeps order of first appearance and the query has no
# ORDER BY, so a fully reproducible split also needs a stable row order.
random.seed(0)
gtsf_fish_identifiers = list(df.fish_id.unique())
train_size = int(0.8 * len(gtsf_fish_identifiers))
fish_ids = random.sample(gtsf_fish_identifiers, train_size)
train_mask = df.fish_id.isin(fish_ids)

In [None]:
def process_row(row, n_factors=1, jitters=(0,), oos=False):
    """Build augmented feature samples (and labels) from one annotated stereo pair.

    The keypoints are mirrored if the fish faces the other way, translated so the
    left-view tail notch is the origin, and then re-projected once per
    (scale factor, jitter, trial) combination.

    Args:
        row: record exposing ``keypoints`` (dict with ``'leftCrop'`` / ``'rightCrop'``
            lists of ``{'keypointType', 'xFrame', 'yFrame'}`` items),
            ``camera_metadata`` and, unless ``oos`` is true, ``weight``.
        n_factors: number of scale factors to apply. The first factor is always
            1.0 (unscaled); the rest are drawn uniformly from [0.5, 1.5].
        jitters: iterable of standard deviations for Gaussian noise added to the
            left and right x coordinates. A jitter > 0 is sampled 3 times, a
            jitter of 0 once.
        oos: when true, no labels are collected.

    Returns:
        ``(X_row, labels_row)``. ``X_row`` is a list of samples; each sample holds
        one ``[wkp[0] / depth, wkp[2] / depth, 0.1 / depth]`` triple per entry of
        the module-level ``body_parts`` list, in that order. ``labels_row`` holds
        ``row.weight`` once per sample (empty when ``oos`` is true). Both lists
        are empty when either crop is missing or empty.

    Note:
        Reads the module-level ``body_parts`` list, which must be defined before
        this function is called.
    """
    X_row, labels_row = [], []
    left_keypoints_list = row.keypoints.get('leftCrop')
    right_keypoints_list = row.keypoints.get('rightCrop')
    cm = row.camera_metadata

    if not (left_keypoints_list and right_keypoints_list):
        return X_row, labels_row

    left_keypoints = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in left_keypoints_list}
    right_keypoints = {item['keypointType']: np.array([item['xFrame'], item['yFrame']]) for item in right_keypoints_list}

    adj_left_keypoints, adj_right_keypoints = copy(left_keypoints), copy(right_keypoints)
    if adj_left_keypoints['UPPER_LIP'][0] < adj_left_keypoints['TAIL_NOTCH'][0]:
        # Fish faces the other way: mirror horizontally. Mirroring swaps the role
        # of the two views, so the new left point comes from the right view and
        # vice versa.
        # NOTE(review): 4096 is presumably the frame width in pixels - confirm.
        for bp in body_parts:
            left_xy, right_xy = left_keypoints[bp], right_keypoints[bp]
            adj_left_keypoints[bp] = np.array([4096 - right_xy[0], right_xy[1]])
            adj_right_keypoints[bp] = np.array([4096 - left_xy[0], left_xy[1]])

    # Translate so the left-view tail notch sits at the origin. The same offset
    # is subtracted from both views, which leaves x_left - x_right unchanged.
    # Out-of-place subtraction avoids mutating arrays shared with
    # left_keypoints / right_keypoints and avoids in-place casting errors when
    # coordinates mix integer and float values.
    left_tail_notch_coords = copy(adj_left_keypoints['TAIL_NOTCH'])
    for bp in body_parts:
        adj_left_keypoints[bp] = adj_left_keypoints[bp] - left_tail_notch_coords
        adj_right_keypoints[bp] = adj_right_keypoints[bp] - left_tail_notch_coords

    for n in range(n_factors):
        # The first pass is always the unscaled sample, so n_factors=1 yields
        # the original geometry rather than a randomly rescaled one.
        factor = 1.0 if n == 0 else np.random.uniform(low=0.5, high=1.5)
        for jitter in jitters:
            trials = 3 if jitter > 0 else 1
            for _ in range(trials):
                data_point = []
                for bp in body_parts:
                    x, y = factor * adj_left_keypoints[bp][0], factor * adj_left_keypoints[bp][1]
                    x_right = factor * adj_right_keypoints[bp][0]
                    x += np.random.normal(0, jitter)
                    x_right += np.random.normal(0, jitter)
                    d = abs(x - x_right)
                    if d == 0:
                        # Zero disparity has no finite depth: drop this whole sample.
                        break
                    depth = depth_from_disp(d, cm)
                    wkp = convert_to_world_point(y, x, depth, cm)
                    data_point.append([wkp[0] / depth, wkp[2] / depth, 0.1 / depth])
                else:
                    # Reached only when no keypoint hit the zero-disparity break.
                    X_row.append(data_point)
                    if not oos:
                        labels_row.append(row.weight)

    return X_row, labels_row


In [None]:
# Run process_row on the first 20 rows of `tdf` without collecting labels; the
# return values are discarded.
# NOTE(review): `tdf` is loaded in a later cell and process_row reads the global
# `body_parts`, also defined in a later cell, so this cell fails on a fresh
# kernel executed top to bottom.
for idx, row in tdf.head(20).iterrows():
    process_row(row, oos=True)

In [None]:
np.random.seed(0)
random.seed(0)
X, labels = [], []

# Sorted so the per-keypoint feature order produced by process_row is stable.
body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

# Filter once instead of re-filtering the frame on every progress print.
train_df = df[train_mask]
n_train_rows = train_df.shape[0]

row_count = 0
for _, row in train_df.iterrows():

    # 5 scale factors x (1 clean + 3 trials at jitter 10 + 3 trials at jitter 20).
    X_row, labels_row = process_row(row, n_factors=5, jitters=[0, 10, 20])
    X.extend(X_row)
    labels.extend(labels_row)

    if row_count % 1000 == 0:
        # Report an actual percentage (0-100), matching the message text.
        print('Percentage complete: {}'.format(100.0 * row_count / n_train_rows))
    row_count += 1

# Labels are divided by 1e4; the evaluation plot multiplies by 1e4 and labels
# the axis in grams. NOTE(review): the reason for halving the features is not
# visible here - the same 2.0 must be applied to every inference input.
X, labels = np.array(X) / 2.0, np.array(labels) / 10000.0


In [None]:
# Left image URL of the first row whose id is 718521.
# NOTE(review): 718521 is a hard-coded id; .iloc[0] raises IndexError if no row matches.
df[df.id == 718521].left_image_url.iloc[0]

In [None]:
# Right image URL of the first row whose id is 718521.
# NOTE(review): 718521 is a hard-coded id; .iloc[0] raises IndexError if no row matches.
df[df.id == 718521].right_image_url.iloc[0]

In [None]:
class KeypointsDataset(Dataset):
    """Keypoints dataset.

    Pairs a feature array with a label array so a ``DataLoader`` can batch them.

    Args:
        X: array indexed along its first axis; ``X.shape[0]`` is the dataset size.
        labels: per-sample targets, indexable in step with ``X``.
        transform: optional callable applied to each sample's feature array
            before it is converted to a tensor. ``None`` (the default) leaves
            the features untouched.
    """

    def __init__(self, X, labels, transform=None):
        self.X = X
        self.labels = labels
        # Previously accepted but silently dropped; now stored and honoured.
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the first axis of ``X``."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, label)`` as float32 tensors; label has shape ``(1,)``."""
        x = self.X[idx]
        y = self.labels[idx]
        if self.transform is not None:
            x = self.transform(x)

        return torch.from_numpy(x).float(), torch.from_numpy(np.array([y])).float()


In [None]:
# Network architecture: a small fully connected regressor over the flattened keypoint features.
import torch
from torch import nn

class Network(nn.Module):
    """Fully connected regressor: ``input_dim -> 256 -> 128 -> 64 -> 1``.

    ReLU follows each hidden layer; the output layer is linear.

    Args:
        input_dim: number of features per sample after flattening. The default
            of 33 matches the notebook's 11 body parts x 3 features each.
    """

    def __init__(self, input_dim=33):
        super().__init__()
        # Attribute names and creation order are kept so existing state_dicts
        # and seeded initialisation stay compatible.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten everything after the batch axis and return a ``(batch, 1)`` tensor."""
        x = x.view(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return self.output(x)
        



In [None]:
from torch.utils.data import DataLoader

# Serve the training arrays in shuffled mini-batches of 25 using one worker.
dataset = KeypointsDataset(X, labels)
dataloader = DataLoader(
    dataset,
    batch_size=25,
    shuffle=True,
    num_workers=1,
)


In [None]:
# Seed torch so weight initialisation and batch shuffling repeat across runs.
torch.manual_seed(0)

network = Network()
epochs = 2000
optimizer = torch.optim.Adam(network.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss()

for epoch in range(epochs):
    running_loss = 0.0
    for i, data_batch in enumerate(dataloader):

        optimizer.zero_grad()
        X_batch, y_batch = data_batch
        y_pred = network(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 0 and i > 0:
            # i is zero-based, so i + 1 batches have been accumulated so far.
            print(running_loss / (i + 1))

    # Mean per-batch loss over the whole epoch.
    loss_for_epoch = running_loss / len(dataloader)
    print('Loss for epoch {}: {}'.format(epoch, loss_for_epoch))





In [None]:
# Mean per-batch training loss of the last completed epoch.
loss_for_epoch

In [None]:
# Report whether CUDA is usable. NOTE(review): no cell in this notebook moves the
# model or tensors to a device, so training and inference run on CPU regardless.
torch.cuda.is_available()

In [None]:
# Held-out evaluation set: rows whose fish were not sampled into the training
# split, each expanded with 3 scale factors and 3 trials at jitter 20.
X_t, labels_t = [], []
body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

held_out_df = df[~train_mask]
for idx, row in held_out_df.iterrows():
    X_row, labels_row = process_row(row, n_factors=3, jitters=[20])
    X_t += X_row
    labels_t += labels_row

# Same scaling as the training arrays: features halved, labels divided by 1e4.
X_t = np.array(X_t) / 2.0
labels_t = np.array(labels_t) / 10000.0


 
 

In [None]:
# Inference only: disable autograd so no graph is recorded for the whole
# held-out set during the forward pass.
with torch.no_grad():
    preds = network(torch.from_numpy(X_t).float())
predictions = preds.numpy().squeeze()
weights = labels_t

In [None]:
# Predicted vs. ground-truth weight (both rescaled by 1e4), with the y = x
# reference line in red.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(weights * 1e4, predictions * 1e4)
ax.plot([0, 1e4], [0, 1e4], color='red')
ax.set_xlim([0, 1e4])
ax.set_ylim([0, 1e4])
ax.set_xlabel('Ground truth weight (grams)')
ax.set_ylabel('Prediction (grams)')
ax.grid()
plt.show()

In [None]:
# Mean absolute relative error of the held-out predictions (a fraction, not a percent).
np.mean(abs((predictions - weights)/weights))

In [None]:
# Relative bias: how far the mean prediction is from the mean true weight, as a fraction.
(predictions.mean() - weights.mean())/weights.mean()

In [None]:
# Load the 'table' key of an HDF5 results file into `tdf`.
# NOTE(review): hard-coded absolute path under /root/data - it will not resolve
# on another machine; consider a configurable data directory.
f = '/root/data/alok/biomass_estimation/results/model_lateral_only/results_557ec1732d8bc8bc66951d2ea4e69b935d69b111_model_lateral_only_research-exp-id-01-vikingfjord-20190628-20190630.h5'
tdf = pd.read_hdf(f, 'table')



In [None]:
body_parts = sorted([
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'ANAL_FIN',
    'PECTORAL_FIN',
    'PELVIC_FIN',
    'DORSAL_FIN',
    'UPPER_LIP',
    'EYE',
    'UPPER_PRECAUDAL_PIT', 
    'LOWER_PRECAUDAL_PIT',
    'HYPURAL_PLATE'
])

# Out-of-sample features: one pass per row, no jitter, no labels collected.
X_o = [
    sample
    for _, row in tdf.iterrows()
    for sample in process_row(row, n_factors=1, jitters=[0], oos=True)[0]
]

# Same feature scaling as the training arrays.
X_o = np.array(X_o) / 2.0
        

 
 

In [None]:
# Inference only: disable autograd so no graph is recorded for the
# out-of-sample forward pass.
with torch.no_grad():
    preds = network(torch.from_numpy(X_o).float())
predictions = preds.numpy().squeeze()

In [None]:
# Distribution of the out-of-sample predictions, rescaled by 1e4.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(predictions * 1e4)
plt.show()

In [None]:
# Compare out-of-sample (red) and held-out (blue) inputs for the first body part
# in sorted order ('ADIPOSE_FIN'): feature 0 (wkp[0] / depth) against
# feature 2 (0.1 / depth), both after the /2.0 scaling.
fig, ax = plt.subplots()
ax.scatter(X_o[:, 0, 0], X_o[:, 0, 2], color='r', alpha=0.5)
ax.scatter(X_t[:, 0, 0], X_t[:, 0, 2], color='b', alpha=0.5)




In [None]:
# Add a 'depth' column: component 1 of each row's EYE world keypoint, or None
# when the row has no world keypoints.
# NOTE(review): assumes index 1 of pixel2world's output is the distance from
# the camera - confirm against aquabyte.optics.
tdf['depth'] = tdf.world_keypoints.apply(lambda x: x['EYE'][1] if x else None) 

In [None]:
# Distribution of the per-row 'depth' column of the out-of-sample table.
fig, ax = plt.subplots()
ax.hist(tdf.depth)
plt.show()

In [None]:
# Training distribution of feature 0 (wkp[0] / depth, halved) for the first
# body part in sorted order.
fig, ax = plt.subplots()
ax.hist(X[:, 0, 0])
plt.show()

In [None]:
# Training distribution of feature 2 (0.1 / depth, halved) for the first body
# part in sorted order, in 20 bins.
fig, ax = plt.subplots()
ax.hist(X[:, 0, 2], bins=20)
plt.show()

In [None]:
# Out-of-sample distribution of feature 2 (0.1 / depth, halved) for the first
# body part in sorted order.
fig, ax = plt.subplots()
ax.hist(X_o[:, 0, 2])
plt.show()

In [None]:
# Re-run inference on the out-of-sample features; autograd is disabled so no
# graph is recorded for the forward pass.
with torch.no_grad():
    preds = network(torch.from_numpy(X_o).float())
predictions = preds.numpy().squeeze()

In [None]:
# Distribution of the out-of-sample predictions, rescaled by 1e4.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(predictions * 1e4)
plt.show()

In [None]:
# Distribution of est_weight (scaled by 1e4) for rows with depth below 0.8.
# NOTE(review): `est_weight` is never assigned in the visible notebook -
# presumably it already exists in the HDF table; confirm.
fig, ax = plt.subplots(figsize=(20, 10))
near_rows = tdf[tdf.depth < 0.8]
ax.hist(near_rows.est_weight * 1e4)
ax.grid()
plt.show()

In [None]:
# est_weight (scaled by 1e4) against depth, with the y axis clipped to
# [-10000, 20000].
# NOTE(review): `est_weight` is never assigned in the visible notebook -
# presumably it already exists in the HDF table; confirm.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(tdf.depth, 1e4 * tdf.est_weight)
ax.set_ylim([-10000, 20000])
ax.grid()
plt.show()

In [None]:
# The table's estimated_biomass_g column against depth, with the y axis clipped
# to [-10000, 20000] for comparison with the est_weight scatter.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(tdf.depth, tdf.estimated_biomass_g)
ax.set_ylim([-10000, 20000])
ax.grid()
plt.show()