In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer, _normalize_world_keypoints
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool
import datetime as dt
import pytz
import matplotlib.dates as mdates
# Date tick formatter built from the '%d' format string (day of the month).
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
myFmt = mdates.DateFormatter('%d')

import matplotlib
# Global plot typography: bold, size-22 text for every figure in the notebook.
# 'normal' is not a font family matplotlib recognises (findfont warns and falls
# back to its default); 'sans-serif' is the valid generic family name.
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 22}

matplotlib.rc('font', **font)

import matplotlib.cm as cm
# Raise pandas' display limits to 500 rows and 500 characters per column.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)

<h1> Predicted Weight Histogram for Experiment ID #1 </h1>

In [None]:
# Load the prediction tables of both model variants (4 vs. 20 eigenvectors)
# for experiment ID #1.
f_4eig = '/root/data/temp/results_452623dd3e5172ef6717c3f8e302a52c19e156b5_model_15K_4eig_hypural_plate_research-exp-id-01-vikingfjord-20190628-20190630.h5'
f_20eig = '/root/data/temp/results_cec5035f3af08e7af0fe96cfc3d8b72fa0e2651e_model_15K_20eig_hypural_plate_research-exp-id-01-vikingfjord-20190628-20190630.h5'
df_4eig, df_20eig = (pd.read_hdf(path, 'table') for path in (f_4eig, f_20eig))

# Keep only predictions strictly between -2000 and 20000; the masks are reused
# by the summary cell that follows.
mask_4eig = df_4eig.estimated_biomass_g.gt(-2000) & df_4eig.estimated_biomass_g.lt(20000)
mask_20eig = df_20eig.estimated_biomass_g.gt(-2000) & df_20eig.estimated_biomass_g.lt(20000)

# Overlay the two predicted-weight histograms on a single axis.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df_4eig.loc[mask_4eig, 'estimated_biomass_g'],
        bins=20, color='blue', label='4 eigenvectors', alpha=0.5)
ax.hist(df_20eig.loc[mask_20eig, 'estimated_biomass_g'],
        bins=20, color='red', label='20 eigenvectors', alpha=0.5)
# Vertical marker at 6440, the value the next cell uses as ground truth.
ax.axvline(6440, color='red')
ax.set_title('Predicted biomass distribution for Waiting Pen Experiment ID #1')
ax.set_xlabel('Predicted weight (g)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Summarise each model's in-range predictions by their median and compare it
# with the 6440 ground-truth weight of this experiment.
# The variable names and printed labels both say "median", and the matching
# Experiment #3 cell uses .median(), so the statistic computed here is the median.
median_biomass_prediciton_4eig = df_4eig[mask_4eig].estimated_biomass_g.median()
median_biomass_prediciton_20eig = df_20eig[mask_20eig].estimated_biomass_g.median()

# The deviation is printed as a signed fraction of the ground truth (0.05 means 5%).
print('Median biomass prediction with 4 eigenvectors: {}'.format(median_biomass_prediciton_4eig))
print('Percentage deviation from ground truth with 4 eigenvectors: {}'.format((median_biomass_prediciton_4eig - 6440.0) / 6440.0))

print('Median biomass prediction with 20 eigenvectors: {}'.format(median_biomass_prediciton_20eig))
print('Percentage deviation from ground truth with 20 eigenvectors: {}'.format((median_biomass_prediciton_20eig - 6440.0) / 6440.0))

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

In [None]:
# TODO: Define your network architecture here
import torch
from torch import nn

class Network(nn.Module):
    """Small fully connected net: 33 inputs -> 16 hidden units (ReLU) -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        # The attribute names below are also the parameter keys of the state dict.
        self.fc1 = nn.Linear(33, 16)
        self.fc2 = nn.Linear(16, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Flatten each sample of the batch and return one value in (0, 1) per sample."""
        flat = x.view(x.shape[0], -1)
        hidden = self.relu(self.fc1(flat))
        return self.sigmoid(self.fc2(hidden))
        



In [None]:
# Rebuild the filter network and restore its saved weights.
# map_location='cpu' lets a checkpoint that was written from a GPU be read on a
# CPU-only machine; the freshly constructed Network holds CPU parameters anyway.
model = Network()
state_dict = torch.load('/root/data/alok/biomass_estimation/playground/filter_nn_model.pth', map_location='cpu')
model.load_state_dict(state_dict)
# Put the module in evaluation mode; as the last expression this also echoes the module.
model.eval()

In [None]:
# Run the filter network on every row of df_4eig.
# Body parts are taken from the first row's keypoints and sorted alphabetically;
# each row's features are assembled in that same order.
body_parts = sorted(df_4eig.world_keypoints.iloc[0].keys())
is_goods = []
for row_idx, row in df_4eig.iterrows():
    wkps = row.world_keypoints
    if not wkps:
        # No keypoints for this row: record a missing verdict.
        is_goods.append(None)
        continue
    norm_wkps = _normalize_world_keypoints(wkps)
    features = np.array([norm_wkps[bp] for bp in body_parts])
    # Flatten to a single-sample batch and round the network output to 0.0 / 1.0.
    prediction = model(torch.from_numpy(features).float().view(1, -1))
    is_goods.append(prediction.round().item())



In [None]:
# Store the per-row filter outputs (rounded network output, or None where the row had no keypoints).
df_4eig['is_good'] = is_goods

In [None]:

# Compare the predicted-biomass distribution before and after the keypoint filter.
# Both masks are reused by the summary cells below.
is_good_mask = df_4eig.is_good == 1
hard_outlier_mask = df_4eig.estimated_biomass_g.lt(0) | df_4eig.estimated_biomass_g.gt(15000)
keep_mask = ~hard_outlier_mask

fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df_4eig.loc[keep_mask, 'estimated_biomass_g'],
        bins=10, color='blue', label='4 eigenvectors', alpha=0.5)
ax.hist(df_4eig.loc[keep_mask & is_good_mask, 'estimated_biomass_g'],
        bins=10, color='red', label='with filter', alpha=0.5)
# Vertical marker at 6440, the ground-truth value used for this experiment.
ax.axvline(6440, color='red')
ax.set_title('Predicted biomass distribution for Waiting Pen Experiment ID #1')
ax.set_xlabel('Predicted weight (g)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Median predicted biomass over all rows that are not hard outliers.
df_4eig.loc[~hard_outlier_mask, 'estimated_biomass_g'].median()

In [None]:
# Median predicted biomass over non-outlier rows that the filter marked as good.
df_4eig.loc[~hard_outlier_mask & is_good_mask, 'estimated_biomass_g'].median()

<h1> Predicted Weight Histogram for Experiment ID #3 </h1>

In [None]:
# Load the prediction tables of both model variants (4 vs. 20 eigenvectors)
# for experiment ID #3. This rebinds df_4eig / df_20eig for the cells below.
f_4eig = '/root/data/temp/results_452623dd3e5172ef6717c3f8e302a52c19e156b5_model_15K_4eig_hypural_plate_research-exp-id-03-vikingfjord-20190709-20190710.h5'
f_20eig = '/root/data/temp/results_cec5035f3af08e7af0fe96cfc3d8b72fa0e2651e_model_15K_20eig_hypural_plate_research-exp-id-03-vikingfjord-20190709-20190710.h5'
df_4eig, df_20eig = (pd.read_hdf(path, 'table') for path in (f_4eig, f_20eig))

# Keep only predictions strictly between -2000 and 20000; the masks are reused
# by the summary cell that follows.
mask_4eig = df_4eig.estimated_biomass_g.gt(-2000) & df_4eig.estimated_biomass_g.lt(20000)
mask_20eig = df_20eig.estimated_biomass_g.gt(-2000) & df_20eig.estimated_biomass_g.lt(20000)

# Overlay the two predicted-weight histograms on a single axis.
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(df_4eig.loc[mask_4eig, 'estimated_biomass_g'],
        bins=20, color='blue', label='4 eigenvectors', alpha=0.5)
ax.hist(df_20eig.loc[mask_20eig, 'estimated_biomass_g'],
        bins=20, color='red', label='20 eigenvectors', alpha=0.5)
# Vertical marker at 5710, the value the next cell uses as ground truth.
ax.axvline(5710, color='red')
ax.set_title('Predicted biomass distribution for Waiting Pen Experiment ID #3')
ax.set_xlabel('Predicted weight (g)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Experiment #3: median in-range prediction of each model and its signed
# deviation from the 5710 ground truth, printed as a fraction (0.05 means 5%).
ground_truth_g = 5710.0
median_biomass_prediciton_4eig = df_4eig.loc[mask_4eig, 'estimated_biomass_g'].median()
median_biomass_prediciton_20eig = df_20eig.loc[mask_20eig, 'estimated_biomass_g'].median()

deviation_4eig = (median_biomass_prediciton_4eig - ground_truth_g) / ground_truth_g
deviation_20eig = (median_biomass_prediciton_20eig - ground_truth_g) / ground_truth_g

print('Median biomass prediction with 4 eigenvectors: {}'.format(median_biomass_prediciton_4eig))
print('Percentage deviation from ground truth with 4 eigenvectors: {}'.format(deviation_4eig))
print('Median biomass prediction with 20 eigenvectors: {}'.format(median_biomass_prediciton_20eig))
print('Percentage deviation from ground truth with 20 eigenvectors: {}'.format(deviation_20eig))

In [None]:
# Run the filter network on every row of the Experiment #3 frame and store the result.
# Body parts are taken from the first row's keypoints and sorted alphabetically;
# each row's features are assembled in that same order.
body_parts = sorted(df_4eig.world_keypoints.iloc[0].keys())
is_goods = []
for row_idx, row in df_4eig.iterrows():
    wkps = row.world_keypoints
    if not wkps:
        # No keypoints for this row: record a missing verdict.
        is_goods.append(None)
        continue
    norm_wkps = _normalize_world_keypoints(wkps)
    features = np.array([norm_wkps[bp] for bp in body_parts])
    # Flatten to a single-sample batch and round the network output to 0.0 / 1.0.
    prediction = model(torch.from_numpy(features).float().view(1, -1))
    is_goods.append(prediction.round().item())

# One entry per row, in row order.
df_4eig['is_good'] = is_goods


In [None]:

# Compare the Experiment #3 predicted-biomass distribution before and after the
# keypoint filter. Both masks are reused by the summary cell below.
plt.figure(figsize=(20, 10))
is_good_mask = df_4eig.is_good == 1
# Hard outliers: negative predictions or predictions above 15000.
hard_outlier_mask = (df_4eig.estimated_biomass_g < 0) | (df_4eig.estimated_biomass_g > 15000)
plt.hist(df_4eig[~hard_outlier_mask].estimated_biomass_g, bins=10, color='blue', label='4 eigenvectors', alpha=0.5)
plt.hist(df_4eig[~hard_outlier_mask & is_good_mask].estimated_biomass_g, bins=10, color='red', label='with filter', alpha=0.5)
# Vertical marker at 5710, the ground-truth value used for Experiment #3.
plt.axvline(5710, color='red')
# This cell plots the Experiment #3 data, so the title must name experiment #3.
plt.title('Predicted biomass distribution for Waiting Pen Experiment ID #3')
plt.xlabel('Predicted weight (g)')
plt.ylabel('Frequency')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Mean predicted biomass without hard outliers: first for all remaining rows,
# then only for the rows the filter marked as good.
non_outlier_biomass = df_4eig.loc[~hard_outlier_mask, 'estimated_biomass_g']
filtered_biomass = df_4eig.loc[~hard_outlier_mask & is_good_mask, 'estimated_biomass_g']
print(non_outlier_biomass.mean())
print(filtered_biomass.mean())

In [None]:
# Scatter of the `length` column against `ground_truth_eye_depth` for rows whose
# predicted biomass lies strictly between 0 and 12000.
# NOTE(review): `df` is not assigned in any cell visible in this notebook, so on a
# fresh kernel this cell raises NameError. Presumably it should be one of the
# loaded result frames — confirm which frame carries these columns.
plt.figure(figsize=(20, 10))
mask = (df.estimated_biomass_g > 0) & (df.estimated_biomass_g < 12000)
plt.scatter(df[mask].ground_truth_eye_depth, df[mask].length)
plt.xlabel('Working Distance (m)')
plt.ylabel('Predicted length (m)')
plt.title('Est. Fish Length vs. Working Distance')
plt.grid()
plt.show()

In [None]:
# Scatter of predicted biomass against `ground_truth_eye_depth` for rows whose
# predicted biomass lies strictly between 0 and 12000.
# NOTE(review): `df` is not assigned in any cell visible in this notebook, so on a
# fresh kernel this cell raises NameError. Presumably it should be one of the
# loaded result frames — confirm which frame carries these columns.
plt.figure(figsize=(20, 10))
mask = (df.estimated_biomass_g > 0) & (df.estimated_biomass_g < 12000)
plt.scatter(df[mask].ground_truth_eye_depth, df[mask].estimated_biomass_g)
plt.xlabel('Working Distance (m)')
# The y-axis shows estimated_biomass_g, i.e. grams, not metres.
plt.ylabel('Predicted weight (g)')
plt.title('Est. Fish Weight vs. Working Distance')
plt.grid()
plt.show()