In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool
import datetime as dt
import pytz
import matplotlib.dates as mdates
# Date tick formatter built from the strftime pattern '%d' (day of month).
# NOTE(review): not referenced in the visible part of this notebook — confirm it is still needed.
myFmt = mdates.DateFormatter('%d')

import matplotlib.cm as cm
# Widen pandas' rich display: show up to 500 rows and up to 500 characters per column cell.
pd.options.display.max_rows = 500
pd.options.display.max_colwidth = 500

<h1> Load Data </h1>

In [None]:
# Result files to compare, one per model variant. Going by the file names, all three
# come from the same run (exp_id 10, bremnes, 20190806) — TODO confirm against the pipeline.
f_150_eig = '/root/data/temp/results_34ff311a393b0e1ee0c885bb542e5424e51b67de_20190926_waiting_pen_depth_experiments_exp_id_10_bremnes_20190806_20190806.h5'
f_tail_notch = '/root/data/temp/results_e4ae5e0547d81f83c8d01ea4a7086fd433a8f3f2_model_15K_20eig_tail_notch_exp_id_10_bremnes_20190806_20190806.h5'
f_hypural_plate = '/root/data/temp/results_cec5035f3af08e7af0fe96cfc3d8b72fa0e2651e_model_15K_20eig_hypural_plate_exp_id_10_bremnes_20190806_20190806.h5'

# Every HDF5 file is read with the same key.
key = 'table'

# Load the three result sets in the order the paths are listed above.
df_150_eig, df_tail_notch, df_hypural_plate = (
    pd.read_hdf(results_path, key)
    for results_path in (f_150_eig, f_tail_notch, f_hypural_plate)
)

<h1> Predicted Weight Histogram </h1>

In [None]:
def _plausible_biomass_mask(results):
    """Return a boolean mask of rows whose estimated_biomass_g lies strictly between -2000 and 20000."""
    biomass = results.estimated_biomass_g
    return (biomass > -2000) & (biomass < 20000)


# The same window is applied to every result set. The hypural-plate mask is not
# plotted in this cell, but later cells rely on it.
mask_150_eig = _plausible_biomass_mask(df_150_eig)
mask_tail_notch = _plausible_biomass_mask(df_tail_notch)
mask_hypural_plate = _plausible_biomass_mask(df_hypural_plate)

fig, ax = plt.subplots(figsize=(20, 10))

# Both histograms share binning and transparency so the overlap stays readable.
shared_hist_kwargs = dict(bins=20, alpha=0.5)
ax.hist(
    df_150_eig.loc[mask_150_eig, 'estimated_biomass_g'],
    color='blue',
    label='150 eigenvectors',
    **shared_hist_kwargs,
)
ax.hist(
    df_tail_notch.loc[mask_tail_notch, 'estimated_biomass_g'],
    color='red',
    label='20 eigenvectors',
    **shared_hist_kwargs,
)

ax.set_title('Predicted biomass distribution for 8/6 Waiting Pen Deployment')
ax.set_xlabel('Predicted weight (g)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Median estimated biomass of the hypural-plate results, restricted to the rows selected by mask_hypural_plate.
df_hypural_plate.loc[mask_hypural_plate, 'estimated_biomass_g'].median()

In [None]:
# Median estimated biomass of the tail-notch results, restricted to the rows selected by mask_tail_notch.
df_tail_notch.loc[mask_tail_notch, 'estimated_biomass_g'].median()

In [None]:
# Difference between the 150-eig and hypural-plate estimates (150-eig minus hypural-plate).
# pandas aligns the two Series on their index labels, so a label present in only one
# frame produces NaN. No range mask is applied here.
diffs = df_150_eig['estimated_biomass_g'].sub(df_hypural_plate['estimated_biomass_g'])
