In [None]:
import json, os
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import numpy as np
from itertools import combinations
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.decomposition import PCA
from wpca import WPCA
from sklearn.preprocessing import StandardScaler
from aquabyte.accuracy_metrics import AccuracyMetricsGenerator
from aquabyte.data_access_utils import S3AccessUtils, RDSAccessUtils
from aquabyte.optics import euclidean_distance, pixel2world
from aquabyte.visualize import Visualizer
import random
from scipy.stats import norm
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from multiprocessing import Pool
import datetime as dt
import pytz
import matplotlib.dates as mdates
myFmt = mdates.DateFormatter('%d')

import matplotlib.cm as cm
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)

<h1> Download all data for Waiting Pen Experiments (ID = 10, 11) </h1>

In [None]:
# Load the production SQL credentials from the JSON file whose path is given by
# the PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle as soon as the JSON is parsed instead of leaving it open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Keypoint annotations for pen 46 with captured_at in [2019-08-06, 2019-08-07)
# and a non-null keypoints column.
query = """
    select * from keypoint_annotations where pen_id=46
    and captured_at >= '2019-08-06'
    and captured_at < '2019-08-07'
    and keypoints is not null;
"""
df = rds_access_utils.extract_from_database(query)

<h1> Load and preprocess depth data and join with existing df </h1>

In [None]:
# Parse the capture times into pandas datetimes so they can be compared
# against the depth-window timestamps later on.
df['captured_at'] = pd.to_datetime(df['captured_at'])


In [None]:
def get_timestamp(x):
    """Convert a row's Europe/Oslo local date and time into a UTC timestamp string.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide ``x['Date']`` as a ``'day.month.year'`` string and
        ``x['time']`` as an ``'hour:minute:second'`` string.

    Returns
    -------
    str
        The same instant expressed in UTC, formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Raises
    ------
    ValueError
        If either field does not split into exactly three integer parts.
    """
    day, month, year = map(int, x['Date'].split('.'))
    hour, minute, second = map(int, x['time'].split(':'))
    naive_local = dt.datetime(year, month, day, hour, minute, second)

    oslo = pytz.timezone('Europe/Oslo')
    # is_dst=None asks pytz to raise on ambiguous or non-existent local times
    # (DST transitions) rather than silently picking an offset.
    aware_local = oslo.localize(naive_local, is_dst=None)
    return aware_local.astimezone(pytz.utc).strftime('%Y-%m-%d %H:%M:%S')
    

In [None]:
# Read the depth log, keep only the pen-6 rows and discard the 'Down.1'/'Up.1'
# columns, which this experiment does not use.
depth_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/waiting_pen_depth_experiments.csv')
depth_df = depth_df.loc[depth_df['Pen'] == 6.0].drop(columns=['Down.1', 'Up.1'])

# Reshape from one row per depth (with 'Down' and 'Up' time columns) to one row
# per time: stack two copies of the remaining columns and attach the Down times
# to the first copy and the Up times to the second.
times = depth_df['Down'].tolist() + depth_df['Up'].tolist()
cols = [name for name in depth_df.columns.tolist() if name not in ('Up', 'Down')]
depth_df = pd.concat([depth_df[cols]] * 2, axis=0)
depth_df['time'] = times

# get_timestamp returns UTC strings; parse them and mark the result as UTC.
# Wrapping in a DatetimeIndex makes the column assignment positional, which
# matters because the stacked frame has duplicate index labels.
utc_strings = depth_df.apply(get_timestamp, axis=1)
depth_df['ts'] = pd.DatetimeIndex(pd.to_datetime(utc_strings)).tz_localize(tz='UTC')
depth_df = depth_df.sort_values('ts', ascending=True)

In [None]:
# Label every annotation with the depth of the time window it falls in.
# Start from NaN so annotations outside all windows stay unlabelled.
df['depth'] = np.nan

# Each pair of consecutive depth_df rows delimits a half-open window
# [ts, next_ts); the window takes the depth of its opening row.
# NOTE(review): this also treats the gap between one row's 'Up' time and the
# next row's 'Down' time as a window at the earlier depth - confirm intended.
for i in range(len(depth_df) - 1):
    ts, next_ts = depth_df['ts'].iloc[i], depth_df['ts'].iloc[i + 1]
    depth = depth_df['Depth (m)'].iloc[i]
    mask = df['captured_at'].ge(ts) & df['captured_at'].lt(next_ts)
    df.loc[mask, 'depth'] = depth
    

In [None]:
# For every window between consecutive depth_df timestamps, record the window
# start, the depth of the opening row, and how many annotations were captured
# inside the half-open window [ts, next_ts).
times, depths, counts = [], [], []
for i in range(depth_df.shape[0] - 1):
    depth = depth_df['Depth (m)'].iloc[i]
    ts = depth_df['ts'].iloc[i]
    next_ts = depth_df['ts'].iloc[i + 1]
    mask = (df['captured_at'] >= ts) & (df['captured_at'] < next_ts)
    # Summing the boolean mask counts the matching rows without building a
    # filtered copy of the whole frame on every iteration.
    count = int(mask.sum())

    times.append(ts)
    depths.append(depth)
    counts.append(count)
    

In [None]:
# Bind the colormap to its own name: rebinding `cm` would shadow the
# `matplotlib.cm` module imported at the top of the notebook.
# plt.get_cmap is used because plt.cm.get_cmap was deprecated in Matplotlib 3.7
# and removed in 3.9.
reds_cmap = plt.get_cmap('Reds')

fig, ax = plt.subplots(figsize=(15, 10))
# Depths are negated so that deeper windows are drawn lower on the y axis;
# marker colour encodes the annotation count of each window.
sc = ax.scatter(times, [-k for k in depths], c=counts, marker='s', s=600, cmap=reds_cmap)
fig.colorbar(sc, ax=ax)
ax.set_xlabel('Time (MM-DD HH)')
ax.set_ylabel('Depth (m)')
ax.set_title('Fish Density Spatiotemporal Profile (15 minute dwell time)')
ax.grid()
plt.show()

<h1> Same analysis as above on a different waiting pen experiment </h1>

In [None]:
# Load the production SQL credentials from the JSON file whose path is given by
# the PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle as soon as the JSON is parsed instead of leaving it open.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

# Keypoint annotations for pen 46 with captured_at in [2019-08-07, 2019-08-08)
# and a non-null keypoints column.
query = """
    select * from keypoint_annotations where pen_id=46
    and captured_at >= '2019-08-07'
    and captured_at < '2019-08-08'
    and keypoints is not null;
"""
df = rds_access_utils.extract_from_database(query)
# Parse capture times so they can be compared with the depth-window timestamps.
df['captured_at'] = pd.to_datetime(df['captured_at'])


In [None]:
# Read the depth log and keep the 'Bremnes_2' rows at depths of at most 9.5 m.
depth_df = pd.read_csv('/root/data/alok/biomass_estimation/playground/waiting_pen_depth_experiments.csv')
is_bremnes_2 = depth_df['Experiment'] == 'Bremnes_2'
within_depth_limit = depth_df['Depth (m)'] <= 9.5
depth_df = depth_df[is_bremnes_2 & within_depth_limit]

# Reshape from one row per depth (with four time columns) to one row per time:
# stack one copy of the remaining columns per time column and attach the times
# in the same order (Down, Up, Down.1, Up.1).
times = (
    depth_df['Down'].tolist()
    + depth_df['Up'].tolist()
    + depth_df['Down.1'].tolist()
    + depth_df['Up.1'].tolist()
)
cols = [name for name in depth_df.columns.tolist() if name not in ('Up', 'Down', 'Up.1', 'Down.1')]
depth_df = pd.concat([depth_df[cols]] * 4, axis=0)
depth_df['time'] = times

# get_timestamp returns UTC strings; parse them and mark the result as UTC.
# Wrapping in a DatetimeIndex makes the column assignment positional, which
# matters because the stacked frame has duplicate index labels.
utc_strings = depth_df.apply(get_timestamp, axis=1)
depth_df['ts'] = pd.DatetimeIndex(pd.to_datetime(utc_strings)).tz_localize(tz='UTC')
depth_df = depth_df.sort_values('ts', ascending=True)

In [None]:
# Label every annotation with the depth of the time window it falls in.
# Start from NaN so annotations outside all windows stay unlabelled.
df['depth'] = np.nan

# Each pair of consecutive depth_df rows delimits a half-open window
# [ts, next_ts); the window takes the depth of its opening row.
# NOTE(review): gaps between an 'Up' time and the following 'Down' time are
# also treated as windows at the earlier row's depth - confirm intended.
for i in range(len(depth_df) - 1):
    ts, next_ts = depth_df['ts'].iloc[i], depth_df['ts'].iloc[i + 1]
    depth = depth_df['Depth (m)'].iloc[i]
    mask = df['captured_at'].ge(ts) & df['captured_at'].lt(next_ts)
    df.loc[mask, 'depth'] = depth
    

In [None]:
# For every window between consecutive depth_df timestamps, record the window
# start, the depth of the opening row, and how many annotations were captured
# inside the half-open window [ts, next_ts).
times, depths, counts = [], [], []
for i in range(depth_df.shape[0] - 1):
    depth = depth_df['Depth (m)'].iloc[i]
    ts = depth_df['ts'].iloc[i]
    next_ts = depth_df['ts'].iloc[i + 1]
    mask = (df['captured_at'] >= ts) & (df['captured_at'] < next_ts)
    # Summing the boolean mask counts the matching rows without building a
    # filtered copy of the whole frame on every iteration.
    count = int(mask.sum())

    times.append(ts)
    depths.append(depth)
    counts.append(count)
    

In [None]:
# Bind the colormap to its own name: rebinding `cm` would shadow the
# `matplotlib.cm` module imported at the top of the notebook.
# plt.get_cmap is used because plt.cm.get_cmap was deprecated in Matplotlib 3.7
# and removed in 3.9.
reds_cmap = plt.get_cmap('Reds')

fig, ax = plt.subplots(figsize=(15, 10))
# Depths are negated so that deeper windows are drawn lower on the y axis;
# marker colour encodes the annotation count of each window.
sc = ax.scatter(times, [-k for k in depths], c=counts, marker='s', s=600, cmap=reds_cmap)
fig.colorbar(sc, ax=ax)
ax.set_xlabel('Time (MM-DD HH)')
ax.set_ylabel('Depth (m)')
ax.set_title('Fish Density Spatiotemporal Profile (5 minute dwell time)')
ax.grid()
plt.show()