In [None]:
from collections import defaultdict
import json
import os
import pandas as pd
from research.utils.data_access_utils import S3AccessUtils
from research.utils.datetime_utils import add_days, day_difference
from population_metrics.smart_metrics import generate_smart_avg_weight, generate_smart_individual_values, ValidationError
from research.weight_estimation.keypoint_utils.optics import pixel2world
import numpy as np

from matplotlib import pyplot as plt

pd.set_option('display.max_rows', 500)

In [None]:
# Read the AWS credentials JSON named by the AWS_CREDENTIALS environment variable inside a
# context manager, so the file handle is closed immediately instead of being leaked.
with open(os.environ['AWS_CREDENTIALS']) as credentials_file:
    s3 = S3AccessUtils('/root/data', json.load(credentials_file))

In [None]:
# Cohort identifiers. Each one is also the name of the sub-directory that the download cell
# fetches `ground_truth_metadata.json` and `annotation_dataset.csv` from.
# Every cohort is listed exactly once: the frames are stored in a dict keyed by cohort name,
# so a repeated entry only causes a redundant download and parse.
cohort_names = [
    'seglberget_pen_id_66_2020-05-13_2020-06-13',
    'bolaks_pen_id_88_2020-02-28_2020-03-10',
    'langoy_pen_id_108_2020-05-07_2020-05-17',
    'tittelsnes_pen_id_37_2020-06-10_2020-06-24',
    'aplavika_pen_id_95_2020-07-10_2020-07-26',
    'kjeppevikholmen_pen_id_5_2019-06-18_2019-07-02',
    'silda_pen_id_86_2020-07-02_2020-07-19',
    'vikane_pen_id_60_2020-08-10_2020-08-30',
    'eldviktaren_pen_id_164_2020-09-21_2020-10-08',
    'habranden_pen_id_100_2020-08-10_2020-08-31',
    'varholmen_pen_id_131_2020-08-15_2020-08-30',
    'dale_pen_id_143_2020-10-07_2020-10-21',
    'djubawik_pen_id_153_2020-11-10_2020-11-26',
    'leivsethamran_pen_id_165_2020-10-18_2020-11-13',
    'movikodden_pen_id_114_2020-11-03_2020-11-25',
    'movikodden_pen_id_167_2020-10-13_2020-10-30',
    'slapoya_pen_id_116_2020-10-18_2020-11-08',
    'varholmen_pen_id_151_2020-10-02_2020-10-17',
    'varholmen_pen_id_186_2020-10-18_2020-11-02'
]

In [None]:
def _add_date_hour_columns(df):
    df.index = pd.to_datetime(df.captured_at)
    df['date'] = df.index.date
    df['hour'] = df.index.hour
    return df
    
    

In [None]:
batch_name = 'test'

ROOT_DIR = '/root/data/alok/biomass_estimation/playground'

# Per-cohort results, keyed by cohort name: the annotation frame and the ground-truth metadata.
dfs, gt_metadatas = {}, {}
for cohort_name in cohort_names:
    print(cohort_name)
    # URLs are joined with an explicit '/': os.path.join is meant for filesystem paths and
    # would insert the platform separator, which is not guaranteed to be '/'.
    s3_dir = '/'.join([
        'https://aquabyte-images-adhoc.s3-eu-west-1.amazonaws.com/alok/production_datasets',
        cohort_name
    ])

    # Ground-truth metadata is downloaded to ROOT_DIR/<batch_name>/<cohort_name>/...
    ground_truth_metadata_url = '/'.join([s3_dir, 'ground_truth_metadata.json'])
    ground_truth_key_base = os.path.join(batch_name, cohort_name, 'ground_truth_metadata.json')
    ground_truth_f = os.path.join(ROOT_DIR, ground_truth_key_base)
    s3.download_from_url(ground_truth_metadata_url, custom_location=ground_truth_f)
    # Context manager so the file handle is closed on every iteration.
    with open(ground_truth_f) as ground_truth_file:
        gt_metadata = json.load(ground_truth_file)
    gt_metadatas[cohort_name] = gt_metadata

    # The annotation dataset goes to whatever location download_from_url reports as the
    # first element of its return value.
    data_url = '/'.join([s3_dir, 'annotation_dataset.csv'])
    data_f, _, _ = s3.download_from_url(data_url)
    df = pd.read_csv(data_f)
    df = _add_date_hour_columns(df)
    dfs[cohort_name] = df
    
    

<h1> Conduct depth analysis </h1>

In [None]:
from weight_estimation.utils import get_left_right_keypoint_arrs, convert_to_world_point_arr, CameraMetadata

# Add a `depth` column to every cohort frame: the median of column 1 of the world-point
# array computed from each row's keypoint annotation.
for cohort_name, df in dfs.items():
    print(cohort_name)
    depths = []
    # Only these two columns are needed, so iterate over them directly instead of
    # materialising a Series per row with iterrows().
    for annotation_str, camera_metadata_str in zip(df.annotation, df.camera_metadata):
        # Both columns hold single-quoted dict text; swap the quotes so json can parse it.
        ann = json.loads(annotation_str.replace("'", '"'))
        camera_metadata = json.loads(camera_metadata_str.replace("'", '"'))
        # Deliberately not called `cm`: that global name is bound to `matplotlib.cm` by a
        # later cell, and re-running this cell would silently replace the module.
        camera = CameraMetadata(
            focal_length=camera_metadata['focalLength'],
            focal_length_pixel=camera_metadata['focalLengthPixel'],
            baseline_m=camera_metadata['baseline'],
            pixel_count_width=camera_metadata['pixelCountWidth'],
            pixel_count_height=camera_metadata['pixelCountHeight'],
            image_sensor_width=camera_metadata['imageSensorWidth'],
            image_sensor_height=camera_metadata['imageSensorHeight']
        )

        X = convert_to_world_point_arr(*get_left_right_keypoint_arrs(ann), camera)
        # NOTE(review): column 1 is used as the depth coordinate - confirm against
        # convert_to_world_point_arr's axis convention.
        depths.append(np.median(X[:, 1]))

    df['depth'] = depths


In [None]:
# One depth histogram per cohort on a 4x5 grid, restricted to rows with akpd_score > 0.9.
fig, axes = plt.subplots(4, 5, figsize=(20, 15))
for idx, (cohort_name, df) in enumerate(dfs.items()):
    ax = axes[idx // 5][idx % 5]
    confident_rows = df[df.akpd_score > 0.9]
    ax.hist(confident_rows.depth.values, bins=20)
    ax.grid()
    ax.set_title(cohort_name)

plt.show()

In [None]:
# For each cohort, plot the mean daily number of high-confidence samples whose depth falls
# inside a fixed-width band centred on each candidate working distance.
fig, axes = plt.subplots(4, 5, figsize=(30, 25))

working_distances = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
depth_of_field = 0.4

for idx, (cohort_name, df) in enumerate(dfs.items()):
    print(cohort_name)
    dates = sorted(df.date.unique())
    akpd_mask = df.akpd_score > 0.9

    mean_sample_sizes, tenth_pct_sample_sizes = [], []
    for working_distance in working_distances:
        near_limit = working_distance - 0.5*depth_of_field
        far_limit = working_distance + 0.5*depth_of_field
        depth_range_mask = (df.depth >= near_limit) & (df.depth <= far_limit)
        # Daily counts; a date with no qualifying rows contributes 0.
        sample_sizes = [
            df[(df.date == date) & depth_range_mask & akpd_mask].shape[0]
            for date in dates
        ]

        mean_sample_sizes.append(np.mean(sample_sizes))
        # Collected but not plotted below.
        tenth_pct_sample_sizes.append(np.percentile(sample_sizes, 10))

    ax = axes[idx // 5][idx % 5]
    confident_weights = df[akpd_mask].estimated_weight_g
    # Title: first four '_' tokens of the cohort name; mean estimated weight; mean daily sample count.
    title = '{};{};{}'.format(
        '_'.join(cohort_name.split('_')[:4]),
        round(confident_weights.mean()),
        round(confident_weights.resample('D').agg(lambda x: x.shape[0]).mean())
    )
    ax.plot(working_distances, mean_sample_sizes, color='blue')
    ax.grid()
    ax.set_title(title)
    ax.set_xlabel('Working Distance (m)')
    ax.set_ylabel('Samples in Active Depth Range')

plt.show()

<h1> Simulation </h1>

In [None]:
import random

def length_from_weight(weight):
    """Draw a noisy length for a fish of the given weight.

    The deterministic part is the cube root of `weight` divided by 2.36068. It is then
    scaled by one Gaussian draw (mean 1.0, standard deviation 0.05) from `random`.
    """
    cube_root = weight**(1/3.0)
    noise_factor = random.gauss(1.0, 0.05)
    return cube_root / 2.36068 * noise_factor

class Fish:
    """A simulated fish with a random size, depth and speed that moves along the x axis.

    `position` is a mutable list `[x, depth, vertical offset]`. Every fish starts at
    x = -10 and only its x component changes afterwards.
    """

    def __init__(self, weight_mean, weight_cov, speed_factor_mean, speed_factor_std,
                 min_depth, max_depth, max_y_coordinate=3.0):
        """Draw the fish's attributes from `random`.

        The order of the random draws (weight, length noise, depth, speed factor,
        vertical offset) is fixed, so a seeded run produces the same fish.
        """
        # Gaussian weight whose std is weight_mean * weight_cov, floored at 0.1.
        weight_std = weight_mean * weight_cov
        self.weight = max(random.gauss(weight_mean, weight_std), 0.1)
        self.length = length_from_weight(self.weight)
        self.height = 0.3 * self.length
        self.depth = random.uniform(min_depth, max_depth)
        # Speed is proportional to length; the proportionality factor is floored at 0.3.
        speed_factor = max(0.3, random.gauss(speed_factor_mean, speed_factor_std))
        self.speed = self.length * speed_factor
        self.is_sampled = False
        vertical_offset = random.uniform(-max_y_coordinate, max_y_coordinate)
        self.position = [-10, self.depth, vertical_offset]

    def update_position(self, delta_t):
        """Advance the fish along x by `speed * delta_t`."""
        self.position[0] += self.speed * delta_t

    def get_position(self):
        """Return the live position list (not a copy)."""
        return self.position
        
        
class Camera:
    """Camera with a horizontal field of view and a depth-dependent capture probability.

    The image is fixed at 1000 pixels wide; height, vertical field of view and the
    focal length in pixels are derived from `fov_degrees` and `aspect_ratio`.
    """

    def __init__(self, position, fov_degrees, aspect_ratio=0.75):
        self.position = position
        self.fov = fov_degrees * np.pi / 180.0
        tan_half_fov = np.tan(self.fov / 2)
        self.vfov = 2 * np.arctan(tan_half_fov * aspect_ratio)
        self.pixel_width = 1000
        self.pixel_height = int(self.pixel_width * aspect_ratio)
        self.focal_length_pixel = (self.pixel_width / 2) / tan_half_fov

    @staticmethod
    def gen_p_capture(depth, a=1.0, b=2.5, default_p=1.0):
        """Capture probability at `depth`.

        Equals `default_p` for depth < a, then falls linearly to 0 at depth == b and is
        clamped at 0 beyond that.
        """
        if depth < a:
            return default_p
        linear_falloff = default_p * (b - depth) / (b - a)
        return max(linear_falloff, 0)

    def contains(self, fish):
        """Return whether `fish` is captured by this camera.

        The fish must lie strictly inside both the horizontal and the vertical field at
        its depth. If it does, one `random.random()` draw is compared against
        `gen_p_capture(depth)`; otherwise no random number is consumed.
        """
        fish_position = fish.get_position()
        x, depth, z = fish_position[0], fish_position[1], fish_position[2]

        # Horizontal extent of the fish versus the field, which is centred on the camera's x.
        fish_left = x - fish.length / 2.0
        fish_right = x + fish.length / 2.0
        field_size = 2 * depth * np.tan(self.fov / 2.0)
        field_center = self.position[0]
        field_left = field_center - field_size / 2.0
        field_right = field_center + field_size / 2.0
        inside_horizontal_field = (fish_left > field_left) and (fish_right < field_right)

        # Vertical extent of the fish versus a field that is symmetric about 0.
        fish_bottom = z - fish.height / 2.0
        fish_top = z + fish.height / 2.0
        vertical_half_extent = depth * np.tan(self.vfov / 2.0)
        inside_vertical_field = (fish_bottom > -vertical_half_extent) and \
            (fish_top < vertical_half_extent)

        if not (inside_horizontal_field and inside_vertical_field):
            return False
        return random.random() < self.gen_p_capture(depth)
        


In [None]:
from matplotlib.colors import Normalize
from matplotlib import cm
from PIL import Image, ImageDraw

# Maps a fish depth to a colour on the 'Reds' colormap, normalised over 0.3-3.0.
# The colormap is passed by name: `cm.get_cmap` was deprecated in Matplotlib 3.7 and
# removed in 3.9, whereas ScalarMappable accepts a colormap name directly.
sm = cm.ScalarMappable(cmap='Reds', norm=Normalize(vmin=0.3, vmax=3.0))

def spawn_fish(fishes, avg_weight):
    """Create one Fish with mean weight `avg_weight` and append it to `fishes` in place.

    The remaining Fish parameters are fixed: weight CoV 0.2, speed factor mean 0.7 and
    std 0.15, depth drawn between 0.3 and 3.0.
    """
    fishes.append(Fish(
        weight_mean=avg_weight,
        weight_cov=0.2,
        speed_factor_mean=0.7,
        speed_factor_std=0.15,
        min_depth=0.3,
        max_depth=3.0,
    ))
    
    
def move_fish(t, t_new, fishes):
    """Advance every fish by `t_new - t` and return those still short of x = 10.0.

    The fish objects are updated in place. The returned list is a new list, and the
    input list itself is left untouched.
    """
    elapsed = t_new - t
    still_in_range = []
    for swimmer in fishes:
        swimmer.update_position(elapsed)
        if swimmer.get_position()[0] < 10.0:
            still_in_range.append(swimmer)
    return still_in_range
    

def check_if_fully_visible(fish, left_camera, right_camera):
    """Return whether both cameras report the fish as contained.

    The right camera is only consulted when the left camera's answer is truthy.
    """
    left_result = left_camera.contains(fish)
    if not left_result:
        return left_result
    return right_camera.contains(fish)
    
    
def trigger_capture(fishes, sampled_fishes, left_camera, right_camera, remove_dups=True):
    """Run one capture: record every fish visible to both cameras.

    Visible fish are flagged `is_sampled` and appended to `sampled_fishes` in place.
    With `remove_dups` set, the returned list omits every fish whose flag is set, so it
    cannot be captured again; otherwise the input list is returned unchanged.
    """
    for candidate in fishes:
        if check_if_fully_visible(candidate, left_camera, right_camera):
            candidate.is_sampled = True
            sampled_fishes.append(candidate)

    if not remove_dups:
        return fishes
    return [candidate for candidate in fishes if not candidate.is_sampled]
            
    

def get_pixel_bbox(fish, camera):
    """Project the fish onto the camera image and return `[x0, y0, x1, y1]` as ints.

    The box is centred on the projected fish position and spans the projected length
    horizontally and the projected height vertically. Coordinates are truncated with
    `int()`.
    """
    x_world, depth, z_world = fish.position[0], fish.position[1], fish.position[2]
    focal = camera.focal_length_pixel

    center_x = x_world * focal / depth + camera.pixel_width / 2.0
    # The image y axis is flipped relative to the world vertical coordinate.
    center_y = -(z_world * focal / depth) + camera.pixel_height / 2.0
    half_length = (fish.length * focal / depth) / 2.0
    half_height = (fish.height * focal / depth) / 2.0

    corners = (
        center_x - half_length,
        center_y - half_height,
        center_x + half_length,
        center_y + half_height,
    )
    return [int(value) for value in corners]
    
    
def draw_frame(fishes, left_camera, right_camera):
    """Render the fish as filled ellipses on a blank RGB image sized for the left camera.

    Fish are painted from largest to smallest `depth`, so shallower fish are drawn on
    top. The fill colour comes from the module-level `sm` mapping applied to the fish's
    depth. `right_camera` is accepted but not used. Returns the image as a numpy array.
    """
    canvas = Image.new('RGB', (left_camera.pixel_width, left_camera.pixel_height))
    painter = ImageDraw.Draw(canvas)

    deepest_first = sorted(fishes, key=lambda f: f.depth)
    deepest_first.reverse()
    for fish in deepest_first:
        rgba = sm.to_rgba(fish.depth, bytes=True)
        painter.ellipse(tuple(get_pixel_bbox(fish, left_camera)), fill=rgba[:3])
    return np.array(canvas)
        
    
    


In [None]:
def generate_samples(FOV, FPS, avg_weight, aspect_ratio=0.75, reduction_factor=1.0):
    """Simulate fish swimming past a stereo camera pair and return the captured fish.

    Two event streams are merged in time order:
      * capture events every `1 / FPS` time units over the interval [0, 100000);
      * fish spawn events whose inter-arrival times are exponential with mean 0.5;
        `int(100000 * reduction_factor)` spawn events are generated.
    The simulation stops as soon as either stream is exhausted. When a capture and a
    spawn fall on the same instant, the capture is processed first.

    Args:
        FOV: field of view in degrees, passed to both cameras.
        FPS: capture rate (captures per time unit).
        avg_weight: mean fish weight passed to `spawn_fish`.
        aspect_ratio: image aspect ratio passed to both cameras.
        reduction_factor: scales the number of spawn events generated.

    Returns:
        The list of fish captured by both cameras. A captured fish is removed from the
        pool, so each fish appears at most once.
    """
    fishes = []
    sampled_fishes = []
    left_camera = Camera((0, 0, 0), FOV, aspect_ratio=aspect_ratio)
    right_camera = Camera((0.105, 0, 0), FOV, aspect_ratio=aspect_ratio)

    capture_times = np.arange(0, 100000, 1.0 / FPS)
    fish_spawn_times = np.cumsum(np.random.exponential(0.5, int(100000 * reduction_factor)))
    n_captures, n_spawns = len(capture_times), len(fish_spawn_times)

    # Walk both sorted event arrays with cursors. Removing the head of a Python list
    # (`del lst[0]`) shifts every remaining element, which made the event loop quadratic
    # in the number of events.
    capture_idx, spawn_idx = 0, 0
    t = 0
    while capture_idx < n_captures and spawn_idx < n_spawns:
        next_capture = capture_times[capture_idx]
        next_spawn = fish_spawn_times[spawn_idx]

        if next_capture <= next_spawn:
            fishes = move_fish(t, next_capture, fishes)
            fishes = trigger_capture(fishes, sampled_fishes, left_camera, right_camera, remove_dups=True)
            t = next_capture
            capture_idx += 1

            # Progress report, emitted once per 100000 captures consumed. It is checked only
            # after a capture, so spawn events do not repeat the same line.
            captures_remaining = n_captures - capture_idx
            if captures_remaining % 100000 == 0:
                print(captures_remaining)
        else:
            fishes = move_fish(t, next_spawn, fishes)
            spawn_fish(fishes, avg_weight)
            t = next_spawn
            spawn_idx += 1

    return sampled_fishes


In [None]:
# Scratch camera at the origin with an 80-degree field of view, used for ad-hoc inspection.
c = Camera(position=(0, 0, 0), fov_degrees=80, aspect_ratio=0.75)

In [None]:
# For every cohort, run the simulation for the two camera configurations
# (FOV 54 at 0.6 FPS and FOV 80 at 8.0 FPS) and store the sampled fish.
sampled_fishes_comparison_dict = defaultdict(dict)

for cohort_name, df in dfs.items():
    print(cohort_name)

    akpd_mask = df.akpd_score > 0.9
    confident_weights = df[akpd_mask].estimated_weight_g
    avg_weight = confident_weights.mean() * 1e-3
    daily_sample_size = confident_weights.resample('D').agg(lambda x: x.shape[0]).mean()

    # Calibration run: its sample count sets the factor that scales the number of spawn
    # events relative to the observed mean daily sample size.
    sampled_fishes = generate_samples(54, 0.6, avg_weight)
    print('Sampled fishes generated!')
    reduction_factor = float(daily_sample_size) / len(sampled_fishes)

    adjusted_sampled_fishes = generate_samples(54, 0.6, avg_weight, reduction_factor=reduction_factor)
    print('Adjusted samples generated!')
    adjusted_sampled_fishes_new = generate_samples(80, 8.0, avg_weight, reduction_factor=reduction_factor)
    print('New adjusted samples generated!')

    cohort_results = sampled_fishes_comparison_dict[cohort_name]
    cohort_results['sampled_fishes'] = adjusted_sampled_fishes
    cohort_results['new_sampled_fishes'] = adjusted_sampled_fishes_new
    
    

In [None]:
# Vertical field of view in degrees for an 80-degree horizontal field of view and a
# 0.75 aspect ratio (the same formula Camera uses for `vfov`, converted to degrees).
2 * np.arctan(0.75 * np.tan(80 * np.pi/180 / 2)) * 180.0/np.pi

In [None]:
# Vertical field of view in radians of the scratch camera `c` created earlier in the notebook.
# The previous expression referenced `self` and `aspect_ratio`, which only exist inside
# Camera.__init__, so it raised NameError at cell level. `c.vfov` holds that same formula's result.
c.vfov

In [None]:
# Pull out the two simulated sample sets of one cohort for closer inspection.
djubawik_results = sampled_fishes_comparison_dict['djubawik_pen_id_153_2020-11-10_2020-11-26']
fishes = djubawik_results['sampled_fishes']
fishes_2 = djubawik_results['new_sampled_fishes']

In [None]:
# Count, for each working distance, how many simulated fish of each configuration fall
# strictly inside the depth band centred on it.
working_distances = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
depth_of_field = 0.4
s1s, s2s = [], []

for working_distance in working_distances:
    lo = working_distance - 0.5*depth_of_field
    hi = working_distance + 0.5*depth_of_field
    s1s.append(sum(1 for f in fishes if lo < f.depth < hi))
    s2s.append(sum(1 for f in fishes_2 if lo < f.depth < hi))

In [None]:
# Ratio of new-configuration to current-configuration counts per working distance;
# None where the current configuration has no samples.
[(new_count / old_count if old_count > 0 else None) for old_count, new_count in zip(s1s, s2s)]

In [None]:
# NOTE(review): get_projection_factor_dict is defined in a cell further down this notebook,
# so on a fresh kernel (Restart & Run All) this call raises NameError unless that cell
# has been run first.
get_projection_factor_dict(sampled_fishes_comparison_dict['djubawik_pen_id_153_2020-11-10_2020-11-26'])

In [None]:
def convert_to_rad(degrees):
    """Convert an angle from degrees to radians."""
    radians = degrees * np.pi / 180.0
    return radians

def calculate_focal_length(fov, sensor_width=0.01412):
    """Return the focal length that gives field of view `fov` on a sensor of `sensor_width`.

    Uses the pinhole relation `fov = 2 * atan(sensor_width / (2 * f))`, i.e.
    `f = (sensor_width / 2) / tan(fov / 2)`. The previous implementation divided by
    `arctan(fov / 2)` instead of `tan(fov / 2)`, which overstated the focal length
    (by roughly 38% at an 80-degree field of view).

    Args:
        fov: full field of view in degrees.
        sensor_width: sensor width; the result is in the same length unit.
    """
    half_fov_rad = np.deg2rad(fov) / 2.0
    focal_length = (0.5 * sensor_width) / np.tan(half_fov_rad)
    return focal_length
    
def get_depth_of_field(fov, working_distance, f_number=2.8, base_circle_of_confusion=0.0000107*3):
    """Approximate depth of field at `working_distance` for a lens with field of view `fov`.

    Computes `2 * working_distance**2 * f_number * c / focal_length**2`. Here `c` is
    `base_circle_of_confusion` scaled by `tan(54 deg / 2) / tan(fov / 2)`, so the base
    value applies at a 54-degree field of view. The focal length comes from
    `calculate_focal_length(fov)` with its default sensor width.
    """
    focal_length = calculate_focal_length(fov)
    reference_tan = np.tan(convert_to_rad(54) / 2.0)
    target_tan = np.tan(convert_to_rad(fov) / 2.0)
    circle_of_confusion = base_circle_of_confusion * reference_tan / target_tan
    numerator = 2 * working_distance**2 * f_number * circle_of_confusion
    return numerator / (focal_length**2)
    


In [None]:
# Depth of field for an 80-degree field of view at a 0.9 working distance.
get_depth_of_field(fov=80, working_distance=0.9)

In [None]:
def get_projection_factor_dict(sampled_fishes_comparison, working_distances=None, depth_of_field=0.4):
    """Ratio of new-configuration to current-configuration sample counts per working distance.

    For each working distance, fish whose `depth` lies strictly inside
    `working_distance +/- depth_of_field / 2` are counted in both sample sets of
    `sampled_fishes_comparison`. The previous implementation ignored the argument's
    lists and counted the notebook globals `fishes` / `fishes_2` instead, so every
    cohort got the same factors.

    Args:
        sampled_fishes_comparison: mapping with keys 'sampled_fishes' and
            'new_sampled_fishes', each a list of objects with a `depth` attribute.
        working_distances: band centres to evaluate; defaults to 0.6 through 1.2 in
            steps of 0.1.
        depth_of_field: full width of the depth band.

    Returns:
        dict mapping each working distance to `new_count / current_count`, or 1.0 when
        the current configuration has no samples in the band.
    """
    if working_distances is None:
        working_distances = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]

    sampled_fishes = sampled_fishes_comparison['sampled_fishes']
    sampled_fishes_new = sampled_fishes_comparison['new_sampled_fishes']
    projection_factor_dict = {}

    for working_distance in working_distances:
        lo, hi = working_distance - 0.5*depth_of_field, working_distance + 0.5*depth_of_field
        s1 = sum(1 for f in sampled_fishes if lo < f.depth < hi)
        s2 = sum(1 for f in sampled_fishes_new if lo < f.depth < hi)
        projection_factor_dict[working_distance] = s2 / s1 if s1 > 0 else 1.0

    return projection_factor_dict

        


In [None]:
def get_projected_sample_size(sampled_fishes_comparison, working_distance):
    """Count new-configuration fish whose depth is strictly within 0.2 of `working_distance`.

    Only the 'new_sampled_fishes' entry of `sampled_fishes_comparison` is read; the
    band is a fixed 0.4 wide.
    """
    depth_of_field = 0.4
    near_limit = working_distance - 0.5*depth_of_field
    far_limit = working_distance + 0.5*depth_of_field
    candidates = sampled_fishes_comparison['new_sampled_fishes']
    return sum(1 for fish in candidates if near_limit < fish.depth < far_limit)
    
    

In [None]:
# Per cohort, plot the projected (simulated, new configuration) number of samples in the
# active depth range against working distance, with the KPI line at 500.
fig, axes = plt.subplots(4, 5, figsize=(30, 25))

working_distances = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5]
depth_of_field = 0.4

for idx, (cohort_name, df) in enumerate(dfs.items()):
    print(cohort_name)
    dates = sorted(df.date.unique())
    akpd_mask = df.akpd_score > 0.9
    cohort_simulation = sampled_fishes_comparison_dict[cohort_name]

    mean_sample_sizes, tenth_pct_sample_sizes = [], []
    projected_mean_sample_sizes, projected_tenth_pct_sample_sizes = [], []
    for working_distance in working_distances:
        projected_sample_size = get_projected_sample_size(cohort_simulation, working_distance)

        # The measured-data band uses the computed depth of field for an 80-degree lens,
        # while get_projected_sample_size uses its own fixed band width.
        depth_of_field = get_depth_of_field(80, working_distance)
        near_limit = working_distance - 0.5*depth_of_field
        far_limit = working_distance + 0.5*depth_of_field
        depth_range_mask = (df.depth >= near_limit) & (df.depth <= far_limit)

        sample_sizes = [
            df[(df.date == date) & depth_range_mask & akpd_mask].shape[0]
            for date in dates
        ]
        # The projection is a single number per working distance, repeated once per date.
        projected_sample_sizes = [projected_sample_size for _ in dates]

        # NOTE(review): these lists are named "tenth_pct" but the 20th percentile is
        # taken here - confirm which is intended.
        mean_sample_sizes.append(np.mean(sample_sizes))
        tenth_pct_sample_sizes.append(np.percentile(sample_sizes, 20))
        projected_mean_sample_sizes.append(np.mean(projected_sample_sizes))
        projected_tenth_pct_sample_sizes.append(np.percentile(projected_sample_sizes, 20))

    ax = axes[idx // 5][idx % 5]
    title = '{}: {}'.format(
        '_'.join(cohort_name.split('_')[:4]),
        round(df[akpd_mask].estimated_weight_g.mean())
    )
    ax.plot(working_distances, projected_tenth_pct_sample_sizes, color='blue')
    ax.axhline(500, color='red', linestyle='--', label='KPI requirement')
    ax.grid()
    ax.set_title(title)
    ax.set_xlabel('Working Distance (m)')
    ax.set_ylabel('Samples in Active Depth Range')
    ax.legend()

plt.show()

In [None]:
# Number of simulated samples for this cohort under the current camera configuration.
seglberget_results = sampled_fishes_comparison_dict['seglberget_pen_id_66_2020-05-13_2020-06-13']
len(seglberget_results['sampled_fishes'])

In [None]:
# Number of simulated samples for this cohort under the new camera configuration.
seglberget_results = sampled_fishes_comparison_dict['seglberget_pen_id_66_2020-05-13_2020-06-13']
len(seglberget_results['new_sampled_fishes'])

In [None]:
# Depth histogram of the simulated (current configuration) samples, one panel per cohort.
fig, axes = plt.subplots(4, 5, figsize=(20, 15))
for idx, (cohort_name, df) in enumerate(dfs.items()):
    ax = axes[idx // 5][idx % 5]
    simulated_fish = sampled_fishes_comparison_dict[cohort_name]['sampled_fishes']
    depths = [fish.depth for fish in simulated_fish]
    ax.hist(depths, bins=20)
    ax.grid()
    ax.set_title(cohort_name)

plt.show()

In [None]:
# Overlay the measured (akpd_score > 0.9) and simulated depth distributions per cohort,
# both normalised to densities so their shapes can be compared.
fig, axes = plt.subplots(4, 5, figsize=(20, 15))
for idx, (cohort_name, df) in enumerate(dfs.items()):
    ax = axes[idx // 5][idx % 5]

    simulated_fish = sampled_fishes_comparison_dict[cohort_name]['sampled_fishes']
    depths = [fish.depth for fish in simulated_fish]
    measured_depths = df[df.akpd_score > 0.9].depth.values

    ax.hist(measured_depths, bins=20, density=True, alpha=0.5)
    ax.hist(depths, bins=20, density=True, alpha=0.5)
    ax.grid()
    ax.set_title(cohort_name)

plt.show()

In [None]:
# Combines the half-angle tangent of an 80-degree field of view with the same tangent scaled
# by 3000/4096, then converts the resulting full angle to degrees.
# NOTE(review): looks like the diagonal field of view of a 4096x3000 image - confirm.
tan_half_hfov = np.tan(80*0.5 * np.pi / 180)
tan_half_vfov = (3000.0 / 4096) * tan_half_hfov
2*np.arctan((tan_half_hfov**2 + tan_half_vfov**2)**.5) * 180.0/np.pi

In [None]:
# Name of the cohort at insertion position 15 in `dfs`.
list(dfs)[15]

In [None]:
# Select the cohort by name instead of by position: `dfs` holds one frame per distinct
# cohort name (19 of them), so positional index 19 is out of range and raised IndexError.
# NOTE(review): the last cohort in the list is presumably the one intended - confirm.
tdf = dfs['varholmen_pen_id_186_2020-10-18_2020-11-02']

In [None]:
# Mean AKPD score for fish with depth strictly between 0.5 and 1.0.
mask = tdf.depth.gt(0.5) & tdf.depth.lt(1.0)
tdf.loc[mask, 'akpd_score'].mean()

In [None]:
# Mean AKPD score for fish with depth strictly between 1.0 and 1.5.
mask = tdf.depth.gt(1.0) & tdf.depth.lt(1.5)
tdf.loc[mask, 'akpd_score'].mean()

In [None]:
# Mean AKPD score for fish with depth strictly between 1.5 and 2.0.
mask = tdf.depth.gt(1.5) & tdf.depth.lt(2.0)
tdf.loc[mask, 'akpd_score'].mean()