In [None]:
import numpy as np
import pandas as pd
from collections import defaultdict
from matplotlib import pyplot as plt
from IPython.core.display import display, HTML
# Inject CSS that sets the notebook's `.container` element to 100% width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

# Raise pandas' displayed-row limit to 500.
pd.set_option('display.max_rows', 500)

<h1> Known features and components </h1>

<h3> IMX304 Sensor Data Sheet: https://en.ids-imaging.com/sony-imx304.html </h3>

In [None]:
# Sensor resolution in pixels.
px_count_width = 4104
px_count_height = 3006

# Physical sensor dimensions (presumably centimeters, matching the units note
# in the parameter-sweep cell -- confirm against the data sheet).
camera_sensor_width = 1.412
camera_sensor_height = 1.034

# Physical size of one pixel along each axis.
effective_pixel_width = camera_sensor_width / px_count_width
effective_pixel_height = camera_sensor_height / px_count_height

# Expected disparity error: a quarter of a pixel along the corresponding axis.
# The vertical error must use the pixel *height*, not the width.
expected_horizontal_disparity_error = 0.25 * effective_pixel_width
expected_vertical_disparity_error = 0.25 * effective_pixel_height

px_max_disparity = 600
max_disparity = px_max_disparity * effective_pixel_width # maximum allowable disparity

<h1> Generate results table (note: depth error here is a function of resolution and stereo matching algorithm is assumed to be perfect) </h1>

In [None]:
def generate_case_results(focal_length, horizontal_field_of_view, vertical_field_of_view, true_fish_total_length, true_fish_width, true_depth, baseline, max_disparity, disparity_error=None):
    """Compute depth/size estimation errors and overlap metrics for one stereo setup.

    A fixed, one-sided disparity error is added to the true disparity; depth is
    re-derived from that perturbed disparity, and the fish length and width are
    re-estimated at the perturbed depth. Horizontal field-of-view coverage
    metrics are computed at the true depth.

    Parameters
    ----------
    focal_length, baseline, true_depth, true_fish_total_length, true_fish_width :
        Scene/camera quantities; all must share one length unit.
    horizontal_field_of_view :
        Horizontal field of view in radians (it is passed to ``np.tan``).
    vertical_field_of_view :
        Accepted for interface compatibility; not used by any computation here.
    max_disparity :
        Largest allowed disparity, in the same unit as ``focal_length``. Cases
        whose true disparity is not strictly below it get ``None`` metrics.
    disparity_error : optional
        Disparity error added to the true disparity. When ``None`` (the
        default), the module-level ``expected_horizontal_disparity_error`` is
        used, which is what the function always did before this parameter
        existed.

    Returns
    -------
    dict
        The six input values (``focal_length``, ``true_fish_total_length``,
        ``true_fish_width``, ``true_depth``, ``baseline``, ``max_disparity``)
        plus ten metric entries. The metric entries are all ``None`` when the
        true disparity is not below ``max_disparity``.
    """
    if disparity_error is None:
        # Fall back to the notebook-level constant (original behaviour).
        disparity_error = expected_horizontal_disparity_error

    # get estimated errors in estimated depth
    true_disparity = (focal_length * baseline) / true_depth
    estimated_disparity = true_disparity + disparity_error # one-sided error as of now
    estimated_depth = (focal_length * baseline) / estimated_disparity
    error_depth = abs(estimated_depth - true_depth)
    error_depth_pct = (error_depth / true_depth) * 100

    # get estimated errors in estimated fish total length:
    # project the true length onto the sensor, then back-project at the estimated depth
    fish_pixel_length = true_fish_total_length * (focal_length / true_depth)
    estimated_fish_total_length = fish_pixel_length * (estimated_depth / focal_length)
    error_fish_total_length = abs(estimated_fish_total_length - true_fish_total_length)
    error_fish_total_length_pct = (error_fish_total_length / true_fish_total_length) * 100

    # get estimated errors in estimated fish width (same procedure as for length)
    fish_pixel_width = true_fish_width * (focal_length / true_depth)
    estimated_fish_width = fish_pixel_width * (estimated_depth / focal_length)
    error_fish_width = abs(estimated_fish_width - true_fish_width)
    error_fish_width_pct = (error_fish_width / true_fish_width) * 100

    # get horizontal metrics
    field_of_view_total_width = 2*true_depth*np.tan(horizontal_field_of_view / 2)
    # the two views are offset by the baseline; clamp at 0 when they do not overlap
    overlapping_region_width = max(0, field_of_view_total_width - baseline)
    overlapping_region_size_in_fish = overlapping_region_width / float(true_fish_total_length)
    overlapping_region_fraction = overlapping_region_width / float(field_of_view_total_width)
    fish_size_fraction = true_fish_total_length / float(field_of_view_total_width)

    # generate results hash
    results = {
        'focal_length': focal_length,
        'true_fish_total_length': true_fish_total_length,
        'true_fish_width': true_fish_width,
        'true_depth': true_depth,
        'baseline': baseline,
        'max_disparity': max_disparity
    }
    metrics = {
        'error_depth': error_depth,
        'error_depth_pct': error_depth_pct,
        'depth_accuracy_pct': 100 - error_depth_pct,
        'error_fish_total_length': error_fish_total_length,
        'error_fish_total_length_pct': error_fish_total_length_pct,
        'error_fish_width': error_fish_width,
        'error_fish_width_pct': error_fish_width_pct,
        'overlapping_region_size_in_fish': overlapping_region_size_in_fish,
        'overlapping_region_fraction': overlapping_region_fraction,
        'fish_size_fraction': fish_size_fraction
    }
    if true_disparity < max_disparity:
        results.update(metrics)
    else:
        # disparity is out of range: keep the same keys but blank every metric
        results.update(dict.fromkeys(metrics))

    return results
    

In [None]:
# note: all values are in centimeters unless specified otherwise

# Parameter grid: every combination of the five lists below is evaluated.
focal_lengths = [0.8, 1.0, 1.2, 1.4, 1.6]
true_fish_total_length_list = list(np.arange(10, 90, 10))
true_fish_width_list = list(np.arange(5, 13, 1))
true_depth_list = list(np.arange(50, 500, 50))
# round to one decimal so later equality filters (e.g. baseline == 12) match exactly
baselines = [round(baseline, 1) for baseline in np.arange(1.0, 20.0, 0.1)]

# column name -> list of values, one entry per evaluated case
results_lists = defaultdict(list)

for focal_length in focal_lengths:
    # field of view depends only on the sensor size and focal length
    horizontal_field_of_view = 2*np.arctan(camera_sensor_width / (2.0 * focal_length))
    vertical_field_of_view = 2*np.arctan(camera_sensor_height / (2.0 * focal_length))

    for true_fish_total_length in true_fish_total_length_list:
        for true_fish_width in true_fish_width_list:
            for true_depth in true_depth_list:
                for baseline in baselines:
                    results = generate_case_results(focal_length, horizontal_field_of_view, vertical_field_of_view, true_fish_total_length, true_fish_width, true_depth, baseline, max_disparity)
                    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only
                    for key, val in results.items():
                        results_lists[key].append(val)


results_df = pd.DataFrame(results_lists)

<h1> The following is a plot of the error in estimated depth (cm) as a function of the distance from the camera assuming a baseline of 12 cm and a focal length of 16mm. This graph replicates the Nerian calculator results when both are fed the same inputs </h1>

In [None]:
# Pick out the one configuration being compared: 1.6 focal length, 60 x 10 fish,
# baseline of 12. Only the depth varies across the remaining rows.
mask = (
    (results_df['focal_length'] == 1.6)
    & (results_df['true_fish_total_length'] == 60)
    & (results_df['true_fish_width'] == 10)
    & (results_df['baseline'] == 12)
)
reference_rows = results_df[mask]

plt.figure(figsize=(20, 15))
plt.grid()
plt.plot(reference_rows['true_depth'], reference_rows['error_depth'])
plt.xlabel('Depth (cm)')
plt.ylabel('Error in estimated depth (cm)')
plt.show()

<h1> Graph #1: For a given fish length/width and focal length, the following graph shows the overlapping region size (expressed in fish lengths) vs. the percentage accuracy in the depth estimate </h1>

<h3> The graph is presented for various baselines (marked along the curves) and various depths. We want the first number to be high since it is directly related to our sampling frequency (MUST be greater than 1 for all depths >= 100 cm). We want the second number to be high as well to produce accurate depth maps. </h3>

In [None]:
# note: you can configure the focal length and true fish total length in this analysis

fig = plt.figure(figsize=(40, 60))
ax = fig.add_subplot(111)
ax.grid()

# Loop-invariant parts of the filter, computed once instead of once per depth.
# Rows containing a null are dropped from the plot.
complete_rows = ~results_df.isnull().any(axis=1)
fixed_case = (results_df.focal_length == 1.6) & \
(results_df.true_fish_total_length == 80) & \
(results_df.true_fish_width == 5) & \
complete_rows

for true_depth in true_depth_list[:5]:

    mask = fixed_case & (results_df.true_depth == true_depth)
    depth_rows = results_df[mask]  # one curve: all baselines at this depth

    ax.plot(depth_rows.depth_accuracy_pct, depth_rows.overlapping_region_size_in_fish, label='Depth: {} cm'.format(true_depth))

    # label the curve at whole-number baselines only, to keep the plot readable
    for depth_accuracy_pct, overlapping_region_size_in_fish, baseline in zip(depth_rows.depth_accuracy_pct.values, depth_rows.overlapping_region_size_in_fish.values, depth_rows.baseline.values):
        if baseline % 1 == 0:
            ax.annotate(str(baseline), xy=(depth_accuracy_pct, overlapping_region_size_in_fish), textcoords='data')

plt.xlabel('Depth accuracy pct')
plt.ylabel('Overlapping region size in fish')
plt.legend()
plt.show()



<h3> The graph above shows that decreasing the baseline does not have a significant impact on the percentage accuracy of the depth estimate (while everything else is held constant). This is because the pixels in our sensor will be extremely small since we are using 12 MP cameras, so the error in the disparity estimate that comes from the pixel size does not have significant impact on the results. The disparity errors will largely come from errors in our stereo matching algorithm (i.e. incorrect patch correspondences). The graph also shows that for baselines above 6 centimeters, we will not be able to produce a depth map for true depths within 50 cm assuming that the maximum allowed pixel disparity is 600 pixels (this can probably be increased). I think we can probably safely decrease the baseline to around 6 centimeters while barely impacting the accuracy of the depth map estimate -- this comes at the slight cost of less accurate disparity estimates due to the nonzero pixel width, but the cost is probably far outweighed by the benefit of being able to produce well-behaved depth maps for nearby distances (i.e. between 50 cm and 1 m). </h3>

<h3> One can see that for all focal lengths greater than 1.6 cm, the overlapping region size in fish units is less than 1 for a depth of 50 cm and 100 cm. This is not good, because it means that we won't be able to sample any nearby fish since they won't be fully present in both the left and right frames. In fact, we probably want to narrow our focal length search space down to less than 1.4 cm to avoid this risk. We also don't want to make the focal length too small (as shown in the following analysis) </h3>

<h1> Graph #2: For a given depth and fish size, the graph below shows the ratio of fish size to total FOV vs. ratio of overlapping region size to total FOV (across different baselines and focal lengths). </h1>
<h3> We want the first number to be high since having the fish take up a larger part of the FOV allows us to examine it more accurately (i.e. higher number creates a "zoomed-in" effect where we can see scale patterns and lice more clearly). We want the second number to be high as well since having a high overlapping region size will yield better disparity maps (less likely that fish is in one frame and not in the other). </h3>

In [None]:
# note: you can configure the true depth and true fish total length in this analysis

fig2 = plt.figure(figsize=(30, 30))
ax2 = fig2.add_subplot(111)
ax2.grid()

# Loop-invariant parts of the filter, computed once instead of once per baseline.
# Rows containing a null are dropped from the plot.
complete_rows = ~results_df.isnull().any(axis=1)
fixed_case = (results_df.true_depth == 100) & \
(results_df.true_fish_total_length == 60) & \
(results_df.true_fish_width == 5.0) & \
complete_rows

for baseline in baselines:
    # only draw a curve for even whole-number baselines
    if baseline % 2 != 0:
        continue
    mask = fixed_case & (results_df.baseline == baseline)
    baseline_rows = results_df[mask]  # one curve: all focal lengths at this baseline

    ax2.plot(baseline_rows.overlapping_region_fraction, baseline_rows.fish_size_fraction, label='Baseline: {} cm'.format(baseline))

    # mark each point on the curve with its focal length
    for overlapping_region_fraction, fish_size_fraction, focal_length in zip(baseline_rows.overlapping_region_fraction.values, baseline_rows.fish_size_fraction.values, baseline_rows.focal_length.values):
        ax2.annotate(str(focal_length), xy=(overlapping_region_fraction, fish_size_fraction), textcoords='data')

plt.xlabel('Overlapping region fraction')
plt.ylabel('Fish size fraction')
plt.legend()
plt.show()




<h3> The graph above shows that small focal lengths (0.8 cm, 1.0 cm) have the downside that the field of view is large, which creates a "zoomed-out" effect in which the fish takes up less space in the frame. The upside is that it allows us to sample more fish, but I think we want to err on the side of sampling less if it means having more accurate readings on our fish (i.e. we should favor higher precision over higher recall). This graph also shows that having small baselines allows the overlapping region to take up almost the entire field of view, which means that it's very unlikely that a fish will be present in one frame but not the other. But as the previous graph demonstrated, lowering the baseline does come at the small cost of introducing more disparity error, so we should probably not decrease the baseline beyond 6 cm </h3>

<h1> Conclusion </h1>

<h3> My recommendation assuming a horizontally oriented stereo camera is to have a baseline of 6 cm and a focal length of 1.2 cm. A baseline of 6 cm is small enough such that the effective size of the overlapping region, along with its ratio to field of view, is large. This results in higher sampling frequency and lower likelihood of fish in one frame but not in the other. A relatively small baseline does come with the cost of slightly higher pixel disparity error, but due to the high resolution of our cameras, this is not a big deal as shown in Graph #1. I recommend a focal length of 1.2 cm (or 12 mm) based on the fact that it is a fair compromise between having too small of a FOV (which would result in an increased likelihood of fish being present in one frame but not in the other) and too large of a FOV (which would result in fish taking up a small part of the frame and creating a "zoomed out" effect that makes it harder to accurately analyze the fish image charac