### #0 Load Mask R-CNN results

In [None]:
import json
import os

In [None]:
# Read the COCO-style detection results; the context manager closes the
# file handle as soon as the JSON has been parsed.
with open('/root/data/headtail/detection_coco_results.json') as results_file:
    results = json.load(results_file)

In [None]:
# Quick size summary of the loaded detections.
image_count = len(results["images"])
print("Number of images: {}".format(image_count))
annotations = results["annotations"]
print("Number of annotations: {}".format(len(annotations)))
# The last figure is the number of distinct image ids that own an annotation.
annotated_image_ids = {entry['image_id'] for entry in annotations}
print("Number of unique annotations: {}".format(len(annotated_image_ids)))

In [None]:
# Index the image records by their id so later lookups avoid scanning the list.
image_dic = {image_record["id"]: image_record for image_record in results["images"]}

In [None]:
# Index the annotations by the id of the image they belong to.
# When several annotations share an image_id, the last one in the list is kept.
ann_dic = {annotation["image_id"]: annotation for annotation in results["annotations"]}

In [None]:
# One (image path, image id) tuple per annotation, in annotation order.
paths = [
    (image_dic[detection['image_id']]["image_path"], detection["image_id"])
    for detection in results['annotations']
]
print(len(paths))

In [None]:
# Group detections by the timestamp embedded in the file name.
# File stems are split on '_': the first token is used as the side key and
# the last token (converted to int) as the timestamp.
timestamps = {}
for image_path, image_id in paths:
    stem_tokens = os.path.basename(image_path).split('.')[0].split('_')
    ts = int(stem_tokens[-1])
    side = stem_tokens[0]
    entry = timestamps.setdefault(ts, {})
    entry[side] = image_path
    entry[side + "_id"] = image_id

In [None]:
# Count the timestamps that have a detection for both sides.
complete_pairs = [entry for entry in timestamps.values() if "right" in entry and "left" in entry]
print("number of pairs: {}".format(len(complete_pairs)))

So for 406 pairs we have a full fish on both sides.

### #1 Display some results

In [None]:
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from matplotlib.patches import Rectangle, Polygon
import numpy as np
from pycocotools.mask import decode

In [None]:
# Keep only the timestamp entries that carry both a left and a right detection.
good_pairs = [
    pair
    for pair in timestamps.values()
    if "left" in pair and "right" in pair
]

In [None]:
# Hand-picked stereo pair used by the display cell below (same layout as the
# entries of `timestamps`: a path and an image id per side).
gp = {
    'left': '/root/data/rnd/small_pen_data_collection/sotra-small-pen/pen-1/2018-10-01/181001010007_rectified/left_sotra-small-pen_0_1538488432310.jpg',
    'left_id': 2490,
    'right': '/root/data/rnd/small_pen_data_collection/sotra-small-pen/pen-1/2018-10-01/181001010007_rectified/right_sotra-small-pen_0_1538488432310.jpg',
    'right_id': 2763,
}

In [None]:
# Draw one stereo pair with its detection box and mask outline on each image.
# random_timestamp = np.random.choice(good_pairs)
# The pair used to be bound to `random_timestamps`, which left the name read
# below undefined on a fresh kernel.
random_timestamp = gp


def _detection_patches(annotation):
    """Build the matplotlib patches for one detection.

    Returns a dashed white Rectangle made from annotation['bbox'] and a
    Polygon made from the first entry of annotation['segmentation'], which is
    reshaped from a flat list into (x, y) rows.
    """
    bbox = annotation['bbox']
    box_patch = Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=2, edgecolor='w', linestyle="--", facecolor='none')
    seg = annotation['segmentation'][0]
    outline = np.array(seg).reshape((len(seg) // 2, 2))
    return box_patch, Polygon(outline)


# NOTE(review): the "left" variables are filled from the "right" file/id and
# vice versa, exactly as before - confirm this matches the camera naming.
# left side
left_img = Image.open(random_timestamp["right"])
left_ann = ann_dic[random_timestamp["right_id"]]
left_bbox = left_ann['bbox']
left_rec, left_mask = _detection_patches(left_ann)

# right side
right_img = Image.open(random_timestamp["left"])
right_ann = ann_dic[random_timestamp["left_id"]]
right_bbox = right_ann['bbox']
right_rec, right_mask = _detection_patches(right_ann)


f, ax = plt.subplots(1, 2, figsize=(20, 10))
ax[0].imshow(left_img)
ax[0].add_patch(left_rec)
ax[0].add_patch(left_mask)

ax[1].imshow(right_img)
ax[1].add_patch(right_rec)
ax[1].add_patch(right_mask)

plt.show()

### #2 Calculate centroids, depth, length

In [None]:
from sklearn.cluster import DBSCAN, KMeans

In [None]:
# Stereo rig constants used for the depth and world-coordinate computations.
# NOTE(review): units are presumably metres - confirm against the camera spec.
baseline = 0.135
focal_length = 0.0107
pixel_size_m = 3.45 * 1e-6 
# focal length expressed in pixels (focal length divided by the pixel size)
focal_length_pixel = focal_length / pixel_size_m

# NOTE(review): both sensor dimensions are identical although
# convert_to_world_point divides them by different pixel counts (3000 and
# 4096) - verify these values.
image_sensor_height = 0.01412
image_sensor_width = 0.01412

In [None]:
def convert_to_world_point(x, y, d, x_resolution=3000, y_resolution=4096):
    """Project a pixel position with a known depth into camera-centred coordinates.

    Parameters
    ----------
    x, y : pixel coordinates; ``x`` is measured along the axis that has
        ``x_resolution`` pixels and ``y`` along the axis that has
        ``y_resolution`` pixels.
    d : depth of the point, returned unchanged as the second coordinate.
    x_resolution, y_resolution : pixel counts used both for the image centre
        and for the pixel-to-sensor scale. The defaults reproduce the
        previously hard-coded 3000 and 4096.

    Returns
    -------
    tuple ``(world_x, world_y, world_z)`` where ``world_y`` is ``d``.

    Reads the module-level ``image_sensor_width``, ``image_sensor_height`` and
    ``focal_length``.

    NOTE(review): the callers in this notebook pass the centroid's second
    coordinate as ``x`` and its first as ``y`` - confirm the intended axis
    convention against the real image shape.
    """
    # offset from the image centre; the second axis is flipped (centre - y)
    px_x = x - x_resolution / 2.0
    px_z = y_resolution / 2.0 - y

    # pixel offsets -> offsets on the sensor
    sensor_x = px_x * (image_sensor_width / x_resolution)
    sensor_z = px_z * (image_sensor_height / y_resolution)

    # scale the sensor offsets by depth / focal length
    world_y = d
    world_x = (world_y * sensor_x) / focal_length
    world_z = (world_y * sensor_z) / focal_length
    return (world_x, world_y, world_z)

In [None]:
# Seed handed to KMeans below so the clustering is repeatable between runs.
random_state = 170

In [None]:
# Show the first complete stereo pair to check the layout of an entry.
good_pairs[0]

In [None]:
# Estimate one length per stereo pair and collect the estimates per experiment.
# NOTE: `results` is rebound here, replacing the detections loaded from JSON at
# the top of the notebook; reload them before re-running the earlier cells.
def _sorted_mask_centroids(annotation):
    """Return the two cluster centroids of a mask outline, sorted by y.

    The first entry of annotation['segmentation'] is reshaped into (x, y)
    rows, split into two clusters with KMeans (seeded by `random_state`), and
    each cluster is summarised by the mean of its points. The resulting
    (2, 2) array is ordered by the second column so that the rows of the two
    views line up with each other.
    """
    seg = annotation['segmentation'][0]
    outline = np.array(seg).reshape((len(seg) // 2, 2))
    labels = KMeans(n_clusters=2, random_state=random_state).fit_predict(outline)
    centroids = []
    for label in np.unique(labels):
        x_mean = np.mean(outline[labels == label, 0])
        y_mean = np.mean(outline[labels == label, 1])
        centroids.append((x_mean, y_mean))
    centroids = np.array(centroids)
    return centroids[centroids[:, 1].argsort()]


results = {}
for (i, random_timestamp) in enumerate(good_pairs):
    if i % 10 == 0:
        print('{}/{}'.format(i, len(good_pairs)))

    # experiment id = parent directory name up to the first underscore
    experience = random_timestamp["right"].split('/')[-2].split('_')[0]

    # Only the annotations are needed here; the images and bbox patches the
    # loop used to create were never used and have been dropped.
    # NOTE(review): as before, the "left" centroids come from the "right"
    # detection and vice versa - confirm this matches the camera naming.
    # Previously left_centroids was sorted without ever being built from the
    # freshly computed centroids (NameError / stale data from another cell).
    left_centroids = _sorted_mask_centroids(ann_dic[random_timestamp["right_id"]])
    right_centroids = _sorted_mask_centroids(ann_dic[random_timestamp["left_id"]])

    # estimate disparities (difference of the x coordinates, one per centroid)
    disparities = left_centroids[:, 0] - right_centroids[:, 0]

    # estimate depth, one value per centroid
    depth = focal_length_pixel * baseline / disparities

    # calculate world coordinate of point 0
    world0 = convert_to_world_point(left_centroids[0][1], left_centroids[0][0], depth[0])

    # calculate world coordinate of point 1, using its own depth
    # (this call used depth[0], i.e. the depth of point 0)
    world1 = convert_to_world_point(left_centroids[1][1], left_centroids[1][0], depth[1])

    # length = distance between the two world points
    predicted_length = np.linalg.norm(np.array(world0) - np.array(world1))
    results.setdefault(experience, []).append(predicted_length)

In [None]:
# Start with one empty (None) ground-truth slot per experiment id found in `results`.
ground_truth = dict.fromkeys(results)

In [None]:
# Reference length for each experiment id.
# NOTE(review): values presumably share the unit of the predicted lengths - confirm.
# The last key was '1810010100010' (13 digits); the other ids have 12 digits,
# so it is corrected to '181001010010' - verify against results.keys().
ground_truth.update({
    '181001010001': 0.695,
    '181001010002': 0.75,
    '181001010003': 0.585,
    '181001010004': 0.625,
    '181001010005': 0.685,
    '181001010006': 0.645,
    '181001010007': 0.535,
    '181001010008': 0.66,
    '181001010009': 0.56,
    '181001010010': 0.655,
})

In [None]:
# One column of predicted lengths per experiment, with a short black bar at
# the ground-truth value when one is known.
experiment_ids = list(results.keys())
for (i, exp) in enumerate(experiment_ids):
    plt.scatter(np.zeros_like(np.array(results[exp])) + i, results[exp])
    reference = ground_truth.get(exp)
    # experiments whose ground truth is still unset get no reference bar
    if reference is not None:
        plt.plot([i - 0.3, i + 0.3], [reference] * 2, color='k')
# tick count follows the number of experiments instead of a hard-coded 10
plt.xticks(range(len(experiment_ids)), experiment_ids, rotation=70)
plt.ylim([0, 2])

In [None]:
# List the experiment ids that received at least one length estimate.
results.keys()