<h1> Keypoint Dataset Construction </h1>

<h3> In this notebook, we will construct the initial base dataset required for training a CNN to detect keypoints </h3>

In [2]:
import json
import os
import tqdm
import glob
import random
import boto3
import pandas as pd
from skimage.io import imread, imsave
from skimage.transform import resize
from skimage.viewer import ImageViewer

<h1> Create small dataset of resized crops to annotate in Labelbox </h1>

In [3]:
# Seed Python's built-in random number generator with a fixed value so that
# anything drawn from `random` in this notebook is repeatable across runs.
SEED = 0
random.seed(SEED)

In [5]:
# Load the analysis table and build a row filter: rows created by
# gunnar@aquabyte.ai whose adult_female_count_adjusted is non-negative.
analysis_df = pd.read_hdf('/root/data/alok/crop_data/data_dumps/analysis_df.h5', 'df')
created_by_accepted_user = analysis_df.created_by == 'gunnar@aquabyte.ai'
has_non_negative_count = analysis_df.adult_female_count_adjusted >= 0
accepted_mask = created_by_accepted_user & has_non_negative_count

# Keep the crop paths of the 10 accepted rows with the latest `created_at`.
latest_accepted_rows = analysis_df[accepted_mask].sort_values('created_at', ascending=False).head(10)
crop_fs = list(latest_accepted_rows.crop_path.values)

# For every crop, derive the path of its metadata JSON (same base name, .json
# extension) inside `metadata_dir`. Missing files are reported but the path is
# still recorded so that `metadata_fs` stays aligned index-for-index with
# `crop_fs`.
metadata_dir = '/root/data/alok/crop_data/crop_metadata'
metadata_fs = []
for crop_path in crop_fs:
    json_name = os.path.basename(crop_path).replace('.jpg', '.json')
    json_path = os.path.join(metadata_dir, json_name)
    print(json_path)
    if not os.path.exists(json_path):
        print('Error!')
    metadata_fs.append(json_path)



# Output locations for the keypoint dataset: original crops, 224x224 resized
# crops, and the per-crop metadata JSON files.
crops_for_keypoints_dir = '/root/data/alok/keypoint_detection/crops'
crops_for_keypoints_resized_dir = '/root/data/alok/keypoint_detection/crops_resized'
metadata_for_keypoints_dir = '/root/data/alok/keypoint_detection/crop_metadata'

# Create every output directory before anything is written. The resized-crop
# directory is included because resized crops are saved into it; without it
# the save fails on a machine where the directory does not exist yet.
for output_dir in (crops_for_keypoints_dir,
                   crops_for_keypoints_resized_dir,
                   metadata_for_keypoints_dir):
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)


# For every selected crop: save the original and a 224x224 resized copy, copy
# its metadata JSON into the keypoint dataset, and record the crop's public S3
# URL. `urls` and the copied metadata are consumed by later cells (URL listing
# and the crop/metadata consistency check), so they must be produced here for
# the notebook to work from a fresh kernel.
url_prefix = 'https://s3-eu-west-1.amazonaws.com/aquabyte-crops'
urls = []
for crop_f, metadata_f in zip(crop_fs, metadata_fs):
    crop = imread(crop_f)
    # Pin the border mode explicitly: the library default is announced to
    # change from 'constant' to 'reflect', which would silently alter the
    # resized pixels and emits a warning on every call.
    crop_resized = resize(crop, (224, 224), mode='constant')

    # The original and the resized crop share the same file name; they only
    # differ in the directory they are written to. The loop variable `crop_f`
    # is deliberately left untouched.
    crop_f_name = os.path.basename(crop_f)
    imsave(os.path.join(crops_for_keypoints_dir, crop_f_name), crop)
    imsave(os.path.join(crops_for_keypoints_resized_dir, crop_f_name), crop_resized)

    # A crop without metadata cannot yield a URL; report it and move on
    # instead of aborting the whole export.
    if not os.path.exists(metadata_f):
        print('No metadata found for crop {}: {}'.format(crop_f_name, metadata_f))
        continue

    with open(metadata_f) as f:
        metadata = json.load(f)

    # Build the URL with plain '/' joining: os.path.join would drop the
    # prefix entirely if the key ever started with a slash.
    s3_key = metadata['image_key']
    urls.append('{}/{}'.format(url_prefix, s3_key.lstrip('/')))

    # write metadata to disk
    metadata_f_name = os.path.basename(metadata_f)
    with open(os.path.join(metadata_for_keypoints_dir, metadata_f_name), 'w') as f:
        json.dump(metadata, f)
    
    
    



/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248666107_1318_1432_3706_2341.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248688324_133_1274_2683_2029.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248116104_0_1014_2610_2006.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548247952766_1365_1784_4096_2631.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244619498_961_1148_3486_1921.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244042752_643_1238_3774_2186.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244180535_997_815_3294_1807.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244009420_0_620_2566_1341.json
/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244347204_767_921_3510_1817.json
/root/data/alok/crop_data/crop_metadata/left_blom-k

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  .format(dtypeobj_in, dtypeobj_out))


In [12]:
# Display the collected metadata paths (bare expression -> rendered as cell output).
metadata_fs

['/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248666107_1318_1432_3706_2341.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248688324_133_1274_2683_2029.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548248116104_0_1014_2610_2006.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548247952766_1365_1784_4096_2631.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244619498_961_1148_3486_1921.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244042752_643_1238_3774_2186.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244180535_997_815_3294_1807.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244009420_0_620_2566_1341.json',
 '/root/data/alok/crop_data/crop_metadata/left_blom-kjeppevikholmen_2_1548244347204_767_921_3510_1817.json',
 '/root/data/al

In [28]:
# Print the URLs one per line, with a trailing comma on every line except the
# last. Nothing is printed when there are no URLs.
if urls:
    print(',\n'.join(urls))

https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548248666107_1318_1432_3706_2341.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548248688324_133_1274_2683_2029.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548248116104_0_1014_2610_2006.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548247952766_1365_1784_4096_2631.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548244619498_961_1148_3486_1921.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2/2019-01-23/left_blom-kjeppevikholmen_2_1548244042752_643_1238_3774_2186.jpg,
https://s3-eu-west-1.amazonaws.com/aquabyte-crops/dev/blom-kjeppevikholmen/2

<h1> Determine lone crop paths and lone metadata paths </h1>

<h3> This check ensures that the data on disk is consistent: every crop must have a matching metadata file, and every metadata file must have a matching crop </h3>

In [35]:
crop_fs = glob.glob(os.path.join(crops_for_keypoints_dir, '*.jpg'))
metadata_fs = glob.glob(os.path.join(metadata_for_keypoints_dir, '*.json'))


def _expected_partner(path, partner_dir, ext, partner_ext):
    """Return the path in `partner_dir` where the file paired with `path` should be.

    The partner shares the base name of `path`, with `ext` replaced by
    `partner_ext`.
    """
    partner_name = os.path.basename(path).replace(ext, partner_ext)
    return os.path.join(partner_dir, partner_name)


# crops whose metadata JSON is missing
lone_crop_fs = [
    crop_path for crop_path in crop_fs
    if not os.path.exists(
        _expected_partner(crop_path, metadata_for_keypoints_dir, '.jpg', '.json'))
]
for crop_path in lone_crop_fs:
    print('Lone crop path found: {}'.format(crop_path))

# metadata JSON files whose crop image is missing
lone_metadata_fs = [
    metadata_path for metadata_path in metadata_fs
    if not os.path.exists(
        _expected_partner(metadata_path, crops_for_keypoints_dir, '.json', '.jpg'))
]
for metadata_path in lone_metadata_fs:
    print('Lone metadata path found: {}'.format(metadata_path))



<h1> Update the metadata with keypoint information (after Labelbox annotation is complete) </h1>

In [7]:
# Inputs and outputs for merging keypoint annotations into the crop metadata:
# - crops_dir / metadata_dir: the crops and their existing metadata JSON files
# - new_metadata_dir: where the keypoint-augmented metadata is written
# - keypoint_metadata_f: JSON file holding the annotation objects
#   (presumably the Labelbox export -- confirm)
crops_dir = '/root/data/alok/keypoint_detection/crops'
metadata_dir = '/root/data/alok/keypoint_detection/crop_metadata'
new_metadata_dir = '/root/data/alok/keypoint_detection/new_crop_metadata'
keypoint_metadata_f = '/root/data/alok/keypoint_detection/data_dumps/crop_keypoint_metadata.json'

# Read through a context manager so the file handle is closed as soon as the
# JSON is parsed instead of lingering in the kernel.
with open(keypoint_metadata_f) as f:
    annotated_fish_objs = json.load(f)

In [8]:
# The augmented metadata is written into new_metadata_dir; make sure it exists
# before the first write.
if not os.path.isdir(new_metadata_dir):
    os.makedirs(new_metadata_dir)

# For every annotated object: collect its keypoints, locate the crop and its
# metadata by file name, and write a copy of the metadata with a 'keypoints'
# entry added.
for obj in annotated_fish_objs:
    # Map each labelled body part to the (x, y) of its first annotation entry.
    annotation_obj = {
        body_part: (coords[0]['geometry']['x'], coords[0]['geometry']['y'])
        for body_part, coords in obj['Label'].items()
    }

    # The annotated URL ends in the crop's file name, which is also its name
    # on disk.
    s3_url = obj['Labeled Data']
    crop_f_name = os.path.basename(s3_url)
    crop_f = os.path.join(crops_dir, crop_f_name)

    if not os.path.exists(crop_f):
        # Raise a real exception: raising a bare string is itself a TypeError
        # and would hide the intended message.
        raise IOError('This crop file path does not exist: {}'.format(crop_f))

    metadata_f_name = crop_f_name.replace('.jpg', '.json')
    metadata_f = os.path.join(metadata_dir, metadata_f_name)

    with open(metadata_f) as f:
        metadata = json.load(f)
    metadata['keypoints'] = annotation_obj

    # Existing output files are left untouched, so re-running the cell never
    # overwrites metadata that was already written.
    new_metadata_f = os.path.join(new_metadata_dir, metadata_f_name)
    if not os.path.exists(new_metadata_f):
        with open(new_metadata_f, 'w') as f:
            json.dump(metadata, f)
    
        
        