Process EgoHands dataset
Use part of the "EgoHands: A Dataset for Hands in Complex Egocentric Interactions" dataset.

"The EgoHands dataset contains 48 Google Glass videos of complex, first-person interactions between two people." This dataset consists mainly of segmentation masks, which we can convert into bounding boxes.

This notebook will download the EgoHands dataset, sample some examples and save results in a standardised way that we can use later.

Download the data from http://vision.soic.indiana.edu/egohands_files/egohands_data.zip into ./downloads/ and extract the .zip file before running this notebook.

Data will be saved as csv with 2 columns:

path: absolute path to image
boxes: Boxes as json string. [(ymin, xmin, ymax, xmax) ...]

In [14]:
import os
import sys
import pathlib
import pandas as pd
import json

import numpy as np
import cv2
from matplotlib import pyplot as plt
import scipy
import scipy.io
from skimage import measure, io

from sklearn.model_selection import train_test_split

from IPython.display import display

from pycocotools import mask

# Pin NumPy's global random state so any random draws made later are repeatable.
np.random.seed(42)

In [15]:
# Root folder that holds the extracted EgoHands archive.
DOWNLOAD_DIR = '/media/hdd/aron/egohands/'
# Names for the train / test listings (not referenced by the cells visible here).
TRAINING_FILE, TESTING_FILE = 'train_files.csv', 'test_files.csv'
# Folder below the download root whose sub-folders are the individual samples.
DATASET_PATH = os.path.join(DOWNLOAD_DIR, '_LABELLED_SAMPLES')

In [16]:
# Get all directories with samples.
# Path.iterdir() yields entries in arbitrary, filesystem-dependent order, so
# the list is sorted: the train/validation split further down slices it by
# position and would otherwise differ between machines or runs.
sample_directories = sorted(
    entry for entry in pathlib.Path(DATASET_PATH).iterdir() if entry.is_dir()
)
print('{} folders found.'.format(len(sample_directories)))

48 folders found.


In [17]:
def load_polygons(directory):
    """
    Read the annotations stored in a sample directory's polygons.mat file.

    Args:
        directory (Path): pathlib Path object of the directory that contains
            the ``polygons.mat`` file.

    Returns:
        The first row of the ``polygons`` variable found in the .mat file,
        as produced by ``scipy.io.loadmat``.
    """
    mat_file = (directory / 'polygons.mat').resolve()
    contents = scipy.io.loadmat(mat_file)
    # Only the first row of the stored variable is handed back to the caller.
    return contents['polygons'][0]
    
# NOTE: everything between the two ''' markers is a bare string literal, not
# live code. It keeps an earlier bounding-box helper (`get_boxes`) around for
# reference; nothing in it is executed and `get_boxes` is never defined.
# Its docstring says "(ymin, xmin, ymax, ymax)", but the tuple it appends ends
# with max(xs), i.e. the intended layout is (ymin, xmin, ymax, xmax).
'''
def get_boxes(polygons, frame_idx):
    """
    Get all bounding boxes belonging to a single image.
    
    Args:
        polygons (ndarray): Numpy array containing bounding boxes for each image in a directory
            extracted from .mat file struct. Image bounding boxes should follow image order.
        frame_idx (int): Index of image in folder (when sorted alphabetically).
        
    Returns:
        [(float, float, float, float)] List of bounding boxes belonging to a sigle image.
        Bounding box is represented as (ymin, xmin, ymax, ymax).
    """
    frame_polygons = polygons[frame_idx]
    boxes_list = []
    i = 0
    while True:
        try:
            poly = frame_polygons[i]
        except IndexError:
            break
        if poly.shape[1] == 2:
            xs, ys = zip(*[(int(poly[ci][0]), int(poly[ci][1])) for ci in range(poly.shape[0])])
            boxes_list.append((min(ys), min(xs), max(ys), max(xs)))
        i += 1
    return boxes_list
'''

# NOTE(review): this module-level `i` is not read by any cell visible here
# (`annotation_data` uses its own local `i`); presumably a leftover - confirm
# before removing.
i = 1

In [18]:
# Get all samples for each directory
def get_path_polygons(directory):
    """
    Fetch the polygon annotations of one sample directory.

    Thin wrapper around ``load_polygons``: the loaded annotations are passed
    through unchanged and are not paired with image paths here.

    Args:
        directory (Path): pathlib Path object of directory to load samples from.

    Returns:
        Whatever ``load_polygons(directory)`` returns for this directory.
    """
    return load_polygons(directory)


In [19]:
# Dataset-level metadata that annotation_data() places under the 'info',
# 'licenses' and 'categories' keys of every dictionary it builds.
hands_info = {
    'description': 'Hands',
    'url': 'http://vision.soic.indiana.edu/projects/egohands/',
    'version': '0.1',
    'year': 2020,
    'contributor': 'Indiana',
    'date_created': '2015 00:55:41.903634',
}
hands_licenses = [
    {
        'url': 'https://creativecommons.org/licenses/by/4.0/',
        'id': 1,
        'name': 'Creative Commons Attribution 4.0 License',
    },
]
# Single object category: every annotation is a hand.
hands_categories = [{'supercategory': 'object', 'id': 1, 'name': 'hand'}]

def _encode_polygon(polygon, height, width):
    """
    Rasterise one hand outline into a run-length encoded mask.

    Args:
        polygon (ndarray): vertices of a single outline, one row per vertex.
            NOTE(review): columns are assumed to be (x, y), as in the
            disabled ``get_boxes`` helper of this file - confirm.
        height (int): image height in pixels.
        width (int): image width in pixels.

    Returns:
        (dict, list, int): tuple of (rle, bounding box, area), or None when
        the array is not an (N, 2) outline with at least three vertices.
    """
    # Fewer than three vertices cannot enclose any area. A two-vertex outline
    # would also flatten to four numbers, which pycocotools reads as a
    # bounding box instead of a polygon.
    if polygon.ndim != 2 or polygon.shape[0] < 3 or polygon.shape[1] != 2:
        return None

    # Coordinates are truncated to whole pixels, then handed over as floats.
    vertices = polygon.astype(int).astype(float)

    # frPyObjects treats a raw ndarray as a set of bounding boxes. A polygon
    # has to be given as a flat [x1, y1, x2, y2, ...] list, wrapped in a list
    # of polygons.
    rles = mask.frPyObjects([vertices.flatten().tolist()], height, width)
    rle = mask.merge(rles)
    area = int(mask.area(rle))
    bounding_box = mask.toBbox(rle).tolist()

    # 'counts' is bytes; decode it so the result can be dumped as JSON.
    rle['counts'] = rle['counts'].decode('ascii')
    return rle, bounding_box, area


def annotation_data(folders):
    """
    Build a video-level annotation dictionary for a set of sample folders.

    Every folder is treated as one video: its ``*.jpg`` files (sorted by
    path) are the frames, and the polygons returned by ``get_path_polygons``
    are matched to those frames by position. Four annotation tracks are
    created per video; each track stores one entry per frame in its
    ``segmentations``, ``bboxes`` and ``areas`` lists, with ``None`` where
    the frame has no usable outline for that track.

    Args:
        folders (iterable of Path): sample directories, each containing the
            frames and the polygons.mat file of one video.

    Returns:
        dict: keys ``info``, ``licenses``, ``categories``, ``videos`` and
        ``annotations``. Video and annotation ids both start at 1.

    Raises:
        FileNotFoundError: if a folder contains no ``*.jpg`` frame.
    """
    # Number of annotation tracks reserved per video.
    # NOTE(review): presumably one per hand annotated in a frame - confirm
    # against the dataset documentation.
    tracks_per_video = 4

    hand_data = dict(info=hands_info,
                     licenses=hands_licenses,
                     categories=hands_categories,
                     videos=[],
                     annotations=[])
    ann_id = 1
    vid_id = 1
    for directory in folders:
        # List and sort the frames once; the same order is used for the
        # video length, the file names and the pairing with the polygons.
        frame_paths = sorted(directory.glob('*.jpg'))
        if not frame_paths:
            raise FileNotFoundError(
                'No .jpg frames found in {}'.format(directory))

        # The size of the first frame is used for the whole video.
        img = io.imread(frame_paths[0])
        height, width = img.shape[0], img.shape[1]

        video_polygons = get_path_polygons(directory)

        video = dict(width=width,
                     length=len(frame_paths),
                     date_captured='',
                     license='',
                     flickr_url='',
                     file_names=[],
                     id=vid_id,
                     coco_url='',
                     height=height)

        # One annotation track per slot, keyed by its globally unique id.
        annotations = {}
        for _ in range(tracks_per_video):
            annotations[ann_id] = dict(height=height,
                                       width=width,
                                       length=1,
                                       category_id=1,
                                       segmentations=[],
                                       bboxes=[],
                                       video_id=vid_id,
                                       iscrowd=False,
                                       id=ann_id,
                                       areas=[])
            ann_id += 1

        # NOTE(review): zip() stops at the shorter input, so a mismatch
        # between the number of frames and the number of polygon entries is
        # dropped silently - confirm the two always agree.
        for polygons, frame_path in zip(video_polygons, frame_paths):
            # Stored relative to the dataset root: "<folder>/<frame>.jpg".
            video['file_names'].append(
                os.path.join(frame_path.parent.name, frame_path.name))

            # The i-th outline of a frame goes to the i-th track of the video.
            for inst_id, polygon in zip(annotations, list(polygons)):
                track = annotations[inst_id]
                encoded = _encode_polygon(polygon, height, width)
                if encoded is None:
                    track['segmentations'].append(None)
                    track['bboxes'].append(None)
                    track['areas'].append(None)
                else:
                    rle, bounding_box, area = encoded
                    track['segmentations'].append(rle)
                    track['bboxes'].append(bounding_box)
                    track['areas'].append(area)

        hand_data['annotations'].extend(annotations.values())

        vid_id += 1
        hand_data['videos'].append(video)
        # Progress output; printed after the increment, so it shows the id
        # the next video will receive.
        print(vid_id)

    return hand_data


In [None]:
# Convert the first 40 sample folders into the training annotations and all
# remaining folders into the validation annotations. Each call starts its
# video and annotation ids again at 1, so ids are only unique within one set.
# The commented-out line is a smaller 8-folder variant of the training call.
#training_annotation = annotation_data(sample_directories[:8])
training_annotation = annotation_data(sample_directories[:40])
validation_annotation = annotation_data(sample_directories[40:])

2
3
4
5
6
7
8
9
10
11


In [None]:
# Write the training annotations as JSON to <DOWNLOAD_DIR>/annotations/.
# The location is derived from DOWNLOAD_DIR instead of repeating the literal
# path, and the folder is created first because open(..., 'w') does not
# create missing parent directories.
# NOTE(review): validation_annotation is not written by any cell visible here.
annotations_dir = os.path.join(DOWNLOAD_DIR, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)
with open(os.path.join(annotations_dir, 'instances_train.json'), 'w') as outfile:
    json.dump(training_annotation, outfile)