A useful thing to do with high dimensional data is to represent it first in a subspace that is convenient for a human. It will allow one to build its intuition of the dataset based on 2D or 3D representation of this dataset.

In [4]:
import os
import pandas

INPUT_DIRECTORY = os.path.join("..", "input")
ARCHIVE_BASENAME = "multimodhandgestrec"
ARCHIVE_SUBDIRECTORY = "MultiModHandGestRec"
DATASET_DIRECTORY = os.path.join(INPUT_DIRECTORY, ARCHIVE_BASENAME, ARCHIVE_SUBDIRECTORY)

os.listdir(DATASET_DIRECTORY)

['near-infrared', 'skeletal']

The dataset is constituted by:
* **near-infrared** images of hand poses and gestures captured by the LeapMotion sensor (a pair of camera)
* **skeletal** higher level informations (skeletal) about fingers and hands extracted by LeapMotion vision algorithms

Near infrared images constitutes a high dimensional dataset and require sharp and state of the art computer vision algorithms in order to extract high level information about hand pose and gesture. LeapMotion did a great job on it and for the sake of simplicity, we propose to focus on skeletal data first.

In [None]:
import re
import cv2
from IPython.core.debugger import set_trace

FRAME_MATCHER = re.compile("fram[a|e]_(\d+)")
# define constants related to dataset structure
DEPTH = len(DATASET_DIRECTORY.split(os.sep)) + 1 # path to DATASET_DIRECTORY depth + 1 for modality 
PATH = "path"

def extract_fields(path):
    assert(path)
    fields = path.split(os.sep)

    subject = fields[DEPTH + 0]
    serie = fields[DEPTH + 1]
    gesture = fields[DEPTH + 2]
    
    field = fields[DEPTH + 3]
    if (field.isnumeric()):
        trial = field
    else:
        trial = "00"  

    filename, file_extension = os.path.splitext(os.path.basename(path))
    result = FRAME_MATCHER.match(filename)
    if (not result):
        raise RuntimeError("Unexpected filename " + filename)
    frame = result.group(1)
            
    return subject, serie, gesture, trial, frame

def extract_near_infrared(path):
    subject, serie, gesture, trial, frame = extract_fields(path)
    
#    color = cv2.imread(path)
#    gray = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
#    normalized = cv2.normalize(gray.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX)
    normalized = "0"
    
    return subject, serie, gesture, trial, frame, normalized

def extract_skeleton(path):
    subject, serie, gesture, trial, frame = extract_fields(path)
    
    file_storage = cv2.FileStorage(path, cv2.FILE_STORAGE_READ)
    
    set_trace()
 
    #features = read_features
    features = "0.0"
    return subject, serie, gesture, trial, frame, features

EXTRACT = {"near-infrared": extract_near_infrared, "skeletal":extract_skeleton}

In [None]:
# initialize dataset as an empty hash table
dataset={}

# iterate trough modalities and build dataset
for modality in os.listdir(DATASET_DIRECTORY):
    
    # create a tabular data structure according to the directory structure and store it in a ahsh table indexed by modality
    subdirectory = os.path.join(DATASET_DIRECTORY, modality)
    paths = [os.path.join(path, filename) for path, filename, files in os.walk(subdirectory) for filename in files]
    data = pandas.DataFrame({PATH: paths})
    
    # initialize different levels of the path structure of the stored dataset as attributes of the dataset table
    data["subject"],\
    data["serie"],\
    data["gesture"],\
    data["trial"],\
    data["frame"],\
    data["data"] = zip(*data["path"].map(EXTRACT[modality]))
    
    # batch cast the Series datatype (faster than casting in extract_field)
    data["subject"] = data["subject"].astype("int64")
    data["serie"] = data["serie"].astype("category")
    data["gesture"] = data["gesture"].astype("category")
    data["trial"] = data["trial"].astype("int64")
    data["frame"] = data["frame"].astype("int64")
  #  data["data"] = data["data"].astype("float32")

    # extract numerical data from files 
    print(data.dtypes)
    print(data.head(100))
    
    dataset[modality] = data; 

path         object
subject       int64
serie      category
gesture    category
trial         int64
frame         int64
data         object
dtype: object
                                                 path  subject         serie  \
0   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
1   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
2   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
3   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
4   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
5   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
6   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
7   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
8   ..\input\multimodhandgestrec\MultiModHandGestR...        0  test_gesture   
9   ..\input\multimodhandgestrec\MultiModHandG

ipdb> print(file_storage.getNode("Frame").getNode("ID").real())
4312.0
ipdb> print(file_storage.getNode("Frame").getNode("ID").integer())
*** AttributeError: 'cv2.FileNode' object has no attribute 'integer'
ipdb> print(file_storage.getNode("Frame").getNode("ID").int())
*** AttributeError: 'cv2.FileNode' object has no attribute 'int'
ipdb> print(file_storage.getNode("Frame").getNode("ID").string())

ipdb> print(file_storage.getNode("Frame").getNode("ID").mat())
*** cv2.error: OpenCV(3.4.1) C:\Miniconda3\conda-bld\opencv-suite_1533128839831\work\modules\core\src\persistence_c.cpp:1422: error: (-2) The node does not represent a user object (unknown type?) in function cvRead
ipdb> print(file_storage.getNode("Frame").getNode("ID").real())
4312.0
ipdb> print(file_storage.getNode("Frame").getNode("ID").float())
*** AttributeError: 'cv2.FileNode' object has no attribute 'float'
ipdb> print(file_storage.getNode("Frame").getNode("ID"))
<FileNode 00000221FCAE4D90>
ipdb> print(file_storage.getNode

**Principal component analysis**

**Self organizing map**