### Import libraries

In [1]:
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras
import sklearn as sk
import sklearn.model_selection

### Extract labels for images

In [2]:
result_frame = pd.read_csv('dataset/subsystem_1/Dataset_Subsystem_1.csv')

In [3]:
result_frame

Unnamed: 0,ID,source,frame,camera_facing_side,gesture,palm_root_x,palm_root_y,palm_thumb_1_x,palm_thumb_1_y,palm_thumb_2_x,...,dorsal_ring_4_x,dorsal_ring_4_y,dorsal_pinky_1_x,dorsal_pinky_1_y,dorsal_pinky_2_x,dorsal_pinky_2_y,dorsal_pinky_3_x,dorsal_pinky_3_y,dorsal_pinky_4_x,dorsal_pinky_4_y
0,102,open_palm.webm,0,open,palm,279,369,189,332,137,...,0,0,0,0,0,0,0,0,0,0
1,102,open_palm.webm,1,open,palm,279,370,188,331,137,...,0,0,0,0,0,0,0,0,0,0
2,102,open_palm.webm,2,open,palm,279,370,187,331,137,...,0,0,0,0,0,0,0,0,0,0
3,102,open_palm.webm,3,open,palm,278,370,186,330,136,...,0,0,0,0,0,0,0,0,0,0
4,102,open_palm.webm,4,open,palm,278,371,185,329,136,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11991,723,three_fingers_dorsal.webm,197,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,312,0,0,0,0,0,0
11992,723,three_fingers_dorsal.webm,198,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0
11993,723,three_fingers_dorsal.webm,199,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0
11994,723,three_fingers_dorsal.webm,200,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0


In [4]:
result_frame_102 = result_frame[(result_frame['ID']==102) & (result_frame['frame'] < 10)]

In [5]:
Y = result_frame_102.values[:,5:]

In [6]:
Y

array([[279, 369, 189, ..., 0, 0, 0],
       [279, 370, 188, ..., 0, 0, 0],
       [279, 370, 187, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

### Extract frames from videos and transform them into images (do not run: too much data for the RAM)

In [7]:
train_folders = ['102'] #'159', '294', '441', '564', '576', '609','666','711','723'
images_array = []
gestures = ['open_palm','open_dorsal','fist_palm','fist_dorsal','three_fingers_palm','three_fingers_dorsal']
for folder in train_folders:
    for gesture in gestures:
        i = 0
        video_reader = cv2.VideoCapture("dataset/subsystem_1/videos/"+folder+"/"+gesture+".webm")
        ret, frame = video_reader.read()
        while ret and i < 10:
            ret, frame = video_reader.read()
            i += 1
            if not ret:
                continue
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            images_array.append(gray_frame)
X = np.array(images_array)
X.shape

(60, 480, 640)

### Create model to extract landmarks

In [8]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier 

class KerasClassifier_Patched(KerasClassifier):
    # bugfix: classifier doesn't declare that it is a classifier
    # in the Scikit learn API
    _estimator_type = "classifier"
    
    # bugfix: the current wrapper does not work with HotOne encoded
    # labels
    # this is only a fix in the specific case of this notebook,
    # not a general onessss
    def score(self, x, y, **kwargs):
        _, accuracy = self.model.evaluate(x,y, verbose=0, **kwargs)
        return accuracy

In [9]:
def setupModel():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(1,480,640,4)))
    model.add(tf.keras.layers.Conv2D(1,2,input_shape = (1,480,640,4)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(80))

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-4),
                  loss=tf.keras.losses.MeanSquaredError()
                 )
    return model

model = KerasClassifier_Patched(build_fn=setupModel,
                                epochs=5,
                                batch_size=10,
                                verbose=1)

### Train the model

In [10]:
X_train, X_test, Y_train, Y_test = sk.model_selection.train_test_split(X,Y,test_size=.2)
X_train = tf.convert_to_tensor(X_train,np.float32)
X_test = tf.convert_to_tensor(X_test,np.float32)
Y_train = tf.convert_to_tensor(Y_train,np.float32)
Y_test = tf.convert_to_tensor(Y_test,np.float32)

In [12]:
X = tf.convert_to_tensor(X,np.float32)
Y = tf.convert_to_tensor(Y,np.float32)
dataset = tf.data.Dataset.from_tensor_slices((X, Y))
batched_dataset = dataset.batch(4)

In [14]:
print(X.shape)
print(Y.shape)

(60, 480, 640)
(60, 80)


In [None]:
model.fit(X,Y)

In [None]:
print(Y)

In [None]:
model.predict(np.array([X[0]]))

In [None]:
sk.model_selection.cross_val_score(model, X, Y, cv=5, scoring='neg_mean_squared_error')

### Validate the model

In [None]:
model.metrics_squared_error(X_test,Y_test)