### Import libraries

In [104]:
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.optimizers import SGD
import sklearn as sk
import sklearn.model_selection

### Extract labels for images

In [105]:
result_frame = pd.read_csv('dataset/subsystem_1/Dataset_Subsystem_1.csv')

In [106]:
result_frame

Unnamed: 0,ID,source,frame,camera_facing_side,gesture,palm_root_x,palm_root_y,palm_thumb_1_x,palm_thumb_1_y,palm_thumb_2_x,...,dorsal_ring_4_x,dorsal_ring_4_y,dorsal_pinky_1_x,dorsal_pinky_1_y,dorsal_pinky_2_x,dorsal_pinky_2_y,dorsal_pinky_3_x,dorsal_pinky_3_y,dorsal_pinky_4_x,dorsal_pinky_4_y
0,102,open_palm.webm,0,open,palm,279,369,189,332,137,...,0,0,0,0,0,0,0,0,0,0
1,102,open_palm.webm,1,open,palm,279,370,188,331,137,...,0,0,0,0,0,0,0,0,0,0
2,102,open_palm.webm,2,open,palm,279,370,187,331,137,...,0,0,0,0,0,0,0,0,0,0
3,102,open_palm.webm,3,open,palm,278,370,186,330,136,...,0,0,0,0,0,0,0,0,0,0
4,102,open_palm.webm,4,open,palm,278,371,185,329,136,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11991,723,three_fingers_dorsal.webm,197,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,312,0,0,0,0,0,0
11992,723,three_fingers_dorsal.webm,198,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0
11993,723,three_fingers_dorsal.webm,199,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0
11994,723,three_fingers_dorsal.webm,200,three_fingers,dorsal,0,0,0,0,0,...,0,0,800,311,0,0,0,0,0,0


In [107]:
result_frame_102_159 = result_frame[(result_frame['ID'] >= 102) & (result_frame['ID'] <= 300)  & (result_frame['frame'] < 10)]

In [108]:
Y = result_frame_102_159.values[:,5:]

In [109]:
Y

array([[279, 369, 189, ..., 0, 0, 0],
       [279, 370, 188, ..., 0, 0, 0],
       [279, 370, 187, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

### Extract frames from videos and transform them into images (do not run: too much data for the RAM)

In [110]:
train_folders = ['102','159', '294'] #, '441', '564', '576', '609','666','711','723'
images_array = []
gestures = ['open_palm','open_dorsal','fist_palm','fist_dorsal','three_fingers_palm','three_fingers_dorsal']
for folder in train_folders:
    for gesture in gestures:
        i = 0
        video_reader = cv2.VideoCapture("dataset/subsystem_1/videos/"+folder+"/"+gesture+".webm")
        ret, frame = video_reader.read()
        while ret and i < 10:
            ret, frame = video_reader.read()
            i += 1
            if not ret:
                continue
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            images_array.append(gray_frame)
X = np.array(images_array)
X.shape

(180, 480, 640)

In [111]:
X = X.reshape((-1,480,640,1))

In [112]:
X_train,X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X,Y, test_size=0.2)

### Convert data to be usable by Tensorflow and Keras

In [113]:
X_train = tf.convert_to_tensor(X_train,np.float32)
X_test = tf.convert_to_tensor(X_test,np.float32)
Y_train = tf.convert_to_tensor(Y_train,np.float32)
Y_test = tf.convert_to_tensor(Y_test,np.float32)
# dataset = tf.data.Dataset.from_tensor_slices((X, Y))
# batched_dataset = dataset.batch(4)

### Create model to extract landmarks

In [114]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(480,640,1)),
    tf.keras.layers.Conv2D(1,3,input_shape = (480,640), activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200,activation='relu'),
    tf.keras.layers.Dense(80)])

sgd = SGD(learning_rate=1)
model.compile(optimizer=sgd,
              loss='mean_squared_error')

### Train the model

In [115]:
model.fit(X_train,Y_train,epochs=100)

Train on 144 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
 32/144 [=====>........................] - ETA: 4s

KeyboardInterrupt: 

In [116]:
Y_pred = model.predict(X_test[:5])

In [117]:
Y_pred.shape

(5, 80)

In [118]:
Y_pred

array([[190.0718  , 189.18588 , 168.85591 , 166.15862 , 136.59021 ,
        134.03525 , 133.50255 , 126.307945, 119.34069 ,  89.85905 ,
        118.02877 ,  79.56908 , 110.39867 ,  71.373   , 106.1097  ,
         59.40292 , 125.759026,  95.50278 , 129.4871  ,  85.40651 ,
        132.88689 ,  64.74645 , 128.78334 ,  57.455914,  66.767784,
         43.888832,  69.30297 ,  45.656185,  70.27853 ,  37.792885,
         71.60813 ,  43.121746,  76.91348 ,  57.174843,  80.36156 ,
         46.80646 ,  83.7785  ,  50.391327,  90.27084 ,  50.100037,
        175.64537 , 202.1279  , 135.25963 , 131.32924 , 131.96988 ,
        110.716484, 142.52126 , 103.21745 , 185.40863 , 138.81712 ,
        195.02809 , 119.93926 , 182.49107 , 108.4011  , 147.14185 ,
         76.355316, 175.88245 , 144.82857 , 186.28761 , 121.22799 ,
        179.63954 , 108.32012 , 141.95682 ,  79.83326 , 176.81633 ,
        149.06538 , 256.77344 , 197.02347 , 192.18787 , 126.511826,
        149.69254 , 100.99561 , 173.24565 , 155.

In [102]:
Y_test

<tf.Tensor: shape=(36, 80), dtype=float32, numpy=
array([[382., 423., 322., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [278., 371., 184., ...,   0.,   0.,   0.],
       ...,
       [500., 381., 440., ..., 323.,   0.,   0.],
       [  0.,   0.,   0., ..., 266., 570., 246.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]], dtype=float32)>

{'_input_dataset': <TensorSliceDataset shapes: ((480, 640), (80,)), types: (tf.float32, tf.float32)>,
 '_batch_size': <tf.Tensor: shape=(), dtype=int64, numpy=4>,
 '_drop_remainder': <tf.Tensor: shape=(), dtype=bool, numpy=False>,
 '_structure': (TensorSpec(shape=(None, 480, 640), dtype=tf.float32, name=None),
  TensorSpec(shape=(None, 80), dtype=tf.float32, name=None)),
 '_variant_tensor_attr': <tf.Tensor: shape=(), dtype=variant, numpy=<unprintable>>,
 '_self_setattr_tracking': True,
 '_self_unconditional_checkpoint_dependencies': [TrackableReference(name='_variant_tracker', ref=<tensorflow.python.data.ops.dataset_ops._VariantTracker object at 0x7fadfddf3b50>)],
 '_self_unconditional_dependency_names': {'_variant_tracker': <tensorflow.python.data.ops.dataset_ops._VariantTracker at 0x7fadfddf3b50>},
 '_self_unconditional_deferred_dependencies': {},
 '_self_update_uid': -1,
 '_self_name_based_restores': set(),
 '_variant_tracker': <tensorflow.python.data.ops.dataset_ops._VariantTracker

In [22]:
print(X.shape)
print(Y.shape)

(60, 480, 640)
(60, 80)


In [34]:
model.fit(batched_dataset)

TypeError: fit() missing 1 required positional argument: 'y'

In [None]:
print(Y)

In [None]:
model.predict(np.array([X[0]]))

In [None]:
sk.model_selection.cross_val_score(model, X, Y, cv=5, scoring='neg_mean_squared_error')

### Validate the model

In [None]:
model.metrics_squared_error(X_test,Y_test)