In [1]:
import os
import gc
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

tf.__version__

'2.7.0'

In [2]:
# -------- TEST USER ----------- #

TEST_USER      = '001'

BASE_DIR       = '../Dataset/'
IMG_DIR        = 'RGB-Spatial-Path-Images/'
LOG_DIR        = 'Logs/'

USERS          = ['001', '002', '003', '004', '005', '006', '007', '008', '009',
                '010', '011', '012', '013', '014', '015', '016', '017', '018',
                '019', '020', '021', '022', '023', '024', '025']

# ------------------------------- Only Dynalic Gestures ------------------------------ #
GESTURES       = ['j', 'z', 'bad', 'deaf', 'fine', 'good', 'goodbye', 'hello', 'hungry',
                  'me', 'no', 'please', 'sorry', 'thankyou', 'yes', 'you']

PLANES         = ['XY', 'YZ', 'ZX']

BATCH_SIZE     = 8
IMG_LEN        = 160
IMG_SIZE       = (IMG_LEN, IMG_LEN)

# ------------- FOR THE GREATER GOOD :) ------------- #
TRAIN_LEN      = 3840
TEST_LEN       = 160

EPOCHS         = 7
LEARNING_RATE  = 0.001

In [3]:
def load_data(plane):
    X_train = np.zeros((TRAIN_LEN, IMG_LEN, IMG_LEN, 3), dtype='uint8')
    X_test = np.zeros((TEST_LEN, IMG_LEN, IMG_LEN, 3), dtype='uint8')
    y_train = np.zeros((TRAIN_LEN, 1), dtype='uint8')
    y_test = np.zeros((TEST_LEN, 1), dtype='uint8')
    
    train_count = 0
    test_count = 0
        
    for gesture in GESTURES:
        print('loading data for ' + gesture + ' gesture on the ' + plane + ' plane ... ', end='')
        path = os.path.join(BASE_DIR, IMG_DIR, plane, gesture)
        for filename in os.listdir(path):
            img = cv2.imread(os.path.join(path, filename))
            resized = cv2.resize(img, IMG_SIZE)
            if filename[1:4] != TEST_USER:
                X_train[train_count, :] = resized
                y_train[train_count, 0] = GESTURES.index(gesture)
                train_count = train_count + 1
            else:
                X_test[test_count, :] = resized
                y_test[test_count, 0] = GESTURES.index(gesture)
                test_count = test_count + 1
                
        print('√')
        
    return X_train, X_test, y_train, y_test

In [4]:
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset= -1)

In [5]:
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
base_model.trainable = False

2021-12-29 19:31:31.202621: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-29 19:31:31.244681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-29 19:31:31.245271: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-29 19:31:31.246332: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [6]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(len(GESTURES))

In [7]:
def get_model():
    inputs = tf.keras.Input(shape=IMG_SHAPE)
    x = preprocess_input(inputs)
    x = base_model(x, training=False)
    x = global_average_layer(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = prediction_layer(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=LEARNING_RATE),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
    return model

In [18]:
model = get_model()
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_3 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_3 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                    

  super(Adam, self).__init__(name, **kwargs)


In [8]:
X_train_xy, X_test_xy, y_train_xy, y_test_xy = load_data('XY')
model_xy = get_model()
history_xy = model_xy.fit(X_train_xy, y_train_xy, epochs=EPOCHS)

loading data for j gesture on the XY plane ... √
loading data for z gesture on the XY plane ... √
loading data for bad gesture on the XY plane ... √
loading data for deaf gesture on the XY plane ... √
loading data for fine gesture on the XY plane ... √
loading data for good gesture on the XY plane ... √
loading data for goodbye gesture on the XY plane ... √
loading data for hello gesture on the XY plane ... √
loading data for hungry gesture on the XY plane ... √
loading data for me gesture on the XY plane ... √
loading data for no gesture on the XY plane ... √
loading data for please gesture on the XY plane ... √
loading data for sorry gesture on the XY plane ... √
loading data for thankyou gesture on the XY plane ... √
loading data for yes gesture on the XY plane ... √
loading data for you gesture on the XY plane ... √


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/7


2021-12-29 19:31:37.365930: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201


Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [9]:
prob_xy = tf.keras.Sequential([model_xy, tf.keras.layers.Softmax()])
y_pred_xy = prob_xy.predict(X_test_xy)
y_pred = np.argmax(y_pred_xy, axis=1)
print(classification_report(y_test_xy.ravel(), y_pred, zero_division=0))
X_train_xy = None
X_test_xy = None
gc.collect()


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.80      0.89        10
           2       0.71      1.00      0.83        10
           3       0.83      1.00      0.91        10
           4       1.00      1.00      1.00        10
           5       0.88      0.70      0.78        10
           6       1.00      1.00      1.00        10
           7       1.00      1.00      1.00        10
           8       1.00      1.00      1.00        10
           9       1.00      0.90      0.95        10
          10       1.00      1.00      1.00        10
          11       0.18      0.20      0.19        10
          12       0.00      0.00      0.00        10
          13       0.77      1.00      0.87        10
          14       0.53      1.00      0.69        10
          15       1.00      0.60      0.75        10

    accuracy                           0.82       160
   macro avg       0.81   

8865

In [10]:
X_train_yz, X_test_yz, y_train_yz, y_test_yz = load_data('YZ')
model_yz = get_model()
history_yz = model_yz.fit(X_train_yz, y_train_yz, epochs=EPOCHS)

loading data for j gesture on the YZ plane ... √
loading data for z gesture on the YZ plane ... √
loading data for bad gesture on the YZ plane ... √
loading data for deaf gesture on the YZ plane ... √
loading data for fine gesture on the YZ plane ... √
loading data for good gesture on the YZ plane ... √
loading data for goodbye gesture on the YZ plane ... √
loading data for hello gesture on the YZ plane ... √
loading data for hungry gesture on the YZ plane ... √
loading data for me gesture on the YZ plane ... √
loading data for no gesture on the YZ plane ... √
loading data for please gesture on the YZ plane ... √
loading data for sorry gesture on the YZ plane ... √
loading data for thankyou gesture on the YZ plane ... √
loading data for yes gesture on the YZ plane ... √
loading data for you gesture on the YZ plane ... √


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [11]:
prob_yz = tf.keras.Sequential([model_yz, tf.keras.layers.Softmax()])
y_pred_yz = prob_yz.predict(X_test_yz)
y_pred = np.argmax(y_pred_yz, axis=1)
print(classification_report(y_test_yz.ravel(), y_pred, zero_division=0))
X_train_yz = None
X_test_yz = None
gc.collect()

              precision    recall  f1-score   support

           0       0.67      0.20      0.31        10
           1       1.00      0.70      0.82        10
           2       0.89      0.80      0.84        10
           3       0.56      0.90      0.69        10
           4       0.78      0.70      0.74        10
           5       1.00      0.30      0.46        10
           6       0.80      0.80      0.80        10
           7       0.82      0.90      0.86        10
           8       1.00      1.00      1.00        10
           9       1.00      0.90      0.95        10
          10       0.62      1.00      0.77        10
          11       0.38      0.30      0.33        10
          12       0.50      0.30      0.37        10
          13       0.59      1.00      0.74        10
          14       0.75      0.90      0.82        10
          15       0.64      0.90      0.75        10

    accuracy                           0.73       160
   macro avg       0.75   

16020

In [12]:
X_train_zx, X_test_zx, y_train_zx, y_test_zx = load_data('ZX')
model_zx = get_model()
history_zx = model_zx.fit(X_train_zx, y_train_zx, epochs=EPOCHS)

loading data for j gesture on the ZX plane ... √
loading data for z gesture on the ZX plane ... √
loading data for bad gesture on the ZX plane ... √
loading data for deaf gesture on the ZX plane ... √
loading data for fine gesture on the ZX plane ... √
loading data for good gesture on the ZX plane ... √
loading data for goodbye gesture on the ZX plane ... √
loading data for hello gesture on the ZX plane ... √
loading data for hungry gesture on the ZX plane ... √
loading data for me gesture on the ZX plane ... √
loading data for no gesture on the ZX plane ... √
loading data for please gesture on the ZX plane ... √
loading data for sorry gesture on the ZX plane ... √
loading data for thankyou gesture on the ZX plane ... √
loading data for yes gesture on the ZX plane ... √
loading data for you gesture on the ZX plane ... √


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [13]:
prob_zx = tf.keras.Sequential([model_zx, tf.keras.layers.Softmax()])
y_pred_zx = prob_zx.predict(X_test_zx)
y_pred = np.argmax(y_pred_zx, axis=1)
print(classification_report(y_test_zx.ravel(), y_pred, zero_division=0))
X_train_zx = None
X_test_zx = None
gc.collect()

              precision    recall  f1-score   support

           0       0.90      0.90      0.90        10
           1       0.89      0.80      0.84        10
           2       0.31      0.40      0.35        10
           3       0.90      0.90      0.90        10
           4       0.25      0.10      0.14        10
           5       0.56      0.90      0.69        10
           6       0.77      1.00      0.87        10
           7       1.00      1.00      1.00        10
           8       0.91      1.00      0.95        10
           9       1.00      0.20      0.33        10
          10       0.42      1.00      0.59        10
          11       0.00      0.00      0.00        10
          12       0.73      0.80      0.76        10
          13       0.91      1.00      0.95        10
          14       1.00      0.80      0.89        10
          15       1.00      0.60      0.75        10

    accuracy                           0.71       160
   macro avg       0.72   

16020

In [14]:
y_total = y_pred_xy * y_pred_yz * y_pred_zx
y_pred = np.argmax(y_total, axis=1)
report = classification_report(y_test_xy.ravel(), y_pred, zero_division=0)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       1.00      1.00      1.00        10
           3       1.00      1.00      1.00        10
           4       0.90      0.90      0.90        10
           5       1.00      0.90      0.95        10
           6       1.00      1.00      1.00        10
           7       1.00      1.00      1.00        10
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        10
          10       0.83      1.00      0.91        10
          11       0.50      0.80      0.62        10
          12       0.67      0.20      0.31        10
          13       0.91      1.00      0.95        10
          14       1.00      1.00      1.00        10
          15       1.00      1.00      1.00        10

    accuracy                           0.92       160
   macro avg       0.93   

In [15]:
config = '\n\nTEST_USER ' + TEST_USER + '\n'
underline = '=====================================\n'
log_dir = os.path.join(BASE_DIR, LOG_DIR)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)
f = open(os.path.join(log_dir, 'logs_sptl_rgb_v2.txt'), 'a')
f.write(config)
f.write(underline)
f.write(report)
f.close()

In [16]:
'''
            Spatial Path TL RGB
      shapes -> z, colors -> start-end
        ----------------------------
           USER           ACCURACY
           =======================
            001              86%
            002              82%
            003              93%
            004              53%
            005              76%
            006              82%
            007              73%

'''



In [17]:
confusion_matrix(y_test_xy.ravel(), y_pred)

array([[10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  9,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0],
       [ 0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  8,  1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  8,  2,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,