## Creating Neural Network Model for Hand Gesture Classification

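Gestures for
  1. Play
  2. Stop
  3. Volume Up
  4. Volume Down

(These are the four classes present in the dataset loaded below; it contains no separate "Pause" class.)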

Loading Dataset

In [None]:
# Fetch the project repository (it contains the `dataset` folder read below) into the current working directory.
!git clone https://github.com/AjumaML/Hand_Recognition_Project.git

Cloning into 'Hand_Recognition_Project'...
remote: Enumerating objects: 2424, done.[K
remote: Counting objects: 100% (2424/2424), done.[K
remote: Compressing objects: 100% (2416/2416), done.[K
remote: Total 2424 (delta 7), reused 2420 (delta 6), pack-reused 0[K
Receiving objects: 100% (2424/2424), 3.75 MiB | 32.25 MiB/s, done.
Resolving deltas: 100% (7/7), done.


In [None]:
#Importing necessary libraries

import numpy as np
from imutils import paths
import os

In [None]:
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img

In [None]:
#Finding Image path
# The repository is cloned relative to the working directory, so resolve the
# dataset folder the same way instead of assuming an absolute /content prefix.
# Sorting makes the file order deterministic, so the seeded train/test split
# further down does not depend on the order the filesystem lists files in.
imagePaths = sorted(paths.list_images(os.path.join('Hand_Recognition_Project', 'dataset')))

In [None]:
#Retrieving Labels from Image Paths
IMG_SIZE = 224
CHANNELS = 3

# Fail early with a clear message rather than surfacing an obscure error in a
# later cell when the dataset folder is missing or empty.
if not imagePaths:
  raise ValueError("No images found in imagePaths; check that the dataset was cloned.")

# Pre-allocate the image tensor so the dataset is held in memory only once,
# instead of as a Python list plus the second copy np.array() would create.
data = np.empty((len(imagePaths), IMG_SIZE, IMG_SIZE, CHANNELS), dtype="float32")
labels = []

for idx, imagePath in enumerate(imagePaths):
  # The name of the image's parent directory is its class label.
  label = imagePath.split(os.path.sep)[-2]

  # Load the image resized to IMG_SIZE x IMG_SIZE and scale pixels to [0, 1].
  image = load_img(imagePath, target_size=(IMG_SIZE, IMG_SIZE))
  image = img_to_array(image)
  data[idx] = image/255

  labels.append(label)

labels = np.array(labels)

In [None]:
# Sanity check of the image tensor: (number of images, height, width, channels)
data.shape

(2400, 224, 224, 3)

In [None]:
# One label per image, so the length should match the first dimension of `data`
labels.shape

(2400,)

In [None]:
#Finding Label Values
# Distinct class names together with the number of images in each class
np.unique(labels, return_counts=True)

(array(['Volume_Down', 'Volume_Up', 'play', 'stop'], dtype='<U11'),
 array([900, 900, 300, 300]))

### Encoding Label Values

In [None]:
from sklearn import preprocessing

# Map each class-name string to an integer id. The fitted encoder `le` is kept
# so the original names stay available through `le.classes_`.
le = preprocessing.LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class ids (the model is compiled with
# categorical cross-entropy). The ndim guard keeps this cell idempotent:
# re-running it on the already one-hot matrix would encode the 0/1 entries
# again and produce a 3-D array. num_classes is pinned to the encoder's
# classes so the column count always matches `le.classes_`.
if labels.ndim == 1:
  labels = to_categorical(labels, num_classes=len(le.classes_))

In [None]:
# Inspect the one-hot encoded label matrix (one row per image, one column per class)
labels

array([[1., 0., 0., 0.],
       [1., 0., 0., 0.],
       [1., 0., 0., 0.],
       ...,
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.]], dtype=float32)

In [None]:
# Splitting the dataset into train and test

from sklearn.model_selection import train_test_split

# Hold out 20% of the images for evaluation. The split is stratified on the
# labels and uses a fixed seed.
trainX, testX, trainY, testY = train_test_split(
  data,
  labels,
  test_size=0.20,
  stratify=labels,
  random_state=42,
)

In [None]:
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.layers import Input

In [None]:
#Using ResNet for Feature Extraction
# ImageNet-pretrained ResNet50V2 without its classification head
# (include_top=False), fed with IMG_SIZE x IMG_SIZE x CHANNELS inputs.
resnet_input = Input(shape=(IMG_SIZE, IMG_SIZE, CHANNELS))
feature_extractor_layer = ResNet50V2(
  include_top=False,
  weights="imagenet",
  input_tensor=resnet_input,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the pretrained backbone so only the layers added on top of it are trained
feature_extractor_layer.trainable = False

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

In [None]:
# Model building

# Size the output layer from the one-hot targets instead of hard-coding the
# class count, so the head always matches the labels the model is trained on.
NUM_CLASSES = trainY.shape[1]

# Frozen feature extractor -> flatten -> dense hidden layer with dropout ->
# softmax over the gesture classes.
model = tf.keras.Sequential([
  feature_extractor_layer,
  layers.Flatten(name="flatten"),
  layers.Dense(1024, activation='relu', name='hidden_layer'),
  layers.Dropout(0.5),
  layers.Dense(NUM_CLASSES, activation='softmax', name='output'),
])

In [None]:
# Print the layer-by-layer architecture with trainable / non-trainable parameter counts
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50v2 (Functional)     (None, 7, 7, 2048)        23564800  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 hidden_layer (Dense)        (None, 1024)              102761472 
                                                                 
 dropout_6 (Dropout)         (None, 1024)              0         
                                                                 
 output (Dense)              (None, 4)                 4100      
                                                                 
Total params: 126,330,372
Trainable params: 102,765,572
Non-trainable params: 23,564,800
_________________________________________________________________


In [None]:
#Compiling Model
# Adam with a small learning rate; categorical cross-entropy matches the
# one-hot encoded targets, and accuracy is tracked during training.
optimizer = Adam(learning_rate=1e-5)
model.compile(
  loss="categorical_crossentropy",
  optimizer=optimizer,
  metrics=["accuracy"],
)

In [None]:
#Data augmentation applied to the training images

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random rotations, shifts, shear, zoom and horizontal flips; pixels exposed
# by a transform are filled with the nearest existing pixel value.
aug = ImageDataGenerator(
  rotation_range=20,
  width_shift_range=0.2,
  height_shift_range=0.2,
  shear_range=0.15,
  zoom_range=0.15,
  horizontal_flip=True,
  fill_mode="nearest",
)

In [None]:
#Fitting Model
# Training batches come from the augmenting generator; validation uses the
# un-augmented held-out split.
train_batches = aug.flow(trainX, trainY)
history = model.fit(
  train_batches,
  epochs=10,
  validation_data=(testX, testY),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
from sklearn.metrics import classification_report

In [None]:
# Despite the name, at this point predIdxs holds the model's softmax outputs
# for the test images (one row of class probabilities per image).
predIdxs = model.predict(testX)



In [None]:
# Class probabilities predicted for the first test image
predIdxs[0]

array([3.930464e-07, 9.999962e-01, 1.436400e-07, 3.266383e-06],
      dtype=float32)

In [None]:
# Collapse each row of class probabilities to the index of the most likely
# class. Guarded so that re-running this cell once predIdxs is already a 1-D
# index vector is a no-op instead of raising an axis error.
if predIdxs.ndim > 1:
  predIdxs = np.argmax(predIdxs, axis=1)

### Classification Report

In [None]:
# Turn the one-hot ground truth back into class indices and compare it with
# the predicted indices; rows of the report are named after the encoder's classes.
true_idxs = testY.argmax(axis=1)
report = classification_report(true_idxs, predIdxs, target_names=le.classes_)
print(report)

              precision    recall  f1-score   support

 Volume_Down       1.00      1.00      1.00       180
   Volume_Up       1.00      1.00      1.00       180
        play       1.00      1.00      1.00        60
        stop       1.00      1.00      1.00        60

    accuracy                           1.00       480
   macro avg       1.00      1.00      1.00       480
weighted avg       1.00      1.00      1.00       480



In [None]:
#Trained Model
# Save the trained model as an HDF5 file, relative to the current working directory
model.save('model_gesture_recognition.h5', save_format="h5")

In [None]:
#Trained Model for Hand Gesture Recognition
# Reload from the same relative path the model was saved to, so this cell
# works from any working directory rather than only when the cwd is /content.
model = tf.keras.models.load_model('model_gesture_recognition.h5')