In [11]:
import os
import cv2
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report

In [12]:
NUM_CLASSES = 2

In [28]:
cap = cv2.VideoCapture(0) 
cap.set(3, 640)
cap.set(4, 480) 
cap.set(10, 100) 

folder_name = 'thumbs-up'

folder_path = f'input/myImages3/{folder_name}/'

if os.listdir(folder_path):
    maxnum = max([int(file.split('frame')[1].split('.jpg')[0]) for file in os.listdir(folder_path)])
    i = maxnum
else:
    i = 0

while True: 
    i += 1

    success, img = cap.read()

    cv2.imwrite(folder_path+f'myImages3_{folder_name}_frame{i}.jpg', img)

    cv2.imshow('python', img)   
    
    if cv2.waitKey(20) == 27:  
        break

cv2.destroyWindow("python")  
cap.release()  
cv2.waitKey(1)

-1

In [13]:
input_shape = (64, 64, 1)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

train_datagen = ImageDataGenerator(
    rescale=1./255,         
    shear_range=0.2,       
    zoom_range=0.2,        
    horizontal_flip=True    
)

train_generator = train_datagen.flow_from_directory(
    'input/allImages',
    target_size=(64, 64),
    batch_size=128,
    shuffle=True,
    color_mode='grayscale', 
    class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
    'input/valImages',
    target_size=(64, 64),
    shuffle=True,
    color_mode='grayscale', 
    class_mode='categorical')  

model.fit(
    train_generator,
    epochs=2,  
    steps_per_epoch=len(train_generator),
    verbose=1,
    validation_data=validation_generator)

model.save('trained_gesture_model.h5')

Found 11505 images belonging to 2 classes.
Found 2875 images belonging to 2 classes.
Epoch 1/2


Epoch 2/2


  saving_api.save_model(


In [15]:
y_true = []
y_pred = []

for i in range(len(validation_generator)):
    x_val, y_val = validation_generator[i]
    y_true.extend(np.argmax(y_val, axis=1))  
    y_pred.extend(np.argmax(model.predict(x_val), axis=1))  


              precision    recall  f1-score   support

           0       0.94      0.97      0.96      1431
           1       0.97      0.94      0.96      1444

    accuracy                           0.96      2875
   macro avg       0.96      0.96      0.96      2875
weighted avg       0.96      0.96      0.96      2875



In [17]:
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.97      0.96      1431
           1       0.97      0.94      0.96      1444

    accuracy                           0.96      2875
   macro avg       0.96      0.96      0.96      2875
weighted avg       0.96      0.96      0.96      2875



In [19]:
model = load_model('trained_gesture_model.h5')

cap = cv2.VideoCapture(0) 
cap.set(3, 640) 
cap.set(4, 480)
cap.set(10, 100)


text = ['hi', 'thumbs-up']
positions = [(50, 50), (50, 100), (50, 150)] 
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
thickness = 3

image_width, image_height = 64, 64

font_color_red = (0, 0, 255) 
font_color_blue = (255, 0, 0) 


while True: 
    success, img = cap.read()

    resized_img = cv2.resize(img, (image_width, image_height)) 
    grey_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY) / 255

    pred = model.predict(np.array([grey_img] ))

    print(pred)

    num = int(np.argmax(pred))

    for i in range(NUM_CLASSES):
        if round(float(pred[:,i]), 3) > 0.5:
            cv2.putText(img, text[i]+f' {round(float(pred[:,i]), 3)}', positions[i], font, font_scale, font_color_red, thickness)
        else:
            cv2.putText(img, text[i]+f' {round(float(pred[:,i]), 3)}', positions[i], font, font_scale, font_color_blue, thickness)
    
    cv2.imshow('python', img) 
    
    if cv2.waitKey(20) == 27: 
        break

cv2.destroyWindow("python")  
cap.release()  
cv2.waitKey(1)


[[0.1789168 0.8210832]]
[[0.18823005 0.8117699 ]]
[[0.20680428 0.7931957 ]]
[[0.21415877 0.78584117]]
[[0.15920319 0.8407968 ]]
[[0.16531907 0.834681  ]]
[[0.16413644 0.8358636 ]]
[[0.17254528 0.8274547 ]]
[[0.16177289 0.8382271 ]]
[[0.1430888 0.8569112]]
[[0.12192605 0.878074  ]]
[[0.13840519 0.86159486]]
[[0.13105085 0.8689491 ]]
[[0.1449676 0.8550324]]
[[0.10163407 0.898366  ]]
[[0.08078834 0.9192117 ]]
[[0.08859893 0.91140103]]
[[0.06923549 0.9307645 ]]
[[0.08224225 0.91775775]]
[[0.06248237 0.93751764]]
[[0.08067992 0.91932005]]
[[0.10443526 0.89556473]]
[[0.16709867 0.8329013 ]]
[[0.11793202 0.88206804]]
[[0.17436859 0.8256314 ]]
[[0.6119252  0.38807485]]
[[0.9077351  0.09226485]]
[[0.91054124 0.0894588 ]]
[[0.80704826 0.19295172]]
[[0.3792111  0.62078893]]
[[0.01567281 0.98432726]]
[[0.4497331  0.55026686]]
[[0.82917786 0.17082217]]
[[0.84025383 0.15974613]]
[[0.96911365 0.03088637]]
[[0.9383256  0.06167438]]
[[0.96874267 0.03125731]]
[[0.75712967 0.24287039]]
[[0.5682943  0.431

-1