**Loading images and labels**

In [37]:
import os
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_data(data_dir, augment=False):
    images, labels = [], []
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    for label in os.listdir(data_dir):
        for image_file in os.listdir(os.path.join(data_dir, label)):
            img = image.load_img(os.path.join(data_dir, label, image_file), target_size=(256, 256))
            img_tensor = image.img_to_array(img)
            img_tensor = np.expand_dims(img_tensor, axis=0)
            img_tensor /= 255.  # normalize to [0,1] range

            if augment:
                aug_images = [img_tensor]
                for batch in datagen.flow(img_tensor, batch_size=1):
                    aug_images.append(batch)
                    if len(aug_images) >= 6:  # original + 5 augmented images
                        break
                img_tensor = np.concatenate(aug_images, axis=0)

            images.append(img_tensor)
            labels.extend([label] * len(img_tensor))

    return np.concatenate(images, axis=0), np.array(labels)

images, labels = load_data('ds', augment=True)

**DS split**

In [38]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
X_train.shape

(3081, 256, 256, 3)

**Model teaching**

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
import tensorflow as tf

tf.random.set_seed(42)

model = Sequential()
model.add(Flatten(input_shape=(256, 256, 3)))
model.add(Dense(1024, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax')) 

model.summary()

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 2s/step - accuracy: 0.4372 - loss: 32.3211 - val_accuracy: 0.5447 - val_loss: 0.9253
Epoch 2/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 2s/step - accuracy: 0.5660 - loss: 1.0204 - val_accuracy: 0.5447 - val_loss: 1.0724
Epoch 3/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 1s/step - accuracy: 0.5663 - loss: 1.0616 - val_accuracy: 0.5447 - val_loss: 1.0482
Epoch 4/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 1s/step - accuracy: 0.5663 - loss: 1.0349 - val_accuracy: 0.5447 - val_loss: 1.0287
Epoch 5/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 1s/step - accuracy: 0.5663 - loss: 1.0133 - val_accuracy: 0.5447 - val_loss: 1.0136
Epoch 6/10
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 1s/step - accuracy: 0.5663 - loss: 0.9960 - val_accuracy: 0.5447 - val_loss: 1.0023
Epoch 7/10
[1m97/97[0m [32m━━━

<keras.src.callbacks.history.History at 0x1c3f21790d0>

**Predict and evaluate**

In [44]:
from sklearn.metrics import accuracy_score, classification_report
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_test, y_pred)
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))

print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred, target_names=le.classes_))



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step
{0: 922, 1: 1770, 2: 389}
{0: 236, 1: 420, 2: 115}
Accuracy: 0.5447470817120622
              precision    recall  f1-score   support

        Fist       0.00      0.00      0.00       236
        Palm       0.54      1.00      0.71       420
       Thumb       0.00      0.00      0.00       115

    accuracy                           0.54       771
   macro avg       0.18      0.33      0.24       771
weighted avg       0.30      0.54      0.38       771



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Magic**

In [18]:
import pygame
import time 

pygame.mixer.init()

playlist = ['song1.mp3', 'song2.mp3']
current_song = 0

def play_music():
    global current_song
    pygame.mixer.music.load(playlist[current_song])
    pygame.mixer.music.play()

def stop_music():
    pygame.mixer.music.stop()
    
def skip_music():
    global current_song
    current_song = (current_song + 1) % len(playlist)
    play_music()

def recognize_gesture(frame, model, le):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray_frame = cv2.resize(gray_frame, (256, 256))
    features = hog(gray_frame, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(4, 4), block_norm='L2-Hys').reshape(1, -1)
    prediction = model.predict(features)
    label = le.inverse_transform(prediction)
    return label[0]

cnt_fist = 0
cnt_palm = 0
cnt_thumb = 0

def clear_cnt():
    cnt_fist = 0
    cnt_palm = 0
    cnt_thumb = 0
    
cap = cv2.VideoCapture(0)

play_music()

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    label = recognize_gesture(frame, svm_model, le)
    if label == 'Palm':
        cnt_palm += 1
        if cnt_palm > 10 :
            stop_music()
            clear_cnt()
            
    elif label == 'Thumb':
        cnt_thumb += 1
        if cnt_thumb > 10 :
            skip_music()
            clear_cnt()
            
    elif label == 'Fist':
        cnt_fist += 1
        if cnt_fist > 10 :
            play_music()
            clear_cnt()
    
    cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Gesture Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
pygame.mixer.quit()


KeyboardInterrupt: 