# Data collection

In [1]:
import os

import cv2


# Capture webcam frames for every gesture class and store them as
# ./data/<class index>/<frame index>.jpg.
DATA_DIR = './data'
os.makedirs(DATA_DIR, exist_ok=True)

number_of_classes = 16  # one sub-directory is created per class index
dataset_size = 100      # frames saved per class

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the default camera (index 0).')

    for j in range(number_of_classes):
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(j))

        # Preview loop: show the live feed until the user presses "q" to
        # signal that the gesture for this class is in position.
        while True:
            ret, frame = cap.read()
            if not ret:
                # Without this check a failed grab hands None to putText/imshow.
                raise RuntimeError('Failed to read a frame from the camera.')
            cv2.putText(frame, 'Ready? Press "Q" ! :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                        cv2.LINE_AA)
            cv2.imshow('frame', frame)
            # Mask to the low byte so the comparison also works on platforms
            # where waitKey returns extra high bits.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        # Recording loop: save `dataset_size` consecutive frames for class j.
        for counter in range(dataset_size):
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('Failed to read a frame from the camera.')
            cv2.imshow('frame', frame)
            cv2.waitKey(25)
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
finally:
    # Always free the camera and close the preview window, even on error or
    # when the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()



Collecting data for class 0
Collecting data for class 1
Collecting data for class 2
Collecting data for class 3
Collecting data for class 4
Collecting data for class 5
Collecting data for class 6
Collecting data for class 7
Collecting data for class 8
Collecting data for class 9
Collecting data for class 10
Collecting data for class 11
Collecting data for class 12
Collecting data for class 13
Collecting data for class 14
Collecting data for class 15


# Save data

In [6]:
import os
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import pickle

# Convert every collected image into a flat list of hand-landmark (x, y)
# coordinates and store the features plus their class labels in data.pickle.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

DATA_DIR = './data'
data = []
labels = []

# The context manager releases the MediaPipe graph when extraction finishes.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
    # sorted() makes the sample order independent of the filesystem.
    for dir_ in sorted(os.listdir(DATA_DIR)):
        dir_path = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(dir_path):
            continue

        for img_path in sorted(os.listdir(dir_path)):
            img_file = os.path.join(dir_path, img_path)
            img = cv2.imread(img_file)
            if img is None:
                # imread returns None for unreadable / non-image files.
                print('Skipping unreadable image: {}'.format(img_file))
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                continue  # no hand detected, nothing to learn from this image

            # Use only the first detected hand. Concatenating every detected
            # hand would make feature vectors of different lengths, which
            # cannot be stacked into a single training array.
            hand_landmarks = results.multi_hand_landmarks[0]
            data_aux = []
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x)
                data_aux.append(landmark.y)

            data.append(data_aux)
            labels.append(dir_)  # the directory name is the class label

with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

# Training the classifier

In [7]:
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Train a random forest on the landmark features and save it to model.p.
RANDOM_STATE = 42  # fixed seed so the split and the reported score are reproducible

# NOTE: pickle can execute arbitrary code while loading; only load a
# data.pickle that was produced by the feature-extraction step of this notebook.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

if data.ndim != 2:
    # Happens when samples have different numbers of features.
    raise ValueError('Expected a 2-D feature array; samples have inconsistent feature counts.')

x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_STATE)

model = RandomForestClassifier(random_state=RANDOM_STATE)

model.fit(x_train, y_train)

y_predict = model.predict(x_test)

# accuracy_score expects (y_true, y_pred).
score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

100.0% of samples were classified correctly !


# Get output sound with text

In [None]:
import pickle
import cv2
import mediapipe as mp
import numpy as np
from gtts import gTTS
import pygame
import io
import tempfile

# Live recognition: classify the hand pose in each webcam frame, overlay the
# recognized word on the video and speak it whenever the prediction changes.
import os  # used at shutdown to delete the synthesized audio files

# Load the model
# NOTE: pickle can execute arbitrary code while loading; only load a model.p
# that was produced by the training step of this notebook.
with open('./model.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

# Initialize Mediapipe components
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
labels_dict = {0: 'Deaf', 1: 'hearing', 2: 'Thanks', 3: 'Drink', 4: 'ask', 5: 'fine', 6: 'see', 7: 'He', 8: 'Hello', 9: 'Yes', 10: 'me', 11: 'Sorry', 12: 'Know', 13: 'Eat', 14: 'You',15: 'my'}

# Initialize Pygame mixer
pygame.mixer.init()

# Last spoken word; reset to None whenever no hand is visible so the same
# word is spoken again when the hand reappears.
previous_prediction = None

# word -> path of its synthesized mp3, so each word is requested from gTTS
# at most once and every temp file can be removed at shutdown.
speech_files = {}


def _play_speech(label):
    """Play the spoken form of `label`, synthesizing it with gTTS on first use."""
    path = speech_files.get(label)
    if path is None:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
            path = fp.name
        try:
            gTTS(text=label, lang='en').save(path)
        except Exception as exc:
            # gTTS needs network access; a failed request should only cost
            # the audio, not stop the recognition loop.
            os.remove(path)
            print(f"Could not synthesize audio for '{label}': {exc}")
            return
        speech_files[label] = path

    pygame.mixer.music.load(path)
    pygame.mixer.music.play()


# Initialize video capture
cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        data_aux = []
        if results.multi_hand_landmarks:
            # Draw every detected hand on the preview.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            # Build the feature vector from the first hand only. Appending a
            # second hand would change the feature count and make
            # model.predict raise.
            for landmark in results.multi_hand_landmarks[0].landmark:
                data_aux.extend([landmark.x, landmark.y])

        if data_aux:
            prediction = model.predict([np.asarray(data_aux)])
            predicted_value = int(prediction[0])
            if predicted_value in labels_dict:
                predicted_character = labels_dict[predicted_value]

                # Speak only when the prediction has changed
                if predicted_character != previous_prediction:
                    print(f"The recognized sign is: {predicted_character}")
                    previous_prediction = predicted_character
                    _play_speech(predicted_character)

                # Display the recognized word on the frame
                cv2.putText(frame, f'Sign: {predicted_character}', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3, cv2.LINE_AA)
            else:
                print(f"Unknown prediction: {predicted_value}")
        else:
            print("No hand landmarks detected in the current frame.")
            previous_prediction = None  # Reset if no landmarks are detected

        # Display the frame
        cv2.imshow('Hand Gesture Recognition', frame)

        # Exit condition
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release every resource even if the loop raised or was interrupted.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.quit()  # quit first so the mixer no longer holds an mp3 open

    for audio_path in speech_files.values():
        try:
            os.remove(audio_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file is harmless.
            pass