# Import Required Libraries

In [1]:
import numpy as np
import os
import cv2
import mediapipe as mp
import pickle
import time

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


# Data Collection

In [2]:
# Define the main data directory path
DATA_DIR = './data'

# Create the main data directory (no error if it already exists)
os.makedirs(DATA_DIR, exist_ok=True)

# Number of gesture classes and number of images to capture for each class
number_of_classes = 10
dataset_size = 200

# Initialize video capture from the default camera (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open the camera (device index 0)')

# try/finally guarantees the camera and the preview window are released even
# if a frame grab fails or the cell is interrupted part-way through.
try:
    for j in range(number_of_classes):
        # One sub-directory per class, named after the class index
        class_dir = os.path.join(DATA_DIR, str(j))
        os.makedirs(class_dir, exist_ok=True)

        print('Collecting data for class {}'.format(j))

        # Preview loop: show the live feed until the user presses 's'
        while True:
            ret, frame = cap.read()
            if not ret:
                # cap.read() signals failure through `ret`; `frame` is not usable then
                raise RuntimeError('Failed to read a frame from the camera')
            # Display a message on the frame
            cv2.putText(frame, 'Ready? Press "S" :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                        cv2.LINE_AA)
            cv2.imshow('frame', frame)
            # Mask to the low byte so the comparison is independent of any
            # higher bits the platform may set in the returned key code
            if cv2.waitKey(25) & 0xFF == ord('s'):
                break

        # Capture loop: save `dataset_size` frames as <counter>.jpg in the class directory
        counter = 0
        while counter < dataset_size:
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('Failed to read a frame from the camera')
            cv2.imshow('frame', frame)
            cv2.waitKey(25)
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            counter += 1
finally:
    cap.release()
    cv2.destroyAllWindows()


Collecting data for class 0
Collecting data for class 1
Collecting data for class 2
Collecting data for class 3
Collecting data for class 4
Collecting data for class 5
Collecting data for class 6
Collecting data for class 7
Collecting data for class 8
Collecting data for class 9


# Feature Extraction

Use the MediaPipe library to detect hand landmarks in each captured image, then extract the (x, y) coordinates of those landmarks to build a dataset for hand gesture recognition.

In [3]:
DATA_DIR = './data'

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

data = []    # one flat [x0, y0, x1, y1, ...] landmark list per usable image
labels = []  # class directory name (a string) for the matching row in `data`

# The context manager releases the MediaPipe resources when extraction is done.
with mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands:
    # Iterate over class sub-directories; sorted() keeps the row order deterministic
    for dir_ in sorted(os.listdir(DATA_DIR)):
        class_dir = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(class_dir):
            # Skip stray files sitting next to the class directories
            continue

        for img_path in sorted(os.listdir(class_dir)):
            # Read the image; cv2.imread returns None for files it cannot decode
            img = cv2.imread(os.path.join(class_dir, img_path))
            if img is None:
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Process the image to detect hand landmarks
            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                # No hand found: the image contributes no sample
                continue

            # Use only the first detected hand so every sample has the same
            # number of features; concatenating several hands would produce
            # rows of different lengths.
            hand_landmarks = results.multi_hand_landmarks[0]
            data_aux = []
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x)
                data_aux.append(landmark.y)

            data.append(data_aux)
            labels.append(dir_)

# Save the data and labels into a pickle file
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

# Model Training

# (i) Random Forest classifier

In [3]:
# Load serialized data from 'data.pickle' file.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by
# this notebook, never files from an untrusted source.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

# Convert data and labels to NumPy arrays
data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

# Fixed seed so the split and the forest are reproducible across re-runs
RANDOM_STATE = 42

# Split the data into stratified training and testing sets (80% / 20%).
# NOTE(review): the images were captured as consecutive video frames, so a
# random split may place near-identical frames in both sets and inflate the
# test scores -- confirm with data from a separate recording session.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_STATE
)

# Initialize a Random Forest Classifier
RFmodel = RandomForestClassifier(random_state=RANDOM_STATE)

# Train the classifier using the training data
RFmodel.fit(x_train, y_train)

# Make predictions on the test set
y_predictRF = RFmodel.predict(x_test)

# Save the trained model to 'RFmodel.p' file using pickle
with open('RFmodel.p', 'wb') as f:
    pickle.dump({'RFmodel': RFmodel}, f)

In [5]:
# 5-fold cross-validation of the Random Forest on the training split
cv_scores = cross_val_score(RFmodel, x_train, y_train, cv=5)
print("Cross-Validation Scores:", cv_scores)

# Hold-out metrics computed from the test-set predictions
accuracy_rf = accuracy_score(y_test, y_predictRF)
# Precision, recall and F1 all use the same support-weighted average
precision_rf, recall_rf, f1_rf = [
    score_fn(y_test, y_predictRF, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
]
conf_matrix_rf = confusion_matrix(y_test, y_predictRF)

# Emit the whole report with a single print call
print("\n".join([
    "\nRandom Forest Model Metrics:",
    f"Accuracy: {accuracy_rf*100}%",
    f"Precision: {precision_rf}",
    f"Recall: {recall_rf}",
    f"F1 Score: {f1_rf}",
    f"\nConfusion Matrix:\n{conf_matrix_rf}",
]))

Cross-Validation Scores: [1. 1. 1. 1. 1.]

Random Forest Model Metrics:
Accuracy: 100.0%
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Confusion Matrix:
[[40  0  0  0  0  0  0  0  0  0]
 [ 0 40  0  0  0  0  0  0  0  0]
 [ 0  0 40  0  0  0  0  0  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 40  0  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0  0 40  0  0  0]
 [ 0  0  0  0  0  0  0 40  0  0]
 [ 0  0  0  0  0  0  0  0 40  0]
 [ 0  0  0  0  0  0  0  0  0 40]]


# (ii) KNN Algorithm

In [6]:
# Initialize a k-Nearest Neighbors classifier; k=3 is a starting value and can be tuned
KNNmodel = KNeighborsClassifier(n_neighbors=3)

# Train the classifier using the training data
KNNmodel.fit(x_train, y_train)

# Make predictions on the test set
y_predictKNN = KNNmodel.predict(x_test)

# Save the trained model to 'KNNmodel.p' using pickle; the context manager
# closes the file even if serialization raises
with open('KNNmodel.p', 'wb') as f:
    pickle.dump({'KNNmodel': KNNmodel}, f)

In [7]:
# 5-fold cross-validation of the KNN model on the training split
cv_scores = cross_val_score(KNNmodel, x_train, y_train, cv=5)
print("Cross-Validation Scores:", cv_scores)

# Hold-out metrics computed from the test-set predictions
accuracy_knn = accuracy_score(y_test, y_predictKNN)
# Precision, recall and F1 all use the same support-weighted average
precision_knn, recall_knn, f1_knn = [
    score_fn(y_test, y_predictKNN, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
]
conf_matrix_knn = confusion_matrix(y_test, y_predictKNN)

# Emit the whole report with a single print call
print("\n".join([
    "\nKNN Model Metrics:",
    f"Accuracy: {accuracy_knn*100}%",
    f"Precision: {precision_knn}",
    f"Recall: {recall_knn}",
    f"F1 Score: {f1_knn}",
    f"\nConfusion Matrix:\n{conf_matrix_knn}",
]))

Cross-Validation Scores: [1. 1. 1. 1. 1.]

KNN Model Metrics:
Accuracy: 100.0%
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Confusion Matrix:
[[40  0  0  0  0  0  0  0  0  0]
 [ 0 40  0  0  0  0  0  0  0  0]
 [ 0  0 40  0  0  0  0  0  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 40  0  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0  0 40  0  0  0]
 [ 0  0  0  0  0  0  0 40  0  0]
 [ 0  0  0  0  0  0  0  0 40  0]
 [ 0  0  0  0  0  0  0  0  0 40]]


# Model Testing

In [2]:
# Map each menu option to (pickle file, key of the model inside the pickled dict)
MODEL_FILES = {
    1: ('./RFmodel.p', 'RFmodel'),
    2: ('./KNNmodel.p', 'KNNmodel'),
}

# Ask for the model to use and keep asking until the answer is 1 or 2, so a
# typo neither raises ValueError nor silently selects the KNN model.
while True:
    try:
        choice = int(input("Press 1 for Random Forest Clssifier, 2 for KNN Algorithm :- "))
    except ValueError:
        choice = None
    if choice in MODEL_FILES:
        break
    print("Invalid choice, please enter 1 or 2.")

# Load the chosen machine learning model.
# NOTE: pickle.load can execute arbitrary code -- only load model files that
# were produced by this notebook.
model_path, model_key = MODEL_FILES[choice]
with open(model_path, 'rb') as f:
    model_dict = pickle.load(f)
model = model_dict[model_key]

Press 1 for Random Forest Clssifier, 2 for KNN Algorithm :- 1


In [3]:
# Initialize video capture from the default camera (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open the camera (device index 0)')

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
# NOTE(review): static_image_mode=True is kept from the original cell; MediaPipe
# documents static_image_mode=False for video streams -- consider switching
# after checking its effect on predictions and latency.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Dictionary to map predicted labels to characters
labels_dict = {
    0: 'A', 1: 'B', 2: 'C',
    3: 'D', 4: 'E', 5: 'F',
    6: 'G', 7: 'H', 8: 'I',
    9: 'J'
}

# Variables for response time calculation
start_time = 0
end_time = 0
response_time_list = []  # seconds from start of landmark detection to end of prediction, per frame

# try/finally guarantees the camera, the window and the MediaPipe resources
# are released even if a frame or a prediction raises.
try:
    while True:
        data_aux = []
        ret, frame = cap.read()
        if not ret:
            # Camera stopped delivering frames; leave the loop instead of
            # passing an unusable frame to cvtColor
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        start_time = time.time()
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            # Draw landmarks and connections for every detected hand
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            # Build the feature vector from the first detected hand only;
            # concatenating several hands would change the vector length and
            # make model.predict fail on a feature-count mismatch.
            first_hand = results.multi_hand_landmarks[0]
            for landmark in first_hand.landmark:
                data_aux.append(landmark.x)
                data_aux.append(landmark.y)

            # Make a prediction using the loaded machine learning model
            prediction = model.predict([np.asarray(data_aux)])

            # Map the predicted label to a character
            predicted_character = labels_dict[int(prediction[0])]

            cv2.putText(frame, predicted_character, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)

            end_time = time.time()

            response_time = end_time - start_time
            cv2.putText(frame, f'{response_time:.4f} sec', (frame.shape[1] - 200, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            response_time_list.append(response_time)

        cv2.imshow('frame', frame)

        # Poll the keyboard exactly once per frame: a second waitKey call
        # could consume the 'q' key press before it is tested.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    hands.close()
    cap.release()
    cv2.destroyAllWindows()

# Calculate Average response time

In [6]:
# Mean per-frame response time in seconds. The list is empty when no hand was
# detected during the live session, so guard against division by zero.
if response_time_list:
    avg_response_time = sum(response_time_list) / len(response_time_list)
    print(avg_response_time)
else:
    avg_response_time = None
    print("No predictions were recorded, so there is no response time to average.")

0.04748920836710418
