In [3]:
import cv2
import mediapipe as mp
import numpy as np
import os
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix



In [4]:
mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

# Set up Holistic model
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)


In [5]:
import numpy as np
import time
import cv2
import os
import mediapipe as mp

# Initialize Mediapipe Hand model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Initialize video capture
cap = cv2.VideoCapture(0)

# Function to extract keypoints from Mediapipe results (hand landmarks only)
def extract_keypoints(results):
    keypoints = []
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            for landmark in hand_landmarks.landmark:
                keypoints.extend([landmark.x, landmark.y, landmark.z])
    return keypoints

# Function to save data and display messages
def save_data(action, frame_num, keypoints, image):
    try:
        action_path = os.path.join('MP_Data3', action)
        os.makedirs(action_path, exist_ok=True)  # Create directory if it doesn't exist

        # Save keypoints data
        npy_path = os.path.join(action_path, f"{frame_num}.npy")
        np.save(npy_path, keypoints)

        # Save the corresponding image
        frame_image_path = os.path.join(action_path, f"{frame_num}.jpg")
        cv2.imwrite(frame_image_path, image)

        print(f"Saved data for {action}: Frame {frame_num + 1}")
    except Exception as e:
        print(f"Error saving data: {e}")

# Function to draw a rectangle around the hand based on landmarks
def draw_hand_rectangle(image, hand_landmarks, w, h):
    x_min = min([int(landmark.x * w) for landmark in hand_landmarks.landmark])
    x_max = max([int(landmark.x * w) for landmark in hand_landmarks.landmark])
    y_min = min([int(landmark.y * h) for landmark in hand_landmarks.landmark])
    y_max = max([int(landmark.y * h) for landmark in hand_landmarks.landmark])

    # Add padding for better visibility of the rectangle
    padding = 20
    x_min = max(0, x_min - padding)
    x_max = min(w, x_max + padding)
    y_min = max(0, y_min - padding)
    y_max = min(h, y_max + padding)

    # Draw the rectangle on the image
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

# Set up the hand model with adjusted confidence
with mp_hands.Hands(
    min_detection_confidence=0.6,  # Adjusted confidence
    min_tracking_confidence=0.6  # Adjusted confidence
) as hands:

    while True:
        action = input("Enter pose label (hello, this, prototype) or 'q' to quit: ").strip().lower()
        time.sleep(1.5)
        # Handle 'q' input to quit
        if action == 'q':
            print("Quitting...")
            break

        # Ensure a valid action is provided
        if action not in ['hello', 'this', 'prototype','seeyoulater']:
            print("Invalid action label. Please enter a valid label.")
            continue

        print(f"Starting capture for pose: {action}")

        for frame_num in range(50):  # Collect 50 frames per action
            time.sleep(0.5)

            if not cap.isOpened():
                print("Error: Video capture not initialized.")
                break

            ret, frame = cap.read()
            if not ret:
                print("Error: Frame capture failed.")
                continue

            # Get frame dimensions
            h, w, _ = frame.shape

            # Recolor frame to RGB for processing
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make hand detections only
            results = hands.process(image)

            # Recolor back to BGR for OpenCV
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw hand landmarks and a rectangle around the hand
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw the landmarks on the image
                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Draw a rectangle around the hand
                    draw_hand_rectangle(image, hand_landmarks, w, h)

            # Extract keypoints and save data (hands only)
            keypoints = extract_keypoints(results)
            save_data(action, frame_num, keypoints, image)

            # Display the frame
            cv2.imshow('Webcam Feed', image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Capture interrupted by user.")
                break

print("Data collection completed.")
cap.release()
cv2.destroyAllWindows()


Enter pose label (hello, this, prototype) or 'q' to quit: q
Quitting...
Data collection completed.


In [3]:
import os
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

def load_data(data_path):
    X = []
    y = []
    actions = os.listdir(data_path)
    print(f"Actions found: {actions}")

    for action in actions:
        action_path = os.path.join(data_path, action)
        if not os.path.isdir(action_path):
            print(f"Skipping non-directory: {action_path}")
            continue

        files = os.listdir(action_path)
        print(f"Files found for action '{action}': {files}")

        for file in files:
            file_path = os.path.join(action_path, file)
            if file.endswith('.npy'):
                try:
                    keypoints = np.load(file_path)
                    if keypoints.size == 0:
                        print(f"Warning: Empty file {file_path}")
                        continue
                    X.append(keypoints)
                    y.append(action)
                    print(f"Loaded data from {file_path}, shape: {keypoints.shape}")
                except Exception as e:
                    print(f"Error loading {file_path}: {e}")
            else:
                print(f"Skipping non-npy file: {file_path}")

    # Handle inconsistent shapes by padding, truncating, or leaving as list
    max_length = max([item.shape[0] for item in X])
    print(f"Maximum sequence length found: {max_length}")
    X_padded = np.array([np.pad(item, (0, max_length - item.shape[0]), 'constant') if item.shape[0] < max_length else item for item in X])
    y = np.array(y)

    return X_padded, y  # or return X_truncated, y if truncating

# Load data
data_path = 'MP_Data3'
X, y = load_data(data_path)

# Check loaded data
print(f"Loaded {len(X)} samples and {len(y)} labels.")
if len(X) == 0 or len(y) == 0:
    raise ValueError("No data found. Check the data collection and storage process.")

print("Sample X shape:", X.shape)
print("Sample y shape:", y.shape)

# Encode labels
actions = np.unique(y)
print("Actions:", actions)

# Check if y is empty
if len(y) == 0:
    raise ValueError("No data found for encoding. Ensure that data is collected and stored properly.")

y_encoded = np.array([np.where(actions == label)[0][0] for label in y])

# Convert labels to categorical
y_encoded = to_categorical(y_encoded)
print("y_encoded shape:", y_encoded.shape)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")


Actions found: ['hello', 'prototype', 'seeyoulater', 'this']
Files found for action 'hello': ['0.jpg', '0.npy', '1.jpg', '1.npy', '10.jpg', '10.npy', '11.jpg', '11.npy', '12.jpg', '12.npy', '13.jpg', '13.npy', '14.jpg', '14.npy', '15.jpg', '15.npy', '16.jpg', '16.npy', '17.jpg', '17.npy', '18.jpg', '18.npy', '19.jpg', '19.npy', '2.jpg', '2.npy', '20.jpg', '20.npy', '21.jpg', '21.npy', '22.jpg', '22.npy', '23.jpg', '23.npy', '24.jpg', '24.npy', '25.jpg', '25.npy', '26.jpg', '26.npy', '27.jpg', '27.npy', '28.jpg', '28.npy', '29.jpg', '29.npy', '3.jpg', '3.npy', '30.jpg', '30.npy', '31.jpg', '31.npy', '32.jpg', '32.npy', '33.jpg', '33.npy', '34.jpg', '34.npy', '35.jpg', '35.npy', '36.jpg', '36.npy', '37.jpg', '37.npy', '38.jpg', '38.npy', '39.jpg', '39.npy', '4.jpg', '4.npy', '40.jpg', '40.npy', '41.jpg', '41.npy', '42.jpg', '42.npy', '43.jpg', '43.npy', '44.jpg', '44.npy', '45.jpg', '45.npy', '46.jpg', '46.npy', '47.jpg', '47.npy', '48.jpg', '48.npy', '49.jpg', '49.npy', '5.jpg', '5.npy'

In [4]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense

# Define a simple feedforward neural network
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(y_train.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=15, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Save the model
model.save('action_recognition_model.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step - accuracy: 0.2667 - loss: 1.4133 - val_accuracy: 0.3750 - val_loss: 1.3059
Epoch 2/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3375 - loss: 1.2927 - val_accuracy: 0.3438 - val_loss: 1.2477
Epoch 3/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.6021 - loss: 1.2373 - val_accuracy: 0.7812 - val_loss: 1.1842
Epoch 4/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.8542 - loss: 1.1659 - val_accuracy: 1.0000 - val_loss: 1.1165
Epoch 5/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.9563 - loss: 1.1023 - val_accuracy: 1.0000 - val_loss: 1.0437
Epoch 6/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 1.0506 - val_accuracy: 1.0000 - val_loss: 0.9767
Epoch 7/15
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━



Test Accuracy: 100.00%


In [5]:
import cv2
import os
import numpy as np

def load_data(data_dir):
    images = []
    labels = []
    class_names = []
    
    class_to_index = {}
    index = 0
    
    # Define acceptable image extensions
    valid_extensions = ['.jpg', '.jpeg', '.png']
    
    # Iterate through directories and load images
    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        if os.path.isdir(class_dir):
            class_names.append(class_name)
            class_to_index[class_name] = index
            index += 1
            
            for filename in os.listdir(class_dir):
                file_path = os.path.join(class_dir, filename)
                ext = os.path.splitext(filename)[1].lower()
                
                if ext not in valid_extensions:
                    print(f"Skipping unsupported file {file_path}")
                    continue
                
                # Load and preprocess image
                img = cv2.imread(file_path)
                
                if img is None:
                    print(f"Warning: Unable to load image {file_path}")
                    continue
                
                # Ensure the image is resized properly
                try:
                    img = cv2.resize(img, (50, 50))  # Use the same image_size as defined
                except cv2.error as e:
                    print(f"Error resizing image {file_path}: {e}")
                    continue
                
                images.append(img)
                labels.append(class_to_index[class_name])  # Convert label to integer
    
    images = np.array(images)
    labels = np.array(labels)
    return images, labels, class_names


In [6]:
# Assuming the directory structure is as follows:
# MP_Data/
# ├── hello/
# │   ├── 0.jpg
# │   ├── 1.jpg
# │   └── 2.jpg
# ├── thanks/
# │   ├── 0.jpg
# │   ├── 1.jpg
# │   └── 2.jpg
# └── iloveyou/
#     ├── 0.jpg
#     ├── 1.jpg
#     └── 2.jpg

import os
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Directory containing action folders
data_dir = 'MP_Data3'

# Initialize variables to store images and labels
images = []
labels = []
class_names = []

# Dictionary to map class names to indices
class_to_index = {}
index = 0

# Define the target image size
image_size = (50, 50)  # Update as needed

# Iterate through each action folder
for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)
    if os.path.isdir(class_dir):
        class_names.append(class_name)
        class_to_index[class_name] = index
        index += 1
        
        for filename in os.listdir(class_dir):
            file_path = os.path.join(class_dir, filename)
            ext = os.path.splitext(filename)[1].lower()
            
            if ext in ['.jpg', '.jpeg', '.png']:
                # Load and resize image
                img = cv2.imread(file_path)
                if img is not None:
                    img = cv2.resize(img, image_size)
                    images.append(img)
                    labels.append(class_to_index[class_name])

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Normalize images to the range [0, 1]
images = images.astype('float32') / 255.0

# One-hot encode labels
labels = to_categorical(labels, num_classes=len(class_names))

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Build a simple CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(class_names), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Save the trained model
model.save('action_recognition_model.h5')


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 187ms/step - accuracy: 0.3537 - loss: 1.5643 - val_accuracy: 0.6750 - val_loss: 1.1032
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.7219 - loss: 1.0180 - val_accuracy: 0.6250 - val_loss: 0.8785
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.8265 - loss: 0.6145 - val_accuracy: 1.0000 - val_loss: 0.4429
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.9668 - loss: 0.3403 - val_accuracy: 1.0000 - val_loss: 0.1948
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.9979 - loss: 0.1583 - val_accuracy: 1.0000 - val_loss: 0.1429
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.9923 - loss: 0.0748 - val_accuracy: 1.0000 - val_loss: 0.0403
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37



In [8]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
model.save('action_recognition_model.h5')


print(model.input_shape)
def preprocess_image(image):
    image = cv2.resize(image, (50, 50))  # Resize to match model input size
    image = image.astype('float32') / 255.0  # Normalize
    image = image.flatten()  # Flatten the image to a 1D array
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 1.0000 - loss: 0.0043




Test Accuracy: 100.00%
(None, 50, 50, 3)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
Predicted class: prototype


In [16]:
model.summary()



In [21]:
!pip install pyttsx3



Defaulting to user installation because normal site-packages is not writeable
Collecting pyttsx3
  Obtaining dependency information for pyttsx3 from https://files.pythonhosted.org/packages/1c/05/e203982f967cbf4959b383a237142b8df1c2cbc6e0174637cd3f5ce3730c/pyttsx3-2.97-py3-none-any.whl.metadata
  Downloading pyttsx3-2.97-py3-none-any.whl.metadata (3.8 kB)
Collecting comtypes (from pyttsx3)
  Obtaining dependency information for comtypes from https://files.pythonhosted.org/packages/b7/be/f329ad3247f323f350395ad02f33937c9f189b0f8862e9319dd90ab0b867/comtypes-1.4.7-py3-none-any.whl.metadata
  Downloading comtypes-1.4.7-py3-none-any.whl.metadata (6.5 kB)
Collecting pypiwin32 (from pyttsx3)
  Obtaining dependency information for pypiwin32 from https://files.pythonhosted.org/packages/d0/1b/2f292bbd742e369a100c91faa0483172cd91a1a422a6692055ac920946c5/pypiwin32-223-py3-none-any.whl.metadata
  Downloading pypiwin32-223-py3-none-any.whl.metadata (236 bytes)
Downloading pyttsx3-2.97-py3-none-any.wh





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Quitting...
