In [9]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from mediapipe import solutions

# Load the trained model
def load_trained_model(model_path='sign_language_model.h5'):
    return load_model(model_path)

# Function to extract keypoints from the video frame using MediaPipe Holistic
def extract_keypoints_from_frame(frame, holistic):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(frame_rgb)

    if results.pose_landmarks is not None:
        keypoints = []
        for landmark in results.pose_landmarks.landmark:
            keypoints.append([landmark.x, landmark.y, landmark.z, landmark.visibility])
        return np.array(keypoints)
    return None

# Preprocess the keypoints to fit the LSTM model input shape
def preprocess_keypoints(keypoints, max_sequence_length=30):
    # Pad keypoints to 33 landmarks if fewer than 33 are detected
    if len(keypoints) < 33:
        keypoints = np.pad(keypoints, ((0, 33 - len(keypoints)), (0, 0)), 'constant')
    else:
        keypoints = keypoints[:33]  # Take only the first 33 landmarks

    # Reshape to match the LSTM input shape (1, max_sequence_length, num_keypoints * 4)
    keypoints = keypoints.reshape((1, 1, 33 * 4))  # 33 keypoints with 4 values each
    return keypoints

# Function to predict the sign language label
def predict_sign_language(frame, holistic, model, label_map):
    # Extract keypoints from the frame
    keypoints = extract_keypoints_from_frame(frame, holistic)
    if keypoints is None:
        return None

    # Preprocess the keypoints
    processed_keypoints = preprocess_keypoints(keypoints)

    # Predict using the trained model
    prediction = model.predict(processed_keypoints)

    # Get the predicted label index
    predicted_label_idx = np.argmax(prediction)

    # Get the label name (use the reverse of label_map from the training phase)
    label_name = [label for label, idx in label_map.items() if idx == predicted_label_idx][0]
    
    return label_name

# Main function to process the video and predict the sign language
def process_video(model, label_map, camera_index=0):
    # Set up MediaPipe Holistic
    holistic = solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

    # Open the webcam for real-time video capture
    cap = cv2.VideoCapture(camera_index)  # Use camera index for real-time video capture

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for better visualization
        frame = cv2.flip(frame, 1)

        # Get the predicted sign language word
        predicted_label = predict_sign_language(frame, holistic, model, label_map)

        # Display the prediction text on the frame
        if predicted_label is not None:
            cv2.putText(frame, predicted_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Show the frame with the predicted label
        cv2.imshow('Sign Language Recognition (Real-time)', frame)

        # If 'q' is pressed, break the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Run the real-time video processing and prediction
model_path = 'sign_language_model.h5'  # Path to the trained model
label_map = {  # Example label_map, replace with your actual label_map
    'Summer': 0,
    'Winter': 1,
    'Spring': 2,
    'Autumn': 3,
    'Monsoon': 4
}
# Load the trained model
model = load_trained_model(model_path)

# Process the video and predict in real-time
process_video(model, label_map)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2