In [None]:
import cv2
import numpy as np
import mediapipe as mp
from keras.models import load_model
from PIL import Image, ImageFont, ImageDraw
import arabic_reshaper
from bidi.algorithm import get_display
import json

# --- Inference assets ---------------------------------------------------
# Trained Keras model that classifies a window of keypoint vectors.
model = load_model("working/LSTM_Model_1.h5")

# label_map is read from JSON; the inverted mapping is what the recognition
# loop indexes with the predicted class to recover the label text.
with open("working/label_map.json", mode="r", encoding="utf-8") as file:
    label_map = json.load(file)
reverse_label_map = {index: label for label, index in label_map.items()}

# TrueType font used when drawing the status and prediction text on frames.
font_path = "working/Adobe Arabic Regular.ttf"
font = ImageFont.truetype(font_path, size=32)

# Short aliases for the MediaPipe Holistic solution and its drawing helpers.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def adjust_landmarks(landmarks, center):
    """Express flattened landmarks relative to a reference point.

    Args:
        landmarks: Flat array of coordinates laid out as x, y, z triples.
        center: Reference point subtracted from every triple.

    Returns:
        A flat array of the shifted coordinates. When ``landmarks`` is
        empty, an array of zeros shaped like ``center`` is returned instead.
    """
    if len(landmarks) != 0:
        points = landmarks.reshape(-1, 3)
        # Broadcasting applies the same offset to every (x, y, z) row.
        return (points - center).ravel()
    return np.zeros_like(center)

def extract_keypoints(results):
    """Build one feature vector from a MediaPipe Holistic result.

    Pose, left-hand and right-hand landmarks are each flattened to
    x, y, z triples and shifted so that the group's first landmark becomes
    the origin (via ``adjust_landmarks``). A group that was not detected is
    replaced by zeros (33 triples for the pose, 21 for each hand).

    Args:
        results: Object exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` sequence of points with
            ``x``, ``y`` and ``z`` attributes.

    Returns:
        The pose, left-hand and right-hand vectors concatenated in that
        order.
    """
    def flatten_group(group, point_count):
        # Missing detections become an all-zero placeholder of fixed size.
        if not group:
            return np.zeros(point_count * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in group.landmark]).flatten()

    def first_point(flat):
        # Reference point: the group's first (x, y, z) triple.
        if len(flat) >= 3:
            return flat[:3]
        return np.zeros(3)

    groups = (
        flatten_group(results.pose_landmarks, 33),
        flatten_group(results.left_hand_landmarks, 21),
        flatten_group(results.right_hand_landmarks, 21),
    )
    return np.concatenate(
        [adjust_landmarks(flat, first_point(flat)) for flat in groups]
    )

def is_gesture_detected(lh, rh):
    """Report whether either hand spans a large enough bounding box.

    Args:
        lh: Flat left-hand coordinates laid out as x, y, z triples.
        rh: Flat right-hand coordinates in the same layout.

    Returns:
        Truthy when the x-extent times the y-extent of either hand
        exceeds 0.01; an empty input counts as area 0.
    """
    area_threshold = 0.01

    def span_area(flat_hand):
        if len(flat_hand) == 0:
            return 0
        width = np.ptp(flat_hand[::3])    # max(x) - min(x)
        height = np.ptp(flat_hand[1::3])  # max(y) - min(y)
        return width * height

    # Both areas are computed up front, before the comparison.
    left_area, right_area = span_area(lh), span_area(rh)
    return left_area > area_threshold or right_area > area_threshold

# Real-time gesture recognition with toggle functionality.
#
# Keyboard controls (read from the OpenCV window):
#   's'  toggle keypoint collection on/off
#   'q'  quit
sequence = []         # keypoint vectors gathered for the current window
sequence_length = 48  # frames collected before each model prediction
last_prediction = ""
collecting = False  # Flag to track whether keypoint collection is active

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The loop below is skipped in this case; say why instead of exiting silently.
    print("Could not open camera 0; no frames will be processed.")

# try/finally guarantees the camera and window are released even when the
# loop is aborted by an exception or a kernel interrupt.
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(frame_rgb)

            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
            mp_drawing.draw_landmarks(frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
            mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

            # Handle key press to toggle collection
            key = cv2.waitKey(10) & 0xFF
            if key == ord('s'):  # Press 's' to toggle collection
                collecting = not collecting
                if not collecting:
                    # Reset sequence and clear prediction when stopping collection
                    sequence = []
                    last_prediction = ""
                    print("Keypoint collection stopped.")
                else:
                    print("Keypoint collection started.")

            # Collect keypoints if collection is active
            if collecting:
                sequence.append(extract_keypoints(results))

                # The buffer is emptied after every prediction, so it can
                # never grow past sequence_length and needs no trimming.
                if len(sequence) == sequence_length:
                    # Add a leading batch axis of size 1 for the model.
                    input_sequence = np.expand_dims(np.array(sequence), axis=0)
                    prediction = model.predict(input_sequence)
                    predicted_class = np.argmax(prediction, axis=1)[0]
                    confidence = np.max(prediction)

                    if confidence > 0.7:  # Confidence threshold
                        last_prediction = reverse_label_map[predicted_class]
                        print(f"Prediction: {last_prediction} (Confidence: {confidence:.2f})")

                    # Reset sequence after prediction
                    sequence = []

            # Display feedback. The text is drawn with PIL using the loaded
            # TrueType font; the Arabic string is reshaped and reordered first
            # so that it renders joined and right-to-left.
            status_text = "Collecting" if collecting else "Idle"
            reshaped_text = arabic_reshaper.reshape(last_prediction)
            bidi_text = get_display(reshaped_text)
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(frame_pil)
            draw.text((10, 50), f"Status: {status_text}", font=font, fill=(0, 255, 0))
            draw.text((10, 100), bidi_text, font=font, fill=(255, 0, 0))
            frame = cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)

            cv2.imshow('ترجمة لغة الإشارة العربية', frame)

            if key == ord('q'):  # Press 'q' to quit
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

print(f"Predicted Word: {last_prediction}")

Keypoint collection started.
Keypoint collection stopped.
Keypoint collection started.
Prediction: البنكرياس (Confidence: 0.95)
Prediction: البنكرياس (Confidence: 0.97)
Prediction: يسقي (Confidence: 0.71)
Prediction: صديق (Confidence: 0.80)
Prediction: يوم القيامة (Confidence: 0.99)
Prediction: أركان الإسلام (Confidence: 0.97)
Prediction: فحص سريري (Confidence: 0.95)
