In [None]:
import cv2
import os
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
import time

# Handles to the MediaPipe sub-modules used by this script: the holistic
# solution (pose, face and hand landmarks) and the landmark drawing helpers.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Size, in pixels, of the region cropped out of each camera frame for display.
window_width = 1024
window_height = 720

# Open the default webcam (device index 0) and request 1280x720 at 24 FPS.
# These are requests only: the values actually delivered are whatever the
# capture backend reports on the frames it returns.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS, 24)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)

# Class labels are the entry names found directly inside the dataset folder;
# the index of a label in `actions` is matched against the model's output index.
# NOTE(review): os.listdir() returns entries in arbitrary order (and includes
# any stray files) - confirm this matches the label order used for training.
DATASET_PATH = 'DATASET'
actions = np.asarray(os.listdir(DATASET_PATH))

# Trained classifier, loaded without compiling (it is only used for predict()).
model = load_model('sonuclar/CNN+GRU_best_model2.keras', compile=False)

# Mutable recognition state shared with the main loop:
sentence = []         # labels accepted so far, shown at the bottom of the window
keypoints = []        # per-frame feature vectors collected for the next prediction
last_prediction = []  # most recently accepted label (empty list until the first one)

# Font settings (Turkish character support).
# Arial is preferred because the on-screen text contains Turkish characters.
# ImageFont.truetype raises OSError when the font file cannot be read, so only
# that error triggers the fallback; anything else (e.g. KeyboardInterrupt)
# propagates instead of being silently swallowed.
# NOTE(review): the built-in default font may not cover every Turkish glyph -
# confirm the fallback renders the UI strings acceptably.
try:
    font = ImageFont.truetype("arial.ttf", 32)
except OSError:
    font = ImageFont.load_default()

# --- Real-time recognition loop ---------------------------------------------
# Reads webcam frames, extracts MediaPipe Holistic landmarks, classifies every
# SEQUENCE_LENGTH collected frames with `model`, and shows a cropped, mirrored
# view with the recognised sentence and the top-3 predictions. Press 'q' to quit.

SEQUENCE_LENGTH = 30        # frames collected per prediction
CONFIDENCE_THRESHOLD = 0.7  # minimum top score for a label to be accepted
COOLDOWN_SECONDS = 4.0      # pause in data collection after an accepted label
MAX_SENTENCE_WORDS = 7      # only the most recent words are displayed
POSE_VALUES_KEPT = 92       # first 23 pose landmarks * (x, y, z, visibility)


def _crop_bounds(center, window_size, frame_size):
    """Return ``(start, end)`` of a ``window_size`` span centred on ``center``.

    The span is shifted so it stays inside ``[0, frame_size]``. When the frame
    is smaller than the requested window (e.g. the camera did not honour the
    requested resolution) the span shrinks to the whole frame instead of
    producing a negative start index.
    """
    size = min(window_size, frame_size)
    start = min(max(center - window_size // 2, 0), frame_size - size)
    return start, start + size


def _extract_keypoints(results):
    """Build the per-frame feature vector from a Holistic result.

    Returns the first ``POSE_VALUES_KEPT`` pose values followed by the x, y, z
    values of the left and right hand (zeros for a hand that was not detected).
    This equals concatenating pose, face and both hands and then deleting
    indices 92..1535, assuming 33 pose and 468 face landmarks, but without
    building the face array that would be discarded anyway.

    Expects ``results.pose_landmarks`` to be set.
    """
    pose = np.array(
        [[p.x, p.y, p.z, p.visibility] for p in results.pose_landmarks.landmark]
    ).flatten()

    hands = []
    for hand in (results.left_hand_landmarks, results.right_hand_landmarks):
        if hand:
            hands.append(np.array([[p.x, p.y, p.z] for p in hand.landmark]).flatten())
        else:
            hands.append(np.zeros(21 * 3))

    return np.concatenate([pose[:POSE_VALUES_KEPT], *hands])


# None (rather than an empty list) marks "nothing accepted yet", so the
# duplicate check below never compares a label against a list.
last_prediction = None
# Top-3 (label, score) pairs of the latest prediction; empty until the first one.
top3_labels_scores = []
# time.monotonic() value before which incoming frames are not collected.
cooldown_until = 0.0

try:
    ret, frame = cap.read()

    with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic:
        while ret:
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = holistic.process(image)
            image.flags.writeable = True

            frame_height, frame_width, _ = frame.shape

            # Nothing is collected or redrawn while no pose is detected.
            if results.pose_landmarks:
                pose_landmarks = results.pose_landmarks.landmark
                left_shoulder = pose_landmarks[mp_holistic.PoseLandmark.LEFT_SHOULDER]
                right_shoulder = pose_landmarks[mp_holistic.PoseLandmark.RIGHT_SHOULDER]

                # Centre the crop on the midpoint between the shoulders
                # (landmark coordinates are normalised, hence the scaling).
                x_center = int(((left_shoulder.x + right_shoulder.x) / 2) * frame_width)
                y_center = int(((left_shoulder.y + right_shoulder.y) / 2) * frame_height)

                start_x, end_x = _crop_bounds(x_center, window_width, frame_width)
                start_y, end_y = _crop_bounds(y_center, window_height, frame_height)
                window = frame[start_y:end_y, start_x:end_x]

                # Collect features unless we are pausing after an accepted label.
                if time.monotonic() >= cooldown_until:
                    keypoints.append(_extract_keypoints(results))

                # Drawing on the (mirrored) image; PIL is used for text so that
                # the TrueType font can render non-ASCII characters.
                window = cv2.flip(window, 1)
                pil_image = Image.fromarray(cv2.cvtColor(window, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(pil_image)

                # Show the frame count to the user.
                if len(keypoints) < SEQUENCE_LENGTH:
                    progress_text = f"Veri Toplanıyor: {len(keypoints)}/{SEQUENCE_LENGTH}"
                    draw.text((50, window.shape[0] - 50), progress_text, font=font, fill=(255, 0, 0))

                # Make a prediction once a full sequence has been collected.
                if len(keypoints) == SEQUENCE_LENGTH:
                    keypoints_np = np.array(keypoints)
                    prediction = model.predict(keypoints_np[np.newaxis, :, :])
                    keypoints = []

                    scores = prediction[0]
                    top3_indices = scores.argsort()[-3:][::-1]
                    top3_labels_scores = [(actions[i], scores[i]) for i in top3_indices]
                    top_label, top_score = top3_labels_scores[0]

                    is_new_label = last_prediction is None or last_prediction != top_label
                    if top_score > CONFIDENCE_THRESHOLD and is_new_label:
                        sentence.append(top_label)
                        last_prediction = top_label
                        # Non-blocking pause: frames keep being read and shown
                        # (so the new word appears immediately and 'q' still
                        # works) but are not collected until the cooldown ends.
                        cooldown_until = time.monotonic() + COOLDOWN_SECONDS

                # Limit the sentence length.
                if len(sentence) > MAX_SENTENCE_WORDS:
                    sentence = sentence[-MAX_SENTENCE_WORDS:]

                # Write the horizontally centred sentence near the bottom edge
                # (y = 650 for the default 720-pixel-high window).
                text = ' '.join(sentence)
                text_bbox = draw.textbbox((0, 0), text, font=font)
                text_width = text_bbox[2] - text_bbox[0]
                text_x = (window.shape[1] - text_width) // 2
                draw.text((text_x, window.shape[0] - 70), text, font=font, fill=(255, 255, 255))

                # Write the top 3 predictions in the top-left corner, coloured
                # by confidence (green > 0.7, orange > 0.4, grey otherwise).
                y_offset = 20
                for label, score in top3_labels_scores:
                    if score > 0.7:
                        color = (0, 200, 0)
                    elif score > 0.4:
                        color = (255, 165, 0)
                    else:
                        color = (180, 180, 180)
                    draw.text((30, y_offset), f"{label}: {score:.2f}", font=font, fill=color)
                    y_offset += 40

                # Display the image.
                window = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
                cv2.imshow('Turkce Isaret Dili Tanıma', window)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            ret, frame = cap.read()
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
