# In [None]:
import os
import cv2
import time
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

# Short aliases for the MediaPipe sub-modules used throughout this file:
# drawing helpers first, then the holistic solution module.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

def mediapipe_detection(image, model):
    """Run ``model.process`` on a BGR frame and return the frame with the results.

    Args:
        image: Frame in BGR channel order (converted to RGB for the model).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only only for the duration of the model call.
    # NOTE(review): presumably this lets MediaPipe avoid copying the frame - confirm.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_landmarks(image, results):
    """Draw pose and both hand landmark sets on ``image`` with default styling.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` for each set.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.
    """
    hand_edges = mp_holistic.HAND_CONNECTIONS
    layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, hand_edges),
        (results.right_hand_landmarks, hand_edges),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)
    
def draw_styled_landmarks(image, results):
    """Draw pose and both hand landmark sets, each with its own colours.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` for each set.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec
    hand_edges = mp_holistic.HAND_CONNECTIONS

    # Each row: (landmark list, connections, landmark spec, connection spec).
    styled_layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, hand_edges,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, hand_edges,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )
    for layer in styled_layers:
        mp_drawing.draw_landmarks(image, *layer)

def extract_keypoints(results):
    """Flatten pose and hand landmarks into one 1-D feature vector.

    The vector is the concatenation of pose (x, y, z, visibility per
    landmark), left hand (x, y, z) and right hand (x, y, z). A missing
    landmark set is replaced by zeros: 33*4 values for the pose and 21*3
    for each hand.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable of points.

    Returns:
        A 1-D ``numpy`` array.
    """
    def _flatten(landmark_set, fields, expected_points):
        # Zero padding keeps the vector length fixed when a part is not detected.
        if not landmark_set:
            return np.zeros(expected_points * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose_vec = _flatten(results.pose_landmarks, xyz + ('visibility',), 33)
    left_vec = _flatten(results.left_hand_landmarks, xyz, 21)
    right_vec = _flatten(results.right_hand_landmarks, xyz, 21)
    return np.concatenate([pose_vec, left_vec, right_vec])

# Mutable state shared with the capture loop further down the file.
sequence = []        # per-frame keypoint vectors fed to the model
sentence = []        # recognised action labels shown in the bottom banner
predictions = []     # argmax class index of each prediction made
# Bar colours for prob_viz, cycled per action. Every component is kept within
# 0-255: two entries previously held 265 and 295, which an 8-bit channel
# cannot represent.
colors = [(245, 221, 173), (245, 185, 255), (146, 235, 193), (204, 152, 255), (255, 217, 179), (0, 0, 179)]
threshold = 0.75           # minimum top probability for a prediction to count
last_update_time = None    # time.time() reference for the stability check; None while below threshold
stable_time = 1.0          # seconds that must elapse before a label is appended

def prob_viz(res, actions, input_frame, colors, threshold, max_actions=20):
    """Overlay one labelled probability bar per action on a copy of the frame.

    Args:
        res: Per-action probabilities, indexed in the same order as
            ``actions``. Actions without a matching entry are drawn with
            probability 0.
        actions: Sequence of action label strings.
        input_frame: Image to annotate; it is copied, never written to.
        colors: Sequence of colour tuples, cycled when there are more
            actions than colours.
        threshold: Unused; kept in the signature so existing callers keep
            working.
        max_actions: Maximum number of actions drawn.

    Returns:
        The annotated copy of ``input_frame``.
    """
    row_height = 30
    alpha = 0.6
    shown = actions[:max_actions]
    probs = [res[num] if num < len(res) else 0 for num, _ in enumerate(shown)]

    # Pass 1: filled bars on a copy, then blend with the untouched frame so
    # the bars end up semi-transparent.
    overlay = input_frame.copy()
    for num, prob in enumerate(probs):
        top = 30 + num * row_height
        cv2.rectangle(overlay, (0, top), (int(prob * 300), top + row_height),
                      colors[num % len(colors)], -1)
    overlay = cv2.addWeighted(overlay, alpha, input_frame, 1 - alpha, 0)

    # Pass 2: labels are drawn after blending so the text stays fully opaque.
    for num, (action, prob) in enumerate(zip(shown, probs)):
        label = action + ' ' + str(round(prob * 100, 2)) + '%'
        cv2.putText(overlay, label, (5, 50 + num * row_height),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
    return overlay

# Class labels; the capture loop indexes this array with the argmax of the
# model output, so the order presumably matches the model's output units
# (verify against the training script).
actions = np.array([
    'hai',
    'nama',
    'aku',
    'kamu',
    'dia',
    'sehat',
    'maaf',
    'tolong',
    'terima_kasih',
    'no_action',
])

# Machine-specific absolute path to the saved Keras model file.
model_path = r'C:\Users\ilham\Documents\SignLanguage\PengujianAkhir\1. JumlahKelasDataset\Model\ModelLSTM-10.h5'
model = load_model(model_path)

# Real-time loop: grab frames from camera 0, extract keypoints, classify the
# last 30 frames and show the prediction bars plus the recognised sentence.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 517.50)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            # Keep a sliding window of the 30 most recent keypoint vectors.
            sequence.append(extract_keypoints(results))
            sequence = sequence[-30:]

            if len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = np.argmax(res)
                predictions.append(best)
                current_action = actions[best]
                current_prob = res[best]

                if current_prob > threshold:
                    current_time = time.time()
                    if last_update_time is None:
                        # First confident frame: start the stability timer.
                        last_update_time = current_time
                    elif current_time - last_update_time >= stable_time:
                        # NOTE(review): the timer only resets when confidence
                        # drops below the threshold, not when the predicted
                        # action changes - confirm this is intended.
                        if len(sentence) == 0 or (sentence[-1] != current_action):
                            sentence.append(current_action)
                            last_update_time = current_time
                else:
                    last_update_time = None

                # Only the four most recent words are displayed.
                if len(sentence) > 4:
                    sentence = sentence[-4:]

                image = prob_viz(res, actions, image, colors, threshold)

            # Bottom banner with the sentence centred horizontally.
            banner_text = ' '.join(sentence)
            cv2.rectangle(image, (0, image.shape[0] - 40), (image.shape[1], image.shape[0]), (245, 117, 16), -1)
            text_size = cv2.getTextSize(banner_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
            text_x = (image.shape[1] - text_size[0]) // 2
            cv2.putText(image, banner_text, (text_x, image.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even if a frame fails mid-loop.
    cap.release()
    cv2.destroyAllWindows()