In [1]:
pip install pypiwin32

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install gTTS pygame

Collecting gTTS
Note: you may need to restart the kernel to use updated packages.

  Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Collecting pygame
  Downloading pygame-2.6.1-cp311-cp311-win_amd64.whl (10.6 MB)
                                              0.0/10.6 MB ? eta -:--:--
                                              0.0/10.6 MB ? eta -:--:--
                                              0.0/10.6 MB ? eta -:--:--
                                              0.1/10.6 MB 1.0 MB/s eta 0:00:11
                                              0.2/10.6 MB 1.0 MB/s eta 0:00:11
                                              0.2/10.6 MB 1.3 MB/s eta 0:00:09
     -                                        0.3/10.6 MB 1.3 MB/s eta 0:00:09
     -                                        0.3/10.6 MB 1.2 MB/s eta 0:00:09
     -                                        0.4/10.6 MB 1.3 MB/s eta 0:00:08
     -                                        0.4/10.6 MB 1.3 MB/s eta 0:00:08
     -            

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import pickle
import pandas as pd
import time
import os
from gtts import gTTS
from pygame import mixer
import threading
import warnings
from collections import Counter, deque

warnings.filterwarnings("ignore", category=UserWarning)

# 1. GOOGLE AUDIO INITIALIZATION (gTTS)
# Initialize pygame's mixer once, up front; speak_google() below relies on
# mixer.music to load and play the generated MP3 files.
mixer.init()

def speak_google(text):
    """Speak `text` in Indonesian via gTTS without blocking the caller.

    The audio for each distinct text is synthesized once and cached as an MP3
    inside the ``assets`` folder; later calls replay the cached file. All work
    (synthesis and starting playback) runs on a daemon thread, so this function
    returns immediately. Any failure is reported with a printed message rather
    than raised.

    Args:
        text: Word or phrase to speak. It is converted with ``str()`` first, so
            non-string labels (e.g. numpy scalars) are accepted.

    Returns:
        None.
    """
    cache_dir = "assets"

    def _cache_path(label):
        # Map `label` to a file name that cannot leave `cache_dir` or contain
        # characters that are invalid in file names (e.g. "/", ":", "?").
        import hashlib

        safe = "".join(ch if ch.isalnum() or ch in "-_ " else "_" for ch in label)
        if safe != label:
            # Sanitizing may make two different labels look alike; a short digest
            # of the original label keeps their cache entries distinct.
            digest = hashlib.sha256(label.encode("utf-8")).hexdigest()[:8]
            safe = f"{safe}_{digest}"
        return os.path.join(cache_dir, f"{safe}.mp3")

    def _run():
        import tempfile

        label = str(text)
        filename = _cache_path(label)
        tmp_path = None
        try:
            # exist_ok avoids the check-then-create race between worker threads.
            os.makedirs(cache_dir, exist_ok=True)

            # If the audio file does not exist yet, download it first (cache system).
            if not os.path.exists(filename):
                tts = gTTS(text=label, lang='id')
                # Save to a private temp file and rename it into place only on
                # success, so an interrupted download never leaves a truncated
                # MP3 behind that later calls would mistake for a valid cache hit.
                fd, tmp_path = tempfile.mkstemp(suffix=".part", dir=cache_dir)
                os.close(fd)
                tts.save(tmp_path)
                os.replace(tmp_path, filename)
                tmp_path = None

            # Play the audio.
            mixer.music.load(filename)
            mixer.music.play()
        except Exception as e:
            print(f"Error Suara: {e}")
        finally:
            # Best-effort removal of a leftover partial download.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass

    threading.Thread(target=_run, daemon=True).start()

# 2. LOAD MODEL & MEDIAPIPE
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so
# only point this at a model file that comes from a trusted source.
with open('models/model_sign_language.pkl', 'rb') as f:
    model = pickle.load(f)  # later code calls model.predict_proba(...) and reads model.classes_

# Short aliases for the MediaPipe hand-tracking and landmark-drawing modules.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Hand tracker limited to two hands. model_complexity=0 is presumably the
# lighter/faster landmark model -- confirm against the MediaPipe documentation.
hands = mp_hands.Hands(max_num_hands=2, model_complexity=0, min_detection_confidence=0.5)

# 3. CONTROL STATE
# Most recently accepted word and the running list of accepted words.
current_word = str()
sentence_history = list()
# Timestamps of the last accepted word / last sentence update (0 = none yet).
last_word_time = last_sentence_time = 0
# Small buffer (8 frames) of recent predictions so recognition stays fast.
prediction_buffer = deque([], 8)
# Feature column names in order: x0, y0, z0, x1, y1, z1, ..., x41, y41, z41.
feature_columns = [axis + str(n) for n in range(42) for axis in ('x', 'y', 'z')]

# 4. REAL-TIME CAPTURE, RECOGNITION AND DISPLAY LOOP
# Tunable parameters (previously inline magic numbers).
CAMERA_INDEX = 0
WINDOW_TITLE = 'AI Translator Pro - Google Voice'
MAX_HANDS = 2                # feature row has room for two hands only
VALUES_PER_HAND = 63         # values reserved per hand: 63 / 3 coords = 21 landmarks
MIN_CONFIDENCE = 0.70        # predictions below this probability are ignored
MIN_VOTES = 6                # votes needed in a full buffer to accept a word
REPEAT_COOLDOWN_S = 2.0      # the same word may repeat only after this many seconds
SENTENCE_TIMEOUT_S = 10.0    # sentence is cleared after this long without a new word
SUBTITLE_DURATION_S = 2.5    # how long the large subtitle stays visible
MAX_SENTENCE_CHARS = 30      # longer sentences are shown as "..." + tail


def _draw_sentence_banner(frame, words):
    """Return `frame` with the sentence drawn in a small translucent box (top-left)."""
    full_txt = " ".join(words).upper()
    if len(full_txt) > MAX_SENTENCE_CHARS:
        full_txt = "..." + full_txt[-(MAX_SENTENCE_CHARS - 3):]
    t_size = cv2.getTextSize(full_txt, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)[0]
    backdrop = frame.copy()
    cv2.rectangle(backdrop, (10, 10), (20 + t_size[0], 40), (0, 0, 0), -1)
    # Blend the black box 50/50 with the image so the video stays visible behind it.
    frame = cv2.addWeighted(backdrop, 0.5, frame, 0.5, 0)
    cv2.putText(frame, full_txt, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
    return frame


def _draw_word_subtitle(frame, word):
    """Return `frame` with `word` centered in a large dark bar along the bottom edge."""
    h, w = frame.shape[:2]
    label = word.upper()
    overlay = frame.copy()
    cv2.rectangle(overlay, (0, h - 70), (w, h), (0, 0, 0), -1)
    frame = cv2.addWeighted(overlay, 0.6, frame, 0.4, 0)
    tw_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1.2, 2)[0]
    cv2.putText(frame, label, ((w - tw_size[0]) // 2, h - 25),
                cv2.FONT_HERSHEY_DUPLEX, 1.2, (255, 255, 255), 2)
    return frame


cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    print(f"Error Kamera: kamera (index {CAMERA_INDEX}) tidak dapat dibuka")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)  # horizontal flip (mirror view)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)
        current_time = time.time()

        if results.multi_hand_landmarks:
            # One zero-padded row: hand 0 fills the first 63 values, hand 1 the next 63.
            # NOTE(review): slots follow the order MediaPipe reports the hands in;
            # presumably the training data was recorded the same way -- confirm.
            data_row = np.zeros(len(feature_columns)).tolist()
            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                if idx >= MAX_HANDS:
                    break  # a third hand would overflow the feature row
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                for i, lm in enumerate(hand_landmarks.landmark):
                    start = (idx * VALUES_PER_HAND) + (i * 3)
                    data_row[start:start + 3] = [lm.x, lm.y, lm.z]

            df_input = pd.DataFrame([data_row], columns=feature_columns)
            probs = model.predict_proba(df_input)[0]
            best = int(np.argmax(probs))
            confidence = probs[best]
            raw_pred = model.classes_[best]

            # Vote only on frames that contributed a confident prediction.
            # Re-checking an unchanged (stale) buffer on low-confidence frames
            # could re-emit the previous word once the cooldown expired even
            # though that sign is no longer being shown.
            if confidence >= MIN_CONFIDENCE:
                prediction_buffer.append(raw_pred)

                if len(prediction_buffer) == prediction_buffer.maxlen:
                    top_label, votes = Counter(prediction_buffer).most_common(1)[0]
                    word = str(top_label)  # plain str so .upper()/join work for any label type
                    is_new_word = word != current_word
                    cooled_down = (current_time - last_word_time) > REPEAT_COOLDOWN_S
                    if votes >= MIN_VOTES and (is_new_word or cooled_down):
                        current_word = word
                        sentence_history.append(current_word)
                        last_word_time = current_time
                        last_sentence_time = current_time
                        # Speak the accepted word with Google TTS.
                        speak_google(current_word)
                        prediction_buffer.clear()
        else:
            # No hand in view: drop any partial evidence.
            prediction_buffer.clear()

        # --- RENDER UI ---

        # A. Sentence in the top-left corner (small translucent box).
        if current_time - last_sentence_time >= SENTENCE_TIMEOUT_S:
            sentence_history = []
        elif sentence_history:
            frame = _draw_sentence_banner(frame, sentence_history)

        # B. Large subtitle centered at the bottom.
        if current_word and current_time - last_word_time < SUBTITLE_DURATION_S:
            frame = _draw_word_subtitle(frame, current_word)

        cv2.imshow(WINDOW_TITLE, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the webcam and close the window, even when the loop is left
    # through an exception or a kernel interrupt; otherwise the camera stays
    # locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()
    # Release the MediaPipe graph; `hands` is re-created when the cell is re-run.
    hands.close()