# In [2]:
import tensorflow as tf
import numpy as np
import cv2
import os
from typing import List, Tuple
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv3D, Dense, LSTM, Dropout, BatchNormalization
from tensorflow.keras.layers import MaxPooling3D, Reshape, Bidirectional, Activation
from tensorflow.keras.optimizers import RMSprop

class MouthMapPreprocessor:
    """Convert videos and transcripts to arrays, and predictions back to text.

    The character vocabulary defines the label space: each character's
    position in ``vocab`` is its integer index.
    """

    def __init__(self, vocab="abcdefghijklmnopqrstuvwxyz'?! "):
        """Build the character <-> index lookup tables.

        Args:
            vocab: Iterable of single characters. The position of a character
                is its index. If a character is repeated, ``char_to_idx``
                keeps the last position.
        """
        self.vocab = list(vocab)
        self.char_to_idx = {char: idx for idx, char in enumerate(self.vocab)}
        self.idx_to_char = {idx: char for idx, char in enumerate(self.vocab)}

    def process_video(self, video_path: str, target_size: Tuple[int, int] = (120, 160)) -> np.ndarray:
        """Read a video and return its cropped, resized, normalized frames.

        Every frame is converted to grayscale, cropped to a fixed window
        (rows 200-279, columns 160-319), resized, and scaled by 1/255.

        Args:
            video_path: Path handed to ``cv2.VideoCapture``.
            target_size: Passed unchanged as ``cv2.resize``'s ``dsize``, which
                OpenCV interprets as ``(width, height)``. Each output frame
                therefore has shape ``(target_size[1], target_size[0])``.

        Returns:
            A float array stacking all frames along axis 0. If the video
            cannot be opened or contains no frames, the result is an empty
            array of shape ``(0,)``.
        """
        frames = []
        cap = cv2.VideoCapture(video_path)

        # try/finally guarantees the capture handle is released even when
        # a frame fails to convert or resize.
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # NOTE(review): fixed crop window, presumably where the mouth
                # sits in the source footage -- confirm against the dataset.
                # Frames smaller than 280x320 yield a truncated (or empty)
                # crop; an empty crop makes cv2.resize raise.
                mouth_region = gray[200:280, 160:320]

                resized = cv2.resize(mouth_region, target_size)

                # Scale by 1/255 (maps 8-bit pixel values into [0, 1]).
                frames.append(resized / 255.0)
        finally:
            cap.release()

        return np.array(frames)

    def encode_text(self, text: str) -> np.ndarray:
        """Convert text to a sequence of vocabulary indices.

        The text is lower-cased first; characters that are not in the
        vocabulary are dropped silently.

        Args:
            text: Transcript to encode.

        Returns:
            1-D integer array of indices. It is an integer array even when
            empty, so labels have one dtype regardless of the input.
        """
        indices = [self.char_to_idx[c] for c in text.lower() if c in self.char_to_idx]
        # Without an explicit dtype an empty list would become float64.
        return np.array(indices, dtype=int)

    def decode_prediction(self, prediction: np.ndarray) -> str:
        """Convert a sequence of vocabulary indices back to text.

        Indices outside ``[0, len(vocab))`` are skipped. This includes
        negative values such as the -1 padding emitted by CTC decoders.

        Args:
            prediction: Iterable of integer indices.

        Returns:
            The decoded string.
        """
        vocab_size = len(self.vocab)
        return ''.join(
            self.idx_to_char[idx] for idx in prediction if 0 <= idx < vocab_size
        )
