In [10]:
# Show which Python interpreter backs this kernel, so the active environment can be confirmed.
import sys
print(sys.executable)


c:\Users\adelg\AppData\Local\Programs\Python\Python39\python.exe


In [11]:
# ---- Environment setup (CPU + MediaPipe/protobuf) ----
# Execute this cell before any cell that imports TensorFlow or MediaPipe.
# After installing or upgrading protobuf/mediapipe, restart the kernel.

import os

# Hide every CUDA device so TensorFlow runs on the CPU only (always overwritten).
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

# Select the pure-Python protobuf implementation unless the caller already chose one.
# This can sidestep protobuf binary issues on some Windows setups; it does NOT
# repair an incompatible protobuf version.
if "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION" not in os.environ:
    os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Quieter TensorFlow logging, again only when no value is set yet.
if "TF_CPP_MIN_LOG_LEVEL" not in os.environ:
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

print("Environment set: CPU-only. Next: run the Imports cell to validate protobuf.")


Environment set: CPU-only. Next: run the Imports cell to validate protobuf.


# Arabic Combined Architecture (CPU-Only Configuration)

This notebook provides the **same core functionality** as the original combined notebook: real-time sign recognition + sentence building — but adapted for **Arabic (ArSL)** and configured for **CPU-only inference with clearer camera feedback** (the environment-setup cell hides all GPUs from TensorFlow).

**What it does**

- Uses **two models** in real-time: an image model (MobileNetV2) + a landmark model (MediaPipe MLP).
- Fuses their probabilities, smooths predictions, and builds a sentence using `space`, `del`, `nothing`.
- Shows a clearer camera overlay: FPS, run mode (CPU), stable prediction, and a **two-hands warning**.

**How to run**

1. Run cells in order until model loading succeeds.
2. Run the last "Run camera" cell (press `q` to quit, `c` to clear the sentence).

**Optional (better Arabic text rendering)**

- Install: `pip install pillow arabic-reshaper python-bidi` to display properly-shaped Arabic in the overlay. Otherwise, it falls back to OpenCV text.


## Cell: Imports

Loads Python/TensorFlow/OpenCV/MediaPipe dependencies used by the rest of the notebook.


In [12]:
import os
import time
from pathlib import Path
from collections import deque, Counter

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# ---- Protobuf / MediaPipe compatibility preflight ----
import google.protobuf
from google.protobuf import message_factory as _message_factory

# Report the installed protobuf version that the compatibility checks in this cell evaluate.
print(f"Protobuf version: {google.protobuf.__version__}")
# Preflight flags: start optimistic; the checks in this cell flip them to False on failure.
PROTOBUF_OK = True
MP_OK = True
# Human-readable explanation of why MediaPipe was disabled (None while everything is fine).
MP_REASON = None

# Mediapipe 0.10.x expects protobuf >=4.25.3 and <5
# If version is lower, warn and mark MP_OK=False so we can skip gracefully.
def _version_tuple(v):
    return tuple(int(x) for x in str(v).split('.') if x.isdigit())

# Compare the installed protobuf against the range MediaPipe 0.10.x accepts (>=4.25.3, <5).
pb_ver = _version_tuple(google.protobuf.__version__)
if pb_ver < (4, 25, 3) or pb_ver >= (5, 0, 0):
    PROTOBUF_OK = False
    MP_OK = False
    MP_REASON = ("Protobuf version incompatible for MediaPipe 0.10.x. "
                 "Please install protobuf>=4.25.3,<5 and restart kernel.")

# Secondary API probe; only evaluated when the version range check passed.
if not hasattr(_message_factory, "GetMessageClass") and MP_OK:
    PROTOBUF_OK = False
    MP_OK = False
    MP_REASON = ("Incompatible protobuf: message_factory.GetMessageClass missing. "
                 "Install protobuf>=4.25.3,<5 and restart kernel.")

if not PROTOBUF_OK:
    print("[WARN] Protobuf not compatible for MediaPipe. MediaPipe will be disabled.")
    print(MP_REASON)

# Bind `mp` on every path so later cells can reference it without a NameError.
mp = None
if MP_OK:
    # Import MediaPipe only when protobuf looks compatible.
    try:
        import mediapipe as mp
        mp_ver = getattr(mp, "__version__", None)
        print("MediaPipe imported.", "version=" + str(mp_ver) if mp_ver else "")
    except Exception as e:
        # Best-effort: any import-time failure disables hand tracking instead of
        # aborting the cell; the reason is kept and shown to the user.
        mp = None
        MP_OK = False
        MP_REASON = f"Failed to import mediapipe: {e}"
        print('[WARN] MediaPipe import failed; disabling hand tracking.')
        print(MP_REASON)
else:
    print('[WARN] MediaPipe disabled due to protobuf version check.')


Protobuf version: 3.19.6
[WARN] Protobuf not compatible for MediaPipe. MediaPipe will be disabled.
Protobuf version incompatible for MediaPipe 0.10.x. Please install protobuf>=4.25.3,<5 and restart kernel.
[WARN] MediaPipe disabled due to protobuf version check.


## Cell: Project paths

Finds the project root and defines `ARABIC_DIR` and `GUIDE_DIR` so the notebook works no matter where you run it from.


In [13]:
# ---- Locate project folders (robust to different working directories) ----
def find_sign_to_sentence_root() -> Path:
    """Locate the "Sign_to_Sentence Project Main" folder starting from the working directory.

    Walks from the resolved current working directory up through its ancestors.
    At each level it returns the directory itself when it carries the project
    name, or the resolved child directory of that name when one exists. If no
    level matches, the resolved working directory is returned unchanged.
    """
    project_dirname = "Sign_to_Sentence Project Main"
    start_dir = Path.cwd().resolve()

    candidate = start_dir
    while True:
        if candidate.name == project_dirname:
            return candidate
        nested = candidate / project_dirname
        if nested.exists():
            return nested.resolve()
        if candidate.parent == candidate:
            # Reached the filesystem root without a match.
            break
        candidate = candidate.parent
    return start_dir

# Resolve the project root once, then derive the folders that later cells read from.
ROOT = find_sign_to_sentence_root()
ARABIC_DIR = ROOT / "ArSL Letter (Arabic)"
GUIDE_DIR = ROOT / "Arabic guide"

# Echo the resolved locations together with an existence check for quick diagnosis.
print('ROOT:', ROOT)
print('ARABIC_DIR:', ARABIC_DIR, 'exists=', ARABIC_DIR.exists())
print('GUIDE_DIR:', GUIDE_DIR, 'exists=', GUIDE_DIR.exists())

# Only ARABIC_DIR is enforced here; a missing GUIDE_DIR is merely reported above.
if not ARABIC_DIR.exists():
    raise FileNotFoundError(f'Arabic folder not found at: {ARABIC_DIR}')


ROOT: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main
ARABIC_DIR: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic) exists= True
GUIDE_DIR: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\Arabic guide exists= True


## Cell: CPU / performance setup

Confirms which GPUs (if any) TensorFlow can see and limits TensorFlow's intra-op and inter-op thread pools to one thread each to keep CPU usage low. Mixed precision is left off (`USE_MIXED_PRECISION = False`).


In [14]:
# ---- CPU-only / performance setup ----
# This notebook is configured for CPU stability + low CPU usage.

# List the GPUs TensorFlow can see. The "CPU-only" line below is printed
# unconditionally, so check the printed list to confirm it is actually empty.
gpus = tf.config.list_physical_devices('GPU')
print('GPUs visible to TensorFlow:', gpus)
print('Running in CPU-only mode.')

# Reduce CPU usage by limiting TensorFlow thread pools (tune as needed).
# Failures are reported and ignored so the notebook can continue with TF defaults.
try:
    tf.config.threading.set_intra_op_parallelism_threads(1)
    tf.config.threading.set_inter_op_parallelism_threads(1)
    print('TensorFlow threads limited (intra/inter = 1).')
except Exception as e:
    print('Could not set TF threading options:', e)

# NOTE(review): no code visible in this notebook reads this flag — confirm whether it is still needed.
USE_MIXED_PRECISION = False


GPUs visible to TensorFlow: []
Running in CPU-only mode.
TensorFlow threads limited (intra/inter = 1).


## Cell: Arabic class labels

Loads the Arabic class list used to interpret model outputs and to build the final sentence. This must match how your Arabic models were trained.


In [15]:
# ---- Arabic class labels (prefer the repo's guide if available) ----
# Primary source: the ARABIC_CLASSES sequence exported by arabic_class_labels.py
# inside GUIDE_DIR. Any error while loading that file selects the inline list.
ARABIC_CLASSES = None
try:
    import importlib.util

    labels_path = GUIDE_DIR / 'arabic_class_labels.py'
    spec = importlib.util.spec_from_file_location('arabic_class_labels', str(labels_path))
    mod = importlib.util.module_from_spec(spec)
    # Executing the module runs its top-level code, including anything it prints.
    spec.loader.exec_module(mod)
    ARABIC_CLASSES = list(mod.ARABIC_CLASSES)
    print('Loaded ARABIC_CLASSES from arabic_class_labels.py:', len(ARABIC_CLASSES))
except Exception as e:
    print('Falling back to inline ARABIC_CLASSES (could not import guide):', e)
    # 28 letters followed by the three control tokens.
    ARABIC_CLASSES = [
        'ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ',
        'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص',
        'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق',
        'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي',
    ] + ['space', 'del', 'nothing']

# Work on a private list copy and index it by label for O(1) lookups.
ARABIC_CLASSES = list(ARABIC_CLASSES)
CLASS_TO_INDEX = dict(zip(ARABIC_CLASSES, range(len(ARABIC_CLASSES))))
print('ARABIC_CLASSES:', ARABIC_CLASSES)


Arabic Sign Language Classes:
  - Letters: 28
  - Total classes: 31
  - Classes: ['ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي', 'space', 'del', 'nothing']
Loaded ARABIC_CLASSES from arabic_class_labels.py: 31
ARABIC_CLASSES: ['ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي', 'space', 'del', 'nothing']


## Cell: Load models + align labels

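Loads the Arabic MobileNet and the Arabic MediaPipe-MLP models, then takes the label order from the keypoints CSV (sorted by `LabelEncoder`) so that MLP output indices map to label names. MobileNet is disabled automatically when its output size does not match the CSV label count.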


In [None]:
# ---- Load models (CPU) + labels from a single CSV ----
# Requirement: use ONLY these three paths (no auto-search).
# - MobileNet model
# - MLP model
# - Keypoints CSV for label order

# IMPORTANT: For correctness, we do NOT create/pad "dummy" labels.
# If MobileNet outputs a different number of classes than the CSV, MobileNet will be disabled automatically.

FINAL_DIR = ARABIC_DIR / 'Final Notebooks'

# 1) EXACT paths (edit here only if you move files)
MOBILENET_PATH = FINAL_DIR / 'mobilenet_arabic_final.h5'
MLP_PATH = FINAL_DIR / 'arsl_mediapipe_mlp_model_final.h5'
KEYPOINTS_CSV = FINAL_DIR / 'FINAL_CLEAN_DATASET.csv'

print('MobileNet:', MOBILENET_PATH, 'exists=', MOBILENET_PATH.exists())
print('MLP:', MLP_PATH, 'exists=', MLP_PATH.exists())
print('CSV:', KEYPOINTS_CSV, 'exists=', KEYPOINTS_CSV.exists())

# Fail fast with a specific message when any of the three inputs is missing.
if not MOBILENET_PATH.exists():
    raise FileNotFoundError(f'MobileNet missing: {MOBILENET_PATH}')
if not MLP_PATH.exists():
    raise FileNotFoundError(f'MLP missing: {MLP_PATH}')
if not KEYPOINTS_CSV.exists():
    raise FileNotFoundError(f'CSV missing: {KEYPOINTS_CSV}')

# FIX: Force float32 policy BEFORE loading models.
# The MLP model was trained with mixed_float16, which crashes on CPU-only systems.
tf.keras.mixed_precision.set_global_policy('float32')

# 2) Load models (inference only)
# compile=False skips restoring the training configuration (optimizer/loss).
mobilenet_model = tf.keras.models.load_model(str(MOBILENET_PATH), compile=False)
mlp_model_raw = tf.keras.models.load_model(str(MLP_PATH), compile=False)

# FIX: Convert MLP from mixed_float16 to float32 for CPU stability.
# Clone the model architecture under float32 policy, then copy weights.
# NOTE(review): clone_model rebuilds layers from their saved configs; if those configs
# carry an explicit per-layer dtype policy, the clone may keep it — confirm the
# resulting layer dtypes before relying on the success message below.
try:
    mlp_model = tf.keras.models.clone_model(mlp_model_raw)
    mlp_model.set_weights(mlp_model_raw.get_weights())
    del mlp_model_raw
    print('✅ MLP model converted from mixed_float16 → float32 for CPU stability')
except Exception as e:
    # Best-effort: keep running with the model exactly as loaded.
    print(f'⚠️  Could not clone model to float32, using original: {e}')
    mlp_model = mlp_model_raw

# Size of each model's final output axis, i.e. the number of classes it predicts.
mn_dim = int(mobilenet_model.output_shape[-1])
mlp_dim = int(mlp_model.output_shape[-1])
print('MobileNet output classes:', mn_dim)
print('MLP output classes:', mlp_dim)

# 3) Labels from the CSV (single source of truth for label order)
# LabelEncoder sorts the unique values, so CSV_LABELS is in sorted order.
labels_df = pd.read_csv(str(KEYPOINTS_CSV), usecols=['label'])
encoder = LabelEncoder()
encoder.fit(labels_df['label'])
CSV_LABELS = list(encoder.classes_)
print('CSV unique labels:', len(CSV_LABELS))

# MLP label order must match MLP outputs
if len(CSV_LABELS) != mlp_dim:
    raise RuntimeError(
        f'CSV labels ({len(CSV_LABELS)}) != MLP output classes ({mlp_dim}). '
        'This means the CSV is not the one used to train the MLP.'
    )
MLP_LABELS = list(CSV_LABELS)

# MobileNet: only use if it matches CSV label count
# (only the count is compared here, not the label names or their order).
USE_MOBILENET = (len(CSV_LABELS) == mn_dim)
if not USE_MOBILENET:
    print(
        'MobileNet disabled for correctness/CPU: '
        f'CSV labels={len(CSV_LABELS)} but MobileNet outputs={mn_dim}.'
    )
    MN_LABELS = []
else:
    MN_LABELS = list(CSV_LABELS)

# 4) Fusion label space
# Order-preserving de-duplication of the MobileNet labels followed by the MLP labels.
FUSION_LABELS = list(dict.fromkeys((MN_LABELS if USE_MOBILENET else []) + MLP_LABELS))
FUSION_INDEX = {lab: i for i, lab in enumerate(FUSION_LABELS)}
print('Fusion label count:', len(FUSION_LABELS))

# 5) Transliteration -> Arabic character mapping for display/sentence (optional)
# A value of None marks a known label that has no single-character rendering here.
NAME_TO_ARABIC = {
    'Alef': 'ا',
    'Beh': 'ب',
    'Teh': 'ت',
    'Theh': 'ث',
    'Jeem': 'ج',
    'Hah': 'ح',
    'Khah': 'خ',
    'Dal': 'د',
    'Thal': 'ذ',
    'thal': 'ذ',
    'Reh': 'ر',
    'Zain': 'ز',
    'Seen': 'س',
    'Sheen': 'ش',
    'Sad': 'ص',
    'Dad': 'ض',
    'Tah': 'ط',
    'Zah': 'ظ',
    'Ain': 'ع',
    'Ghain': 'غ',
    'Feh': 'ف',
    'Qaf': 'ق',
    'Kaf': 'ك',
    'Lam': 'ل',
    'Meem': 'م',
    'Noon': 'ن',
    'Heh': 'ه',
    'Waw': 'و',
    'Yeh': 'ي',
    # Control tokens map to themselves.
    'space': 'space',
    'del': 'del',
    'nothing': 'nothing',
    # Known labels without a mapped character.
    'Teh_Marbuta': None,
    'Al': None,
    'Laa': None,
}

def to_display_label(label: str) -> str:
    """Return the text shown on screen for a model label.

    Labels with a non-None entry in NAME_TO_ARABIC are shown as that entry;
    unknown labels and labels mapped to None are shown as the label itself.
    """
    arabic = NAME_TO_ARABIC.get(label)
    return str(label if arabic is None else arabic)


MobileNet: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\mobilenet_arabic_final.h5 exists= True
MLP: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\arsl_mediapipe_mlp_model_final.h5 exists= True
CSV: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\FINAL_CLEAN_DATASET.csv exists= True
MobileNet output classes: 35
MLP output classes: 34
CSV unique labels: 34
MobileNet disabled for correctness/CPU: CSV labels=34 but MobileNet outputs=35.
Fusion label count: 34


## Cell: Report (what is loaded)

This section prints a quick **verification report** showing:

- Which MobileNet/MLP model files were loaded
- Output class counts
- Which CSV files were used to reconstruct the **exact label order** used in training
- A small label distribution preview (first few rows)


In [17]:
# ---- Verification report (CPU) ----
# Read-only summary of what the model-loading cell produced; nothing is modified here.
print('--- FILES ---')
print('MobileNet file:', MOBILENET_PATH)
print('MLP file:', MLP_PATH)
print('CSV:', KEYPOINTS_CSV)

print('\n--- MODEL OUTPUT DIMS ---')
print('MobileNet classes:', mn_dim)
print('MLP classes:', mlp_dim)
print('CSV label count:', len(CSV_LABELS))

print('\n--- MODE SWITCHES ---')
print('USE_MOBILENET:', USE_MOBILENET)

print('\n--- LABEL SAMPLES ---')
print('MLP_LABELS sample:', MLP_LABELS[:10])
print('FUSION_LABELS sample:', FUSION_LABELS[:10])

# Ten most frequent labels in the CSV; a failure is reported instead of raised.
print('\n--- LABEL DISTRIBUTION (top 10) ---')
try:
    print(labels_df['label'].value_counts().head(10))
except Exception as e:
    print('Could not compute label counts:', e)

print('\n--- MODEL PARAMS ---')
print('MLP params:', mlp_model.count_params())
# MobileNet parameters are reported only when the model is actually in use.
if USE_MOBILENET:
    print('MobileNet params:', mobilenet_model.count_params())


--- FILES ---
MobileNet file: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\mobilenet_arabic_final.h5
MLP file: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\arsl_mediapipe_mlp_model_final.h5
CSV: M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\ArSL Letter (Arabic)\Final Notebooks\FINAL_CLEAN_DATASET.csv

--- MODEL OUTPUT DIMS ---
MobileNet classes: 35
MLP classes: 34
CSV label count: 34

--- MODE SWITCHES ---
USE_MOBILENET: False

--- LABEL SAMPLES ---
MLP_LABELS sample: ['Ain', 'Al', 'Alef', 'Beh', 'Dad', 'Dal', 'Feh', 'Ghain', 'Hah', 'Heh']
FUSION_LABELS sample: ['Ain', 'Al', 'Alef', 'Beh', 'Dad', 'Dal', 'Feh', 'Ghain', 'Hah', 'Heh']

--- LABEL DISTRIBUTION (top 10) ---
label


## Cell: MediaPipe hand tracking helpers

Initializes MediaPipe Hands and defines helper functions for landmark extraction and bounding-box cropping.


In [None]:
# Stop here with the recorded reason when the Imports cell disabled MediaPipe.
if not MP_OK:
    raise RuntimeError(MP_REASON or 'MediaPipe not available; fix protobuf and restart kernel.')

# Short aliases for the MediaPipe hand-tracking and drawing modules.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# CPU-friendly settings: model_complexity=0 is fastest.
# Keep max_num_hands=2 so we can warn when two hands appear.
# static_image_mode=False selects video-stream mode for the camera loop.
hands = mp_hands.Hands(
    static_image_mode=False,
    model_complexity=0,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6,
    max_num_hands=2,
 )

def extract_landmark_features(hand_landmarks, handedness=None):
    """Turn a hand's landmarks into a single float32 feature row.

    Every entry of ``hand_landmarks.landmark`` contributes its ``x``, ``y`` and
    ``z`` values in that order; for 21 landmarks the result has shape (1, 63).
    No mirroring is applied: the training data was extracted without
    mirroring, so inference has to feed the landmarks unchanged.
    ``handedness`` is accepted for call compatibility and is not used.
    """
    coords = np.asarray(
        [(point.x, point.y, point.z) for point in hand_landmarks.landmark],
        dtype=np.float32,
    )
    # Flatten to one vector, then add the leading batch axis.
    return np.expand_dims(coords.ravel(), axis=0)

def landmarks_bbox_px(hand_landmarks, frame_shape, pad_w=150, pad_h=220):
    """Compute a padded pixel bounding box around a hand's landmarks.

    Landmark ``x``/``y`` values are scaled by the frame width/height taken from
    ``frame_shape[:2]`` (height first), padded by ``pad_w``/``pad_h`` pixels and
    truncated to ints. The minimum corner is clamped to 0 and the maximum
    corner to the frame size.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` in pixels.
    """
    frame_h, frame_w = frame_shape[:2]
    points = hand_landmarks.landmark
    norm_x = [point.x for point in points]
    norm_y = [point.y for point in points]

    left = int(min(norm_x) * frame_w - pad_w)
    top = int(min(norm_y) * frame_h - pad_h)
    right = int(max(norm_x) * frame_w + pad_w)
    bottom = int(max(norm_y) * frame_h + pad_h)

    return max(0, left), max(0, top), min(frame_w, right), min(frame_h, bottom)

def softmax(x):
    """Softmax over all elements of ``x``.

    The maximum is subtracted before exponentiating to avoid overflow, and
    1e-9 is added to the normalising sum.
    """
    exponentials = np.exp(x - np.max(x))
    normaliser = np.sum(exponentials) + 1e-9
    return exponentials / normaliser


RuntimeError: Protobuf version incompatible for MediaPipe 0.10.x. Please install protobuf>=4.25.3,<5 and restart kernel.

## Optional: Better Arabic text rendering (recommended)

OpenCV’s built-in fonts don’t shape Arabic well. This block enables a Pillow-based overlay when available.
If these packages are missing, the notebook will fall back to `cv2.putText`.


In [None]:
# Feature flag: becomes True only when Pillow, arabic_reshaper and python-bidi all import.
ARABIC_TEXT_OK = False
try:
    from PIL import Image, ImageDraw, ImageFont
    import arabic_reshaper
    from bidi.algorithm import get_display
    ARABIC_TEXT_OK = True
except Exception:
    # Any import problem keeps the OpenCV-only text fallback.
    ARABIC_TEXT_OK = False

# Font files tried in order for the PIL overlay (common Windows Arabic-capable fonts).
_OVERLAY_FONT_PATHS = (
    r'C:\Windows\Fonts\arial.ttf',
    r'C:\Windows\Fonts\tahoma.ttf',
)
# Loaded fonts keyed by pixel size, so a font file is read once per size
# instead of on every put_text call (several calls happen per frame).
_OVERLAY_FONT_CACHE = {}


def _overlay_font(size_px):
    """Return a cached PIL font of ``size_px`` pixels, or PIL's default font if none loads."""
    font = _OVERLAY_FONT_CACHE.get(size_px)
    if font is None:
        for font_path in _OVERLAY_FONT_PATHS:
            if not Path(font_path).exists():
                continue
            try:
                font = ImageFont.truetype(font_path, size_px)
                break
            except (OSError, ValueError):
                # Unreadable font file or unusable size: try the next candidate.
                continue
        if font is None:
            font = ImageFont.load_default()
        _OVERLAY_FONT_CACHE[size_px] = font
    return font


def put_text(frame_bgr, text, org, color=(255,255,255), font_scale=1.0, thickness=2):
    """Draw ``text`` on a BGR frame and return the frame to display.

    When the optional Arabic packages are available the text is reshaped and
    reordered for right-to-left display and rendered through PIL; otherwise it
    is drawn with ``cv2.putText``.

    Args:
        frame_bgr: BGR image to draw on.
        text: Value to render; converted with ``str()``.
        org: ``(x, y)`` position. In both render paths ``y`` is the text
            baseline, matching ``cv2.putText`` semantics.
        color: BGR colour tuple.
        font_scale: OpenCV font scale; the PIL font size is ``int(24 * font_scale)``.
        thickness: Stroke thickness; only used by the OpenCV path.

    Returns:
        The annotated frame. The OpenCV path draws in place and returns
        ``frame_bgr``; the PIL path returns a new array.
    """
    x, y = org
    if not ARABIC_TEXT_OK:
        cv2.putText(frame_bgr, str(text), (x, y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness, cv2.LINE_AA)
        return frame_bgr

    # PIL path (supports Arabic shaping); PIL works on RGB images.
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(rgb)
    draw = ImageDraw.Draw(img)

    shaped = get_display(arabic_reshaper.reshape(str(text)))

    font_px = int(24 * font_scale)
    font = _overlay_font(font_px)

    # PIL anchors text at its top-left by default, while cv2.putText anchors at the
    # baseline. Shift up by the font ascent so both paths place text identically
    # (otherwise text near the bottom edge is clipped in the PIL path).
    try:
        ascent = font.getmetrics()[0]
    except AttributeError:
        # Bitmap default font without metrics: approximate with the nominal size.
        ascent = font_px

    # `color` is BGR; PIL expects RGB, so the channels are reversed.
    draw.text((x, y - ascent), shaped, fill=(color[2], color[1], color[0]), font=font)
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

print('Arabic PIL overlay enabled:', ARABIC_TEXT_OK)


## Cell: Camera config + fusion helpers

Defines the fusion/smoothing/stabilization settings and helper functions used by the camera loop. Tune these first if the camera feels too slow or too sensitive.


In [None]:
# ---- CPU-friendly camera knobs ----
CAMERA_INDEX = 0

# Lower resolution reduces CPU usage a lot
FRAME_W, FRAME_H = 640, 480

# FIX: MobileNet model expects (96, 96, 3) input — was incorrectly set to (128, 128)
MOBILENET_INPUT = (96, 96)

# Smaller padding = smaller crop
PAD_W, PAD_H = 80, 120

# Run inference less often to reduce CPU load (still stable due to smoothing)
PROCESS_EVERY_N_FRAMES = 2  # 1 = every frame (highest CPU), 2 = half CPU
RUN_MOBILENET_EVERY_N = 4   # only used when USE_MOBILENET=True

# Smoothing / stability thresholds used by the camera loop:
EMA_ALPHA = 0.8         # weight kept from the previous smoothed probabilities (new frame gets 1 - alpha)
STABLE_WINDOW = 7       # number of recent top-1 labels remembered
STABLE_MIN_COUNT = 5    # a label must appear at least this often in a full window to count as stable
CONF_THRESHOLD = 0.55   # minimum smoothed probability for a stable label to be acted on

# ---- Commit-once-then-wait strategy ----
# After committing a letter, the system LOCKS that label.
# It won't accept the same letter again until:
#   a) The hand leaves the frame, OR
#   b) A genuinely DIFFERENT sign becomes stable.
# This prevents "mmmmmooooccc" repetition.
HOLD_TIME_REQUIRED = 0.8  # seconds to hold a sign before committing

# ---- State ----
# Mutable globals updated by the camera loop; re-running this cell resets them.
predicted_sentence = ''
committed_label = None       # The label we already committed (locked)
waiting_for_change = False   # True after committing → blocking re-commit
current_sign_label = None    # The current sign being tracked for hold time
current_sign_start = None    # When the current sign was first seen
label_history = deque(maxlen=STABLE_WINDOW)  # most recent top-1 labels
fps_times = deque(maxlen=30)                 # timestamps of the most recent frames
ema_probs = None                             # smoothed fused probabilities (None until the first prediction)


## Cell: Run camera (combined fusion)

Starts the real-time camera loop. Shows FPS, the stable prediction, and the sentence built so far. Press `q` to quit or `c` to clear the sentence.


In [None]:
# ========================================================
# Run camera (CPU, stable, low usage)
# ========================================================
# Notes:
# - Uses ThreadedCamera to reduce lag.
# - Runs MediaPipe/MLP every N frames to reduce CPU.
# - MobileNet is optional and disabled automatically if label counts mismatch.
# - FIX: Frame is NOT flipped before MediaPipe (matches training data).
#   Flip is applied AFTER processing for selfie-view display only.
# - Uses commit-once-then-wait: each sign commits ONCE, then waits
#   for hand-drop or a different sign before committing again.

from threading import Thread

class ThreadedCamera:
    """Camera reader that keeps only the most recent frame, refreshed by a background thread.

    ``start()`` launches a daemon thread that reads from the capture device in a
    loop and stores each successful frame. ``read()`` returns the latest stored
    ``(status, frame)`` pair without blocking, and ``release()`` stops the
    thread before freeing the device.
    """

    def __init__(self, src=0, width=640, height=480):
        """Open camera ``src`` and request the given frame size.

        On Windows the DirectShow backend is tried first, with a fallback to the
        default backend if the device did not open. One frame is read
        immediately so ``status``/``frame`` are populated before ``start()``.
        """
        backend = cv2.CAP_DSHOW if os.name == 'nt' else 0  # 0 is cv2.CAP_ANY
        self.capture = cv2.VideoCapture(src, backend)
        if not self.capture.isOpened():
            self.capture = cv2.VideoCapture(src)
        # Optional tuning; drivers that reject these settings are simply left at defaults.
        try:
            self.capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        except Exception:
            pass
        try:
            self.capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        except Exception:
            pass
        self.capture.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.capture.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        self.status, self.frame = self.capture.read()
        self.stopped = False
        self._thread = None

    def start(self):
        """Start the background reader thread and return ``self`` for chaining."""
        self._thread = Thread(target=self.update, args=(), daemon=True)
        self._thread.start()
        return self

    def update(self):
        """Reader loop: store every successful frame until ``release()`` is called."""
        while not self.stopped:
            status, frame = self.capture.read()
            if status and frame is not None:
                self.status, self.frame = status, frame
            else:
                # Failed read: pause briefly instead of spinning at full CPU.
                time.sleep(0.005)

    def read(self):
        """Return the most recently stored ``(status, frame)`` pair."""
        return self.status, self.frame

    def release(self):
        """Stop the reader thread, wait for it to finish, then free the device.

        Joining first ensures the capture is not released while the reader is
        still inside ``capture.read()``.
        """
        self.stopped = True
        # getattr keeps this safe for instances created without __init__.
        worker = getattr(self, '_thread', None)
        if worker is not None and worker.is_alive():
            worker.join(timeout=1.0)
        try:
            self.capture.release()
        except Exception:
            pass

def _as_probs(x: np.ndarray) -> np.ndarray:
    x = np.asarray(x).astype(np.float32).reshape(-1)
    if np.any(x < 0) or abs(float(np.sum(x)) - 1.0) > 0.05:
        x = softmax(x)
    return x

# Fusion mapping (MLP always available; MobileNet optional)
# For every model output index, the position of its label in FUSION_LABELS (-1 when absent).
FUSION_CLASS_TO_INDEX = {c: i for i, c in enumerate(FUSION_LABELS)}
MLP_TO_FUSION = np.asarray([FUSION_CLASS_TO_INDEX.get(c, -1) for c in MLP_LABELS], dtype=np.int32)
# MN_TO_FUSION stays None when MobileNet is disabled.
MN_TO_FUSION = np.asarray([FUSION_CLASS_TO_INDEX.get(c, -1) for c in MN_LABELS], dtype=np.int32) if USE_MOBILENET else None

def align_probs_to_fusion(probs_1d: np.ndarray, index_map: np.ndarray) -> np.ndarray:
    """Scatter one model's probabilities into the fusion label order.

    ``index_map[i]`` is the fusion index for source class ``i``; negative
    entries are skipped. Only the overlapping prefix of the two inputs is used.
    The result is renormalised to sum to 1 when it has any mass, and is all
    zeros when either argument is ``None``.
    """
    aligned = np.zeros((len(FUSION_LABELS),), dtype=np.float32)
    if probs_1d is None or index_map is None:
        return aligned

    usable = min(int(probs_1d.shape[0]), int(index_map.shape[0]))
    for prob, target in zip(probs_1d[:usable], index_map[:usable]):
        target = int(target)
        if target < 0:
            continue
        aligned[target] = float(prob)

    total = float(aligned.sum())
    if total > 0:
        aligned /= total
    return aligned

def fuse_probs(mn_probs_fusion: np.ndarray, mlp_probs_fusion: np.ndarray) -> np.ndarray:
    """Blend MobileNet and MLP probabilities, weighting each by its own peak value.

    A copy of the MLP probabilities is returned when the MobileNet vector is
    ``None`` or has no positive mass, or when both peak values are
    effectively zero.
    """
    mobilenet_unusable = mn_probs_fusion is None or float(np.sum(mn_probs_fusion)) <= 0
    if mobilenet_unusable:
        return mlp_probs_fusion.copy()

    # Each model's confidence in its top class doubles as its blending weight.
    w_mn = float(np.max(mn_probs_fusion))
    w_mlp = float(np.max(mlp_probs_fusion))
    weight_total = w_mn + w_mlp
    if weight_total <= 1e-9:
        return mlp_probs_fusion.copy()

    return (w_mn * mn_probs_fusion + w_mlp * mlp_probs_fusion) / weight_total

def compute_fps():
    """Average frames per second over the timestamps currently in ``fps_times``.

    Returns 0.0 when fewer than two timestamps exist or when the first and
    last timestamps are effectively equal.
    """
    sample_count = len(fps_times)
    if sample_count < 2:
        return 0.0
    elapsed = fps_times[-1] - fps_times[0]
    return (sample_count - 1) / elapsed if elapsed > 1e-9 else 0.0

# NOTE(review): gpu_status is not read by any code visible in this cell — confirm whether it is still needed.
gpu_status = 'No (CPU mode)'
# Open the camera and start its background reader, then give it a moment to deliver a frame.
cap = ThreadedCamera(CAMERA_INDEX, width=FRAME_W, height=FRAME_H).start()
time.sleep(0.4)
if not cap.status or cap.frame is None:
    raise RuntimeError('Camera could not start. Try changing CAMERA_INDEX.')
print('Camera opened. Press q to quit, c to clear sentence.')

# Per-run loop state.
frame_i = 0                                                          # frames seen so far
last_mlp_probs = np.zeros((len(FUSION_LABELS),), dtype=np.float32)   # latest MLP probabilities in fusion order
last_mn_probs = np.zeros((len(FUSION_LABELS),), dtype=np.float32)    # latest MobileNet probabilities in fusion order
last_two_hands = False                                               # result of the latest two-hands check
last_bbox = None                                                     # latest MobileNet crop box (unflipped pixel coords)
status_text = ''                                                     # HUD status line
status_color = (200, 200, 200)                                       # HUD status colour (BGR)

# Main loop: grab the latest frame, run detection/classification on every
# PROCESS_EVERY_N_FRAMES-th frame, update the sentence state machine, then draw
# the HUD and handle key presses.
# NOTE(review): the cleanup after the loop only runs when the loop exits via `break`;
# an exception raised inside the loop skips it, leaving the camera open.
while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        continue

    # FIX: Do NOT flip the frame before MediaPipe processing.
    fps_times.append(time.time())
    frame_i += 1

    do_process = (frame_i % PROCESS_EVERY_N_FRAMES == 0)

    # Per-frame results; status_text/status_color persist across skipped frames.
    stable_label = None
    stable_conf = 0.0
    two_hands = False

    if do_process:
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)
        two_hands = bool(results.multi_hand_landmarks and len(results.multi_hand_landmarks) > 1)
        last_two_hands = two_hands

        # Exactly one hand: classify it.
        if results.multi_hand_landmarks and not two_hands:
            hand_landmarks = results.multi_hand_landmarks[0]
            # NOTE(review): the skeleton is drawn onto `frame` before the MobileNet crop
            # below is taken from the same array, so the crop contains the overlay.
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            try:
                feats = extract_landmark_features(hand_landmarks)
                mlp_raw = _as_probs(mlp_model.predict(feats, verbose=0)[0])
                last_mlp_probs = align_probs_to_fusion(mlp_raw, MLP_TO_FUSION)
            except Exception as e:
                print(f'⚠️  MLP predict error: {e}')
                last_mlp_probs[:] = 0.0

            if USE_MOBILENET and (frame_i % RUN_MOBILENET_EVERY_N == 0):
                try:
                    x_min, y_min, x_max, y_max = landmarks_bbox_px(hand_landmarks, frame.shape, pad_w=PAD_W, pad_h=PAD_H)
                    crop = frame[y_min:y_max, x_min:x_max]
                    last_bbox = (x_min, y_min, x_max, y_max)
                    if crop.shape[0] > 0 and crop.shape[1] > 0:
                        resized = cv2.resize(crop, MOBILENET_INPUT)
                        # Scale pixel values to [0, 1] and add a batch axis.
                        inp = (resized.astype(np.float32) / 255.0)[None, ...]
                        mn_raw = _as_probs(mobilenet_model.predict(inp, verbose=0)[0])
                        last_mn_probs = align_probs_to_fusion(mn_raw, MN_TO_FUSION)
                except Exception as e:
                    print(f'⚠️  MobileNet predict error: {e}')
                    last_mn_probs[:] = 0.0
            else:
                # NOTE(review): this also clears the MobileNet probabilities on processed
                # frames where MobileNet was merely skipped, so fusion uses the MLP alone
                # on those frames.
                last_mn_probs[:] = 0.0

            # Fuse + smooth
            fused = fuse_probs(last_mn_probs if USE_MOBILENET else None, last_mlp_probs)
            if ema_probs is None:
                ema_probs = fused
            else:
                ema_probs = (EMA_ALPHA * ema_probs + (1.0 - EMA_ALPHA) * fused).astype(np.float32)
                s = float(ema_probs.sum())
                if s > 0:
                    ema_probs /= s

            # Top-1 label of the smoothed distribution.
            idx = int(np.argmax(ema_probs))
            label = FUSION_LABELS[idx]
            conf = float(ema_probs[idx])

            # A label is "stable" once it fills at least STABLE_MIN_COUNT slots of a full window.
            label_history.append(label)
            if len(label_history) == STABLE_WINDOW:
                most, cnt = Counter(label_history).most_common(1)[0]
                if cnt >= STABLE_MIN_COUNT:
                    stable_label = most
                    stable_idx = int(FUSION_INDEX.get(most, idx))
                    stable_conf = float(ema_probs[stable_idx])

            # --- COMMIT-ONCE-THEN-WAIT LOGIC ---
            now = time.time()
            if stable_label and stable_conf >= CONF_THRESHOLD:
                # If waiting after a commit, check whether sign changed
                if waiting_for_change:
                    if stable_label == committed_label:
                        # Same sign still held — keep waiting
                        status_text = f'{to_display_label(stable_label)} ({stable_conf:.2f}) Committed - change sign'
                        status_color = (255, 200, 0)
                    else:
                        # Different sign! Unlock
                        waiting_for_change = False
                        committed_label = None
                        current_sign_label = stable_label
                        current_sign_start = now
                        status_text = f'{to_display_label(stable_label)} ({stable_conf:.2f}) New sign detected'
                        status_color = (0, 255, 255)

                # Deliberately a second `if` (not `else`): an unlock above falls through
                # into hold-time tracking within the same frame.
                if not waiting_for_change:
                    # Track hold time for current sign
                    if stable_label != current_sign_label:
                        current_sign_label = stable_label
                        current_sign_start = now

                    hold_duration = now - current_sign_start if current_sign_start else 0

                    if hold_duration < HOLD_TIME_REQUIRED:
                        hold_pct = hold_duration / HOLD_TIME_REQUIRED * 100
                        status_text = f'{to_display_label(stable_label)} ({stable_conf:.2f}) Hold: {hold_pct:.0f}%'
                        status_color = (0, 255, 255)
                    else:
                        # COMMIT
                        # Labels mapped to None are locked but add nothing to the sentence;
                        # labels missing from NAME_TO_ARABIC are appended as their raw name.
                        commit = NAME_TO_ARABIC.get(stable_label, stable_label)
                        if commit is None:
                            pass
                        elif commit not in ['nothing', 'del', 'space']:
                            predicted_sentence += str(commit)
                        elif commit == 'space':
                            predicted_sentence += ' '
                        elif commit == 'del':
                            predicted_sentence = predicted_sentence[:-1]
                        
                        # Lock the label and restart smoothing from scratch.
                        committed_label = stable_label
                        waiting_for_change = True
                        current_sign_label = None
                        current_sign_start = None
                        label_history.clear()
                        ema_probs = None
                        
                        status_text = f'{to_display_label(stable_label)} ({stable_conf:.2f}) COMMITTED!'
                        status_color = (0, 255, 0)
            elif stable_label:
                status_text = f'{to_display_label(stable_label)} ({stable_conf:.2f}) Low conf'
                status_color = (0, 100, 255)
            else:
                status_text = 'Stabilizing...'
                status_color = (200, 200, 200)

        # Reached when the one-hand branch did not run; true only when no hand was detected
        # (the two-hands case leaves all state untouched).
        elif not (results.multi_hand_landmarks and two_hands):
            # No hand detected → FULL RESET (allows re-doing same letter)
            committed_label = None
            waiting_for_change = False
            current_sign_label = None
            current_sign_start = None
            label_history.clear()
            ema_probs = None
            status_text = 'No hand'
            status_color = (150, 150, 150)

    # FIX: Flip the frame AFTER MediaPipe processing for selfie-view display
    frame = cv2.flip(frame, 1)

    # Draw bbox if we have one
    # The box was computed on the unflipped frame, so its x-coordinates are mirrored here.
    # NOTE(review): last_bbox is never reset, so the last box keeps being drawn after the hand leaves.
    if last_bbox is not None:
        x_min, y_min, x_max, y_max = last_bbox
        h_frame, w_frame = frame.shape[:2]
        flipped_x_min = w_frame - x_max
        flipped_x_max = w_frame - x_min
        cv2.rectangle(frame, (flipped_x_min, y_min), (flipped_x_max, y_max), (0, 255, 0), 2)

    # UI
    fps = compute_fps()
    hud_y = 28
    frame = put_text(frame, f'FPS: {fps:.1f}', (12, hud_y), color=(255,255,255), font_scale=0.8, thickness=2)
    hud_y += 26
    frame = put_text(frame, f'Mode: CPU', (12, hud_y), color=(255,255,255), font_scale=0.8, thickness=2)
    hud_y += 26
    if last_two_hands:
        frame = put_text(frame, 'Only one hand allowed', (12, hud_y), color=(0,0,255), font_scale=0.9, thickness=3)
    else:
        frame = put_text(frame, status_text, (12, hud_y), color=status_color, font_scale=0.9, thickness=3)

    # Sentence bar: filled black strip at the bottom showing the last 50 characters.
    h, w = frame.shape[:2]
    bar_h = 60
    cv2.rectangle(frame, (0, h - bar_h), (w, h), (0, 0, 0), -1)
    frame = put_text(frame, predicted_sentence[-50:], (12, h - 22), color=(255,255,255), font_scale=1.0, thickness=2)

    cv2.imshow('Arabic Sign Recognition (CPU)', frame)
    # Keyboard: q quits, c clears the sentence and resets the commit state.
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('c'):
        predicted_sentence = ''
        committed_label = None
        waiting_for_change = False
        current_sign_label = None
        current_sign_start = None
        label_history.clear()
        ema_probs = None
        print('Sentence cleared')

# Stop the camera thread, free the device and close the preview window.
cap.release()
cv2.destroyAllWindows()
