In [None]:
import json
import time
from collections import deque

import cv2
import mediapipe as mp
import numpy as np
import torch

# --- PATHS ---
# Directory that holds the trained checkpoint and the label mapping file.
base_path = "/home/haggenmueller/asl_detection/machine_learning/models/lstm"
# Both artefacts live directly inside ``base_path``.
MODEL_PATH = base_path + "/best_lstm_model.pth"
LABELS_PATH = base_path + "/label_to_index.json"

# --- Mediapipe Init ---
# Short aliases for the holistic solution module and the landmark drawing helpers.
mp_holistic, mp_drawing = mp.solutions.holistic, mp.solutions.drawing_utils

# --- LSTM Model Class ---
class LSTMModel(torch.nn.Module):
    """Stacked LSTM classifier that averages the LSTM outputs over time.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of classes produced by the final linear layer.
    """

    def __init__(self, input_size=225, hidden_size=256, num_layers=3, output_size=209):
        super().__init__()
        # Attribute names ``lstm`` and ``fc`` determine the state-dict keys.
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=0.2,
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return per-class log-probabilities for a batch-first input ``x``."""
        per_step, _state = self.lstm(x)
        # Average over the time axis (dim 1, because the LSTM is batch_first).
        pooled = torch.mean(per_step, dim=1)
        logits = self.fc(pooled)
        # Log-softmax over the class axis.
        return logits.log_softmax(dim=1)
        

# --- Load Model ---
# Use the GPU when available; the checkpoint is remapped onto the chosen device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMModel().to(device)

try:
    # NOTE(review): torch.load unpickles the file, so only load checkpoints from
    # a trusted source (consider weights_only=True if the installed torch supports it).
    checkpoint = torch.load(MODEL_PATH, map_location=device)
    model.load_state_dict(checkpoint)
    # Inference mode: disables the dropout between the LSTM layers.
    model.eval()
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Model loading error: {e}")
    # Raise instead of calling exit(): inside a Jupyter kernel exit() does not
    # abort the running cell, so the code below kept executing without a usable
    # model. SystemExit stops execution both in a notebook and in a plain script.
    raise SystemExit(1) from e

# --- Load Labels ---
try:
    # Explicit encoding so the result does not depend on the platform default.
    with open(LABELS_PATH, "r", encoding="utf-8") as f:
        label_to_index = json.load(f)
    # Inverted mapping used to turn a predicted class index back into its label.
    index_to_label = {v: k for k, v in label_to_index.items()}
    print(f"✅ {len(label_to_index)} Labels loaded!")
except Exception as e:
    print(f"❌ Error loading labels: {e}")
    # Raise instead of calling exit(): inside a Jupyter kernel exit() does not
    # abort the running cell, which left ``index_to_label`` undefined and caused
    # a NameError further down. SystemExit halts execution in scripts and notebooks.
    raise SystemExit(1) from e

# --- Extract Keypoints ---
def _flatten_landmarks(landmark_list, count):
    """Return the x/y/z values of ``landmark_list.landmark`` as a flat array.

    When ``landmark_list`` is falsy (nothing detected), ``count * 3`` zeros are
    returned instead.
    """
    if not landmark_list:
        return np.zeros(count * 3)
    coords = [(point.x, point.y, point.z) for point in landmark_list.landmark]
    return np.asarray(coords).ravel()


def extract_keypoints(results):
    """Build one flat feature vector from pose, left-hand and right-hand landmarks.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy or has a ``landmark``
            iterable of items with ``x``, ``y`` and ``z`` attributes.

    Returns:
        The concatenation pose + left hand + right hand, with zeros substituted
        for any missing part (33, 21 and 21 landmarks respectively). If anything
        goes wrong, the error is printed and 225 zeros are returned.
    """
    try:
        parts = (
            _flatten_landmarks(results.pose_landmarks, 33),
            _flatten_landmarks(results.left_hand_landmarks, 21),
            _flatten_landmarks(results.right_hand_landmarks, 21),
        )
        keypoints = np.concatenate(parts)
        # NaNs are only reported here, not removed.
        if np.any(np.isnan(keypoints)):
            print("⚠️ Warning: NaN detected in keypoints!")
        return keypoints
    except Exception as e:
        print(f"❌ Error extracting keypoints: {e}")
        # Keep the output shape stable even on failure.
        return np.zeros(225)

# --- Capture from Webcam ---
# Live loop: grab frames, extract keypoints, keep a sliding window of the most
# recent frames and classify that window at most once per second.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("❌ Error: Webcam konnte nicht geöffnet werden!")
    cap.release()
    # Raise instead of calling exit(): inside a Jupyter kernel exit() does not
    # abort the running cell, so the loop below would still be entered.
    raise SystemExit(1)

# Number of consecutive frames fed to the model per prediction.
frame_count = 102
# Bounded deque: appending beyond ``maxlen`` drops the oldest frame in O(1),
# replacing the manual ``list.pop(0)`` trimming.
sequence = deque(maxlen=frame_count)
# Monotonic clock so the one-second throttle is immune to wall-clock changes.
last_prediction_time = time.monotonic()
predicted_word = "Warten..."

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("❌ Error: Kein Bild von Kamera erhalten!")
                break

            # The landmark model is fed an RGB copy of the frame; overlays are
            # drawn on the original BGR frame, so no conversion back is needed.
            results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            image = frame

            # Extract keypoints and push them into the sliding window
            sequence.append(extract_keypoints(results))

            # Predict only with a full window, and at most once per second
            window_full = len(sequence) == frame_count
            if window_full and (time.monotonic() - last_prediction_time) > 1.0:
                # Stack into a single (frame_count, features) array first;
                # building a tensor from a list of ndarrays is very slow.
                window = np.stack(sequence).astype(np.float32)
                # Add the leading batch dimension expected by the model.
                input_tensor = torch.from_numpy(window).unsqueeze(0).to(device)
                with torch.no_grad():
                    output = model(input_tensor)
                pred_label = torch.argmax(output, dim=1).item()
                predicted_word = index_to_label.get(pred_label, "Unbekannt")
                last_prediction_time = time.monotonic()

            # Display recognized word and window fill level
            cv2.putText(image, f"Erkannt: {predicted_word}", (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(image, f"Frames: {len(sequence)}/{frame_count}", (10, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Draw landmarks
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

            # Show video feed
            cv2.imshow("ASL Testmodus", image)

            # Exit on 'q' press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

❌ Model loading error: [Errno 2] No such file or directory: '/home/haggenmueller/asl_detection/machine_learning/models/lstm/best_lstm_model.pth'
❌ Error loading labels: [Errno 2] No such file or directory: '/home/haggenmueller/asl_detection/machine_learning/models/lstm/label_to_index.json'


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1741546377.955342   42971 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741546377.983773   42982 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741546377.987151   42972 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741546377.987526   42977 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741546377.987526   42982 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741546377.996573   42

NameError: name 'index_to_label' is not defined

: 