In [65]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import time

# ✅ Load model and class labels
# The classifier's argmax index is looked up in CLASS_NAMES, so this order is
# assumed to match the label order used at training time — TODO confirm.
CLASS_NAMES = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["Nothing", "Space"]
# NOTE(review): machine-specific absolute path; edit MODEL_PATH when running elsewhere.
MODEL_PATH = r"C:\Users\admin\Downloads\Hand_Alphabets15.h5"
model = tf.keras.models.load_model(MODEL_PATH)
print("✅ Model loaded successfully!")

# ✅ Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
# Track a single hand; detections below 0.7 confidence are ignored.
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)

# ✅ Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly: otherwise the first read fails and the cell ends without
    # any hint that the camera was never available.
    cap.release()
    hands.close()
    raise RuntimeError("Could not open webcam (device index 0)")

# Time tracker for FPS
prev_time = 0

# ✅ Sentence building
current_letter = ""          # kept for compatibility; not read anywhere in this cell
sentence = ""                # text committed so far
last_prediction = ""         # class currently being held, "" when none
prediction_start_time = 0    # time.time() at which the current hold started
prediction_delay = 1.5 # seconds a class must be held before it is committed

# ✅ Image preprocessing function
def preprocess_for_model(img, target_size=(256, 256)):
    """Turn a BGR image into a one-sample grayscale batch for the classifier.

    Args:
        img: BGR image array (here, the skin-segmented hand crop).
        target_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(256, 256)``, the size this cell has always used.

    Returns:
        ``float32`` array of shape ``(1, height, width, 1)``; 8-bit input is
        scaled into ``[0, 1]`` by dividing by 255.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, target_size)
    normalized = resized.astype(np.float32) / 255.0
    # Prepend the batch axis and append the channel axis.
    return normalized[np.newaxis, ..., np.newaxis]

# Loop-invariant settings, built once instead of on every frame.
padding = 30  # pixels added around the landmark bounding box
lower_skin = np.array([0, 133, 77], dtype=np.uint8)     # YCrCb lower bound
upper_skin = np.array([255, 173, 127], dtype=np.uint8)  # YCrCb upper bound
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# ✅ Capture -> detect hand -> classify -> build sentence -> display
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # no frame delivered: camera unavailable or stream ended

        # Mirror the frame horizontally, then hand MediaPipe an RGB copy.
        frame = cv2.flip(frame, 1)
        h, w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = hands.process(rgb)
        prediction_text = "No hand detected"
        # Black placeholder for the right-hand panel when nothing is segmented.
        hand_segment_gray = np.zeros((512, 512), dtype=np.uint8)

        if not result.multi_hand_landmarks:
            # The hand left the frame: drop the pending prediction so a sign shown
            # again later must be held for the full delay before it is committed,
            # instead of being appended on its very first frame.
            last_prediction = ""

        for hand_landmarks in result.multi_hand_landmarks or []:
            # Get bounding box (landmarks are normalised, so scale to pixels)
            x_min, y_min = w, h
            x_max = y_max = 0
            for lm in hand_landmarks.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x_min, x), min(y_min, y)
                x_max, y_max = max(x_max, x), max(y_max, y)

            # Pad the box and clamp it to the frame.
            x_min = max(x_min - padding, 0)
            y_min = max(y_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_max = min(y_max + padding, h)

            if x_max <= x_min or y_max <= y_min:
                continue  # degenerate box, nothing to crop

            hand_roi = frame[y_min:y_max, x_min:x_max]

            # ✅ Skin segmentation (YCrCb)
            ycrcb = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2YCrCb)
            mask = cv2.inRange(ycrcb, lower_skin, upper_skin)
            # Erode then dilate, then blur the mask.
            mask = cv2.erode(mask, kernel, iterations=1)
            mask = cv2.dilate(mask, kernel, iterations=1)
            mask = cv2.GaussianBlur(mask, (7, 7), 0)

            # New array, so the overlays drawn on `frame` below do not leak into it.
            hand_segment = cv2.bitwise_and(hand_roi, hand_roi, mask=mask)

            # ✅ Predict
            input_img = preprocess_for_model(hand_segment)
            preds = model.predict(input_img, verbose=0)
            class_id = int(np.argmax(preds))
            confidence = float(np.max(preds))
            predicted_class = CLASS_NAMES[class_id]
            prediction_text = f"{predicted_class} ({confidence*100:.1f}%)"

            # ✅ Stability logic for sentence building
            now = time.time()
            if predicted_class != last_prediction:
                # New candidate: start timing how long it is held.
                last_prediction = predicted_class
                prediction_start_time = now
            elif now - prediction_start_time >= prediction_delay:
                if predicted_class == "Space":
                    sentence += " "
                elif predicted_class != "Nothing":
                    sentence += predicted_class
                # Restart the hold timer; a sign that stays up is committed
                # again only after another full delay.
                prediction_start_time = now

            # ✅ Draw
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            # Keep the label on screen when the box touches the top edge.
            label_y = max(y_min - 10, 30)
            cv2.putText(frame, prediction_text, (x_min, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Show grayscale model input
            hand_segment_gray = cv2.cvtColor(hand_segment, cv2.COLOR_BGR2GRAY)
            hand_segment_gray = cv2.resize(hand_segment_gray, (512, 512))

        # ✅ Combine webcam + model input view (two 512x512 panels side by side)
        frame_disp = cv2.resize(frame, (512, 512))
        hand_disp = cv2.cvtColor(hand_segment_gray, cv2.COLOR_GRAY2BGR)
        combined = np.hstack((frame_disp, hand_disp))

        # ✅ FPS (epsilon avoids division by zero on identical timestamps)
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time + 1e-6)
        prev_time = curr_time
        cv2.putText(combined, f"FPS: {fps:.1f}", (880, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        cv2.putText(combined, sentence, (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5,
                    (255, 255, 255), 3, cv2.LINE_AA)
        cv2.putText(combined, f"Sentence: {sentence}", (10, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("Webcam Feed (Left) + Model Input (Right)", combined)

        # Mask to 8 bits so modifier/state bits cannot hide the key code.
        key = cv2.waitKey(1) & 0xFF
        if key in [27, ord('q')]:
            break
        elif key == ord('c'):
            sentence = ""
finally:
    # Runs on normal exit, on errors and on a kernel interrupt, so the camera
    # is never left locked by a crashed cell.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()



✅ Model loaded successfully!


In [92]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import time

# ✅ Load model and class labels
# The classifier's argmax index is looked up in CLASS_NAMES, so this order is
# assumed to match the label order used at training time — TODO confirm.
CLASS_NAMES = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["Nothing", "Space"]
# NOTE(review): machine-specific absolute path; edit MODEL_PATH when running elsewhere.
MODEL_PATH = r"C:\Users\admin\Downloads\Hand_Alphabets11.h5"
model = tf.keras.models.load_model(MODEL_PATH)
print("✅ Model loaded successfully!")

# ✅ Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
# Track a single hand; detections below 0.7 confidence are ignored.
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)

# ✅ Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly: otherwise the first read fails and the cell ends without
    # any hint that the camera was never available.
    cap.release()
    hands.close()
    raise RuntimeError("Could not open webcam (device index 0)")

# Time tracker for FPS
prev_time = 0

# ✅ Sentence building
current_letter = ""          # kept for compatibility; not read anywhere in this cell
sentence = ""                # text committed so far
last_prediction = ""         # class currently being held, "" when none
prediction_start_time = 0    # time.time() at which the current hold started
prediction_delay = 1.5  # seconds a class must be held before it is committed

# ✅ Image preprocessing function
def preprocess_for_model(img, target_size=(256, 256)):
    """Turn a BGR image into a one-sample grayscale batch for the classifier.

    Args:
        img: BGR image array (here, the skin-segmented hand crop).
        target_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(256, 256)``, the size this cell has always used.

    Returns:
        ``float32`` array of shape ``(1, height, width, 1)``; 8-bit input is
        scaled into ``[0, 1]`` by dividing by 255.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, target_size)
    normalized = resized.astype(np.float32) / 255.0
    # Prepend the batch axis and append the channel axis.
    return normalized[np.newaxis, ..., np.newaxis]

# Loop-invariant settings, built once instead of on every frame.
padding = 30  # pixels added around the landmark bounding box
lower_skin = np.array([0, 133, 77], dtype=np.uint8)     # YCrCb lower bound
upper_skin = np.array([255, 173, 127], dtype=np.uint8)  # YCrCb upper bound
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# ✅ Capture -> detect hand -> classify -> build sentence -> display
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # no frame delivered: camera unavailable or stream ended

        # Mirror the frame horizontally, then hand MediaPipe an RGB copy.
        frame = cv2.flip(frame, 1)
        h, w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = hands.process(rgb)
        prediction_text = "No hand detected"
        # Black placeholder for the right-hand panel when nothing is segmented.
        hand_segment_gray = np.zeros((512, 512), dtype=np.uint8)

        if not result.multi_hand_landmarks:
            # The hand left the frame: drop the pending prediction so a sign shown
            # again later must be held for the full delay before it is committed,
            # instead of being appended on its very first frame.
            last_prediction = ""

        for hand_landmarks in result.multi_hand_landmarks or []:
            # Get bounding box (landmarks are normalised, so scale to pixels)
            x_min, y_min = w, h
            x_max = y_max = 0
            for lm in hand_landmarks.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x_min, x), min(y_min, y)
                x_max, y_max = max(x_max, x), max(y_max, y)

            # Pad the box and clamp it to the frame.
            x_min = max(x_min - padding, 0)
            y_min = max(y_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_max = min(y_max + padding, h)

            if x_max <= x_min or y_max <= y_min:
                continue  # degenerate box, nothing to crop

            hand_roi = frame[y_min:y_max, x_min:x_max]

            # ✅ Skin segmentation (YCrCb)
            ycrcb = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2YCrCb)
            mask = cv2.inRange(ycrcb, lower_skin, upper_skin)
            # Erode then dilate, then blur the mask.
            mask = cv2.erode(mask, kernel, iterations=1)
            mask = cv2.dilate(mask, kernel, iterations=1)
            mask = cv2.GaussianBlur(mask, (7, 7), 0)

            # New array, so the overlays drawn on `frame` below do not leak into it.
            hand_segment = cv2.bitwise_and(hand_roi, hand_roi, mask=mask)

            # ✅ Predict
            input_img = preprocess_for_model(hand_segment)
            preds = model.predict(input_img, verbose=0)
            class_id = int(np.argmax(preds))
            confidence = float(np.max(preds))
            predicted_class = CLASS_NAMES[class_id]
            prediction_text = f"{predicted_class} ({confidence*100:.1f}%)"

            # ✅ Stability logic for sentence building
            now = time.time()
            if predicted_class != last_prediction:
                # New candidate: start timing how long it is held.
                last_prediction = predicted_class
                prediction_start_time = now
            elif now - prediction_start_time >= prediction_delay:
                if predicted_class == "Space":
                    sentence += " "
                elif predicted_class != "Nothing":
                    sentence += predicted_class
                # Restart the hold timer; a sign that stays up is committed
                # again only after another full delay.
                prediction_start_time = now

            # ✅ Draw
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            # Keep the label on screen when the box touches the top edge.
            label_y = max(y_min - 10, 30)
            cv2.putText(frame, prediction_text, (x_min, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # ✅ Show grayscale model input
            hand_segment_gray = cv2.cvtColor(hand_segment, cv2.COLOR_BGR2GRAY)
            hand_segment_gray = cv2.resize(hand_segment_gray, (512, 512))

        # ✅ Combine webcam + model input view (two 512x512 panels side by side)
        frame_disp = cv2.resize(frame, (512, 512))
        hand_disp = cv2.cvtColor(hand_segment_gray, cv2.COLOR_GRAY2BGR)
        combined = np.hstack((frame_disp, hand_disp))

        # ✅ FPS (epsilon avoids division by zero on identical timestamps)
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time + 1e-6)
        prev_time = curr_time
        cv2.putText(combined, f"FPS: {fps:.1f}", (880, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)

        # ✅ Display text in green
        cv2.putText(combined, sentence, (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5,
                    (0, 255, 0), 3, cv2.LINE_AA)
        cv2.putText(combined, f"Sentence: {sentence}", (10, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(combined, "'q' or ESC: Quit | 'c': Clear", (10, 480),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)

        cv2.imshow("Webcam Feed (Left) + Model Input (Right)", combined)

        # Mask to 8 bits so modifier/state bits cannot hide the key code.
        key = cv2.waitKey(1) & 0xFF
        if key in [27, ord('q')]:
            break
        elif key == ord('c'):
            sentence = ""
finally:
    # Runs on normal exit, on errors and on a kernel interrupt, so the camera
    # is never left locked by a crashed cell.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()




✅ Model loaded successfully!


In [5]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import time

# ✅ Load model and class labels
# The classifier's argmax index is looked up in CLASS_NAMES, so this order is
# assumed to match the label order used at training time — TODO confirm.
CLASS_NAMES = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["Nothing", "Space"]
# NOTE(review): machine-specific absolute path; edit MODEL_PATH when running elsewhere.
MODEL_PATH = r"C:\Users\admin\Downloads\Hand_Alphabets11.h5"
model = tf.keras.models.load_model(MODEL_PATH)
print("✅ Model loaded successfully!")

# ✅ Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
# Track a single hand; detections below 0.7 confidence are ignored.
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)

# ✅ Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly: otherwise the first read fails and the cell ends without
    # any hint that the camera was never available.
    cap.release()
    hands.close()
    raise RuntimeError("Could not open webcam (device index 0)")

# Time tracker for FPS
prev_time = 0

# ✅ Sentence building
current_letter = ""          # kept for compatibility; not read anywhere in this cell
sentence = ""                # text committed so far
last_prediction = ""         # class currently being held, "" when none
prediction_start_time = 0    # time.time() at which the current hold started
prediction_delay = 1.5  # seconds a class must be held before it is committed

# ✅ Image preprocessing function
def preprocess_for_model(img, target_size=(256, 256)):
    """Turn a BGR image into a one-sample grayscale batch for the classifier.

    Args:
        img: BGR image array (here, the skin-segmented hand crop).
        target_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(256, 256)``, the size this cell has always used; it must agree
            with the input size of the loaded model.

    Returns:
        ``float32`` array of shape ``(1, height, width, 1)`` — that is
        ``(1, 256, 256, 1)`` with the default size; 8-bit input is scaled into
        ``[0, 1]`` by dividing by 255.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, target_size)  # ✅ Match model input
    normalized = resized.astype(np.float32) / 255.0
    # Prepend the batch axis and append the channel axis.
    return normalized[np.newaxis, ..., np.newaxis]

# Loop-invariant settings, built once instead of on every frame.
padding = 30  # pixels added around the landmark bounding box
lower_skin = np.array([0, 133, 77], dtype=np.uint8)     # YCrCb lower bound
upper_skin = np.array([255, 173, 127], dtype=np.uint8)  # YCrCb upper bound
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# ✅ Capture -> detect hand -> classify -> build sentence -> display
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # no frame delivered: camera unavailable or stream ended

        # Mirror the frame horizontally, then hand MediaPipe an RGB copy.
        frame = cv2.flip(frame, 1)
        h, w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        result = hands.process(rgb)
        prediction_text = "No hand detected"
        # Black placeholder for the right-hand panel when nothing is segmented.
        hand_segment_gray = np.zeros((512, 512), dtype=np.uint8)

        if not result.multi_hand_landmarks:
            # The hand left the frame: drop the pending prediction so a sign shown
            # again later must be held for the full delay before it is committed,
            # instead of being appended on its very first frame.
            last_prediction = ""

        for hand_landmarks in result.multi_hand_landmarks or []:
            # Get bounding box (landmarks are normalised, so scale to pixels)
            x_min, y_min = w, h
            x_max = y_max = 0
            for lm in hand_landmarks.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x_min, x), min(y_min, y)
                x_max, y_max = max(x_max, x), max(y_max, y)

            # Pad the box and clamp it to the frame.
            x_min = max(x_min - padding, 0)
            y_min = max(y_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_max = min(y_max + padding, h)

            if x_max <= x_min or y_max <= y_min:
                continue  # degenerate box, nothing to crop

            hand_roi = frame[y_min:y_max, x_min:x_max]

            # ✅ Skin segmentation (YCrCb)
            ycrcb = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2YCrCb)
            mask = cv2.inRange(ycrcb, lower_skin, upper_skin)
            # Erode then dilate, then blur the mask.
            mask = cv2.erode(mask, kernel, iterations=1)
            mask = cv2.dilate(mask, kernel, iterations=1)
            mask = cv2.GaussianBlur(mask, (5, 5), 0)

            # New array, so the overlays drawn on `frame` below do not leak into it.
            hand_segment = cv2.bitwise_and(hand_roi, hand_roi, mask=mask)

            # ✅ Predict
            input_img = preprocess_for_model(hand_segment)
            preds = model.predict(input_img, verbose=0)
            class_id = int(np.argmax(preds))
            confidence = float(np.max(preds))
            predicted_class = CLASS_NAMES[class_id]
            prediction_text = f"{predicted_class} ({confidence * 100:.1f}%)"

            # ✅ Stability logic for sentence building
            now = time.time()
            if predicted_class != last_prediction:
                # New candidate: start timing how long it is held.
                last_prediction = predicted_class
                prediction_start_time = now
            elif now - prediction_start_time >= prediction_delay:
                if predicted_class == "Space":
                    sentence += " "
                elif predicted_class != "Nothing":
                    sentence += predicted_class
                # Restart the hold timer; a sign that stays up is committed
                # again only after another full delay.
                prediction_start_time = now

            # ✅ Draw results
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            # Keep the label on screen when the box touches the top edge.
            label_y = max(y_min - 10, 30)
            cv2.putText(frame, prediction_text, (x_min, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # ✅ Show grayscale model input
            hand_segment_gray = cv2.cvtColor(hand_segment, cv2.COLOR_BGR2GRAY)
            hand_segment_gray = cv2.resize(hand_segment_gray, (512, 512))

        # ✅ Combine webcam + model input view (two 512x512 panels side by side)
        frame_disp = cv2.resize(frame, (512, 512))
        hand_disp = cv2.cvtColor(hand_segment_gray, cv2.COLOR_GRAY2BGR)
        combined = np.hstack((frame_disp, hand_disp))

        # ✅ FPS (epsilon avoids division by zero on identical timestamps)
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time + 1e-6)
        prev_time = curr_time
        cv2.putText(combined, f"FPS: {fps:.1f}", (880, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)

        # ✅ Display text and sentence
        cv2.putText(combined, f"Sentence: {sentence}", (10, 500),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(combined, "'q' or ESC: Quit | 'c': Clear", (10, 480),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)

        # ✅ Show output
        cv2.imshow("Webcam Feed (Left) + Model Input (Right)", combined)

        # Mask to 8 bits so modifier/state bits cannot hide the key code.
        key = cv2.waitKey(1) & 0xFF
        if key in [27, ord('q')]:
            break
        elif key == ord('c'):
            sentence = ""
finally:
    # ✅ Release and cleanup
    # Runs on normal exit, on errors and on a kernel interrupt, so the camera
    # is never left locked by a crashed cell.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()




✅ Model loaded successfully!
