In [11]:
# Import the required libraries
import os
import cv2
import numpy as np

import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt
from PIL import Image

import io
import zipfile

from google.colab import files
from google.colab.patches import cv2_imshow


In [None]:
# Image processing utilities
def resize_image(image, target_size, preserve_aspect_ratio=True):
    """
    Resize an image to ``target_size``, optionally letterboxing it.

    Args:
        image: An OpenCV image (``np.ndarray`` shaped ``(H, W)`` or
            ``(H, W, C)``) or a PIL image. Anything that is not an
            ``np.ndarray`` is handled as a PIL image.
        target_size: ``(width, height)`` of the output image.
        preserve_aspect_ratio: When True (default) the image is scaled to fit
            inside ``target_size`` and centred on a black canvas. When False
            the image is stretched to exactly ``target_size``.

    Returns:
        An ``np.ndarray`` for array input (same dtype and channel count) or a
        PIL image for PIL input (mode "L" is kept, every other mode becomes
        "RGB" when letterboxing).

    Raises:
        ValueError: If the input image or the target size has a dimension
            that is not positive.
    """
    target_w, target_h = int(target_size[0]), int(target_size[1])
    if target_w <= 0 or target_h <= 0:
        raise ValueError(f"target_size must be positive, got {target_size!r}")

    is_array = isinstance(image, np.ndarray)
    if is_array:
        src_h, src_w = image.shape[:2]
    else:
        src_w, src_h = image.size
    if src_w <= 0 or src_h <= 0:
        raise ValueError("Cannot resize an empty image")

    if not preserve_aspect_ratio:
        if is_array:
            return cv2.resize(image, (target_w, target_h))
        return image.resize((target_w, target_h), Image.LANCZOS)

    # Fit the source inside the target box. Integer arithmetic avoids the
    # float truncation that could lose a pixel, and max(1, ...) keeps very
    # elongated images from collapsing to a zero-sized dimension.
    if src_w * target_h >= src_h * target_w:
        # Width is the limiting side.
        new_w = target_w
        new_h = max(1, (src_h * target_w) // src_w)
    else:
        # Height is the limiting side.
        new_h = target_h
        new_w = max(1, (src_w * target_h) // src_h)

    # Offsets that centre the resized image on the canvas.
    left = (target_w - new_w) // 2
    top = (target_h - new_h) // 2

    if is_array:
        resized = cv2.resize(image, (new_w, new_h))
        # cv2.resize returns a 2-D array for (H, W, 1) input; restore the
        # channel axis so it can be written into the canvas.
        if resized.ndim == 2 and image.ndim == 3:
            resized = resized[:, :, np.newaxis]

        # Black canvas with the same channel layout and dtype as the input.
        result = np.zeros((target_h, target_w) + image.shape[2:], dtype=image.dtype)
        result[top:top + new_h, left:left + new_w] = resized
        return result

    resized = image.resize((new_w, new_h), Image.LANCZOS)
    if image.mode == "L":
        result = Image.new("L", (target_w, target_h), 0)
    else:
        result = Image.new("RGB", (target_w, target_h), (0, 0, 0))
    result.paste(resized, (left, top))
    return result

def convert_to_grayscale(image):
    """
    Convert an image to grayscale.

    Args:
        image: An OpenCV image (``np.ndarray`` in BGR or BGRA channel order,
            or already single-channel) or a PIL image.

    Returns:
        A 2-D ``np.ndarray`` for array input, or a mode "L" PIL image for PIL
        input. An array that is already 2-D is returned as-is (not copied).
    """
    if not isinstance(image, np.ndarray):
        return image.convert("L")

    # Already grayscale: cv2.cvtColor(..., COLOR_BGR2GRAY) would reject it.
    if image.ndim == 2:
        return image

    channels = image.shape[2]
    if channels == 1:
        return image[:, :, 0]
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def extract_hand_region(image, roi_size=400, min_area=3000, padding=20):
    """
    Extract the hand region from the image using skin color detection.

    A square region of interest (ROI) centred in the frame is thresholded in
    HSV space; the largest skin-coloured contour is taken to be the hand.

    Side effect: ``image`` is drawn on in place (ROI outline, hand bounding
    box and status text). Pass a copy if the original must stay untouched.

    Args:
        image: BGR image as an ``np.ndarray`` of shape ``(H, W, 3)``.
        roi_size: Side length in pixels of the centred ROI. The ROI is
            clipped to the image when the image is smaller.
        min_area: Minimum contour area (in pixels) for a contour to count as
            a hand rather than noise.
        padding: Pixels added around the detected bounding box, clipped to
            the ROI.

    Returns:
        The padded hand crop (a view into a copy of the ROI) when a hand is
        detected, otherwise a Gaussian-blurred copy of the whole ROI.

    Raises:
        ValueError: If ``image`` is None, empty, or not a 3-channel image.
    """
    if image is None or image.ndim != 3 or image.shape[2] != 3 or image.size == 0:
        raise ValueError("extract_hand_region expects a non-empty BGR image of shape (H, W, 3)")

    # Centre the ROI; clamp to 0 when the image is smaller than the ROI.
    height, width = image.shape[:2]
    roi_x = max(0, width // 2 - roi_size // 2)
    roi_y = max(0, height // 2 - roi_size // 2)

    # Copy so that the annotations drawn on `image` below never end up in the crop.
    roi = image[roi_y:roi_y + roi_size, roi_x:roi_x + roi_size].copy()
    # Actual ROI size (smaller than roi_size when the image is small).
    roi_h, roi_w = roi.shape[:2]

    # Skin-colour mask in HSV space.
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    lower_skin = np.array([0, 20, 70], dtype=np.uint8)
    upper_skin = np.array([20, 255, 255], dtype=np.uint8)
    mask = cv2.inRange(hsv, lower_skin, upper_skin)

    # Close small holes, then smooth the mask to reduce noise.
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=2)
    mask = cv2.erode(mask, kernel, iterations=1)
    mask = cv2.GaussianBlur(mask, (5, 5), 0)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw ROI boundary for visual guidance
    cv2.rectangle(image, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h),
                  (255, 0, 0), 2)

    # Baseline for the status text: just above the ROI, but kept inside the
    # frame when the ROI touches the top edge.
    label_y = max(roi_y, 40)

    if contours:
        # The largest contour is assumed to be the hand.
        largest = max(contours, key=cv2.contourArea)
        contour_area = cv2.contourArea(largest)

        # Ignore small blobs (noise).
        if contour_area > min_area:
            box_x, box_y, box_w, box_h = cv2.boundingRect(largest)

            # Pad each side independently so that clipping one side against
            # the ROI edge does not extend the opposite side.
            x1 = max(0, box_x - padding)
            y1 = max(0, box_y - padding)
            x2 = min(roi_w, box_x + box_w + padding)
            y2 = min(roi_h, box_y + box_h + padding)

            hand = roi[y1:y2, x1:x2]

            # Bounding box, in full-image coordinates.
            cv2.rectangle(image, (roi_x + x1, roi_y + y1),
                          (roi_x + x2, roi_y + y2), (0, 255, 0), 2)

            # Contour area, shown for debugging.
            cv2.putText(image, f"Area: {contour_area:.0f}", (roi_x, label_y - 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if hand.size > 0:
                cv2.putText(image, "Hand Detected", (roi_x, label_y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                return hand

    # No usable hand region was found.
    cv2.putText(image, "No hand detected", (roi_x, label_y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Return a blurred version of the ROI instead of the original one.
    return cv2.GaussianBlur(roi, (25, 25), 0)

def draw_prediction(image, sign, confidence, roi_size=400, min_confidence=0.5):
    """
    Draw the prediction text and the hand placement guide on a copy of the image.

    Args:
        image: Image as an ``np.ndarray``; it is not modified.
        sign: Predicted sign, or "?" when nothing was recognised.
        confidence: Prediction confidence in ``[0, 1]``.
        roi_size: Side length of the centred guide square. The default
            matches the default ROI searched by ``extract_hand_region``.
        min_confidence: Predictions below this confidence are shown as
            "Waiting for hand gesture...".

    Returns:
        A new annotated image with the same shape as ``image``.
    """
    result = image.copy()

    # Decide the message first so the background can be sized to fit it.
    if sign == "?" or confidence < min_confidence:
        text = "Waiting for hand gesture..."
        color = (0, 0, 255)  # Red
    else:
        text = f"Sign: {sign} ({confidence:.2f})"
        color = (0, 255, 0)  # Green

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    (text_w, _), _ = cv2.getTextSize(text, font, font_scale, thickness)

    # Semi-transparent background: at least 300 px wide, wider when the text
    # (drawn from x=20) would otherwise overflow it.
    box_right = max(300, 20 + text_w + 10)
    overlay = result.copy()
    cv2.rectangle(overlay, (10, 10), (box_right, 140), (0, 0, 0), -1)
    cv2.addWeighted(overlay, 0.6, result, 0.4, 0, result)

    cv2.putText(result, text, (20, 40), font, font_scale, color, thickness)

    # Hand placement guide: centred square, clipped to the frame.
    height, width = image.shape[:2]
    roi_x = max(0, width // 2 - roi_size // 2)
    roi_y = max(0, height // 2 - roi_size // 2)
    roi_w = min(roi_size, width - roi_x)
    roi_h = min(roi_size, height - roi_y)
    cv2.rectangle(result, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h),
                  (255, 0, 0), 2)

    return result

In [None]:
# SignLanguageModel: the sign-language classification model
class SignLanguageModel:
    """
    Small CNN classifier for static sign-language hand images.

    The network takes 64x64 single-channel images scaled to [0, 1] and
    outputs one softmax score per entry of ``self.labels``.

    Args:
        model_path: Optional path of a saved Keras model. When given, the
            model is loaded from it; if loading fails, a new untrained model
            is created instead. When omitted, a new untrained model is created.
        labels: Optional list of class names, index-aligned with the model
            outputs. Defaults to 'A'-'Z' followed by 'space', 'delete',
            'nothing'.
    """

    # (width, height) of the network input.
    INPUT_SIZE = (64, 64)
    # Grayscale standard deviation below which predict_sign() treats the
    # image as containing no hand.
    MIN_STD_DEV = 20

    def __init__(self, model_path=None, labels=None):
        self.model_path = model_path
        self.model = None
        if labels is not None:
            self.labels = list(labels)
        else:
            self.labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                           'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
                           'space', 'delete', 'nothing']

        if self.model_path:
            self.load_model()
        else:
            self._create_model()

    def _create_model(self):
        """Create and compile an untrained CNN with one output per label."""
        input_w, input_h = self.INPUT_SIZE
        model = tf.keras.Sequential([
            # First convolution block
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                                   input_shape=(input_h, input_w, 1)),
            tf.keras.layers.MaxPooling2D((2, 2)),

            # Second convolution block
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),

            # Third convolution block
            tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),

            # Flatten and fully connected layers
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(len(self.labels), activation='softmax')
        ])

        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        self.model = model
        print("Created a new model. Note: This model is untrained.")

    def load_model(self):
        """
        Load the model from ``self.model_path``.

        Best effort: if loading fails for any reason the error is printed and
        a new untrained model is created, so ``self.model`` is always set.
        """
        try:
            self.model = tf.keras.models.load_model(self.model_path)
            print(f"Model loaded successfully from {self.model_path}")
        except Exception as e:
            print(f"Error loading model: {e}")
            print("Creating a new model instead...")
            self._create_model()

    def preprocess_image(self, image):
        """
        Convert an OpenCV image into a normalized network input batch.

        Args:
            image: ``np.ndarray``, either BGR ``(H, W, 3)`` or grayscale
                ``(H, W)`` / ``(H, W, 1)``.

        Returns:
            ``float32`` array of shape ``(1, 64, 64, 1)`` with values in [0, 1].
        """
        input_w, input_h = self.INPUT_SIZE

        # A trailing singleton channel is plain grayscale for our purposes.
        if image.ndim == 3 and image.shape[2] == 1:
            image = image[:, :, 0]

        # Convert OpenCV image to PIL Image
        if len(image.shape) == 3:  # Color image
            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        else:  # Already grayscale
            image = Image.fromarray(image)

        # Letterbox to the network input size using the notebook's helper.
        # NOTE(review): train_model() feeds images through
        # image_dataset_from_directory(image_size=(64, 64)), which by default
        # resizes without keeping the aspect ratio, so training and inference
        # preprocessing differ for non-square crops - confirm this is intended.
        image = resize_image(image, (input_w, input_h), preserve_aspect_ratio=True)

        # Convert to a single-channel array
        if isinstance(image, Image.Image):
            if image.mode != 'L':
                image = image.convert('L')
            img_array = np.array(image)
        else:
            if len(image.shape) == 3:
                img_array = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                img_array = image

        # Normalize to [0, 1] and add batch and channel axes
        img_array = img_array.astype('float32') / 255.0
        img_array = img_array.reshape(1, input_h, input_w, 1)

        return img_array

    def predict_sign(self, image):
        """
        Predict the sign shown in an image.

        Args:
            image: ``np.ndarray``, BGR ``(H, W, 3)`` or grayscale ``(H, W)``.

        Returns:
            ``(sign, confidence)`` where ``sign`` is an entry of
            ``self.labels`` and ``confidence`` is a Python float. Returns
            ``("?", 0.0)`` when the image is empty or has too little
            variation to contain a hand, and ``"?"`` as the sign when the
            predicted index has no label.
        """
        if image is None or image.size == 0:
            return "?", 0.0

        # Ensure image is grayscale
        if len(image.shape) == 3:
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray_image = image

        # A nearly uniform image is unlikely to contain a hand.
        if np.std(gray_image) < self.MIN_STD_DEV:
            return "?", 0.0

        processed_image = self.preprocess_image(gray_image)
        predictions = self.model.predict(processed_image, verbose=0)

        # Highest-scoring class and its score, as plain Python values.
        scores = predictions[0]
        predicted_index = int(np.argmax(scores))
        confidence = float(scores[predicted_index])

        # Map the index through self.labels so the mapping always agrees with
        # the label list the model was built with.
        if predicted_index < len(self.labels):
            predicted_sign = self.labels[predicted_index]
        else:
            predicted_sign = "?"

        return predicted_sign, confidence

In [None]:
# Model training
def train_model(data_dir, epochs=None, batch_size=None, model_file="models/sign_model.h5"):
    """
    Train the sign language model with automatic parameter tuning.

    Args:
        data_dir: Dataset root containing one sub-directory per class.
        epochs: Number of epochs; chosen from the dataset size when None.
        batch_size: Batch size; chosen from the smallest class when None.
        model_file: Path the trained Keras model is saved to. Its parent
            directory is created if needed.

    Returns:
        The trained ``SignLanguageModel`` (its ``labels`` follow the class
        order of the dataset), or None when ``data_dir`` is missing or
        contains no class directories or no images.
    """
    print(f"Training model with data from {data_dir}")

    # Ensure the data directory exists
    if not os.path.isdir(data_dir):
        print(f"Error: Directory {data_dir} does not exist")
        return None

    # Count images per class; the counts only drive the heuristics below.
    image_exts = ('.jpg', '.jpeg', '.png')
    class_counts = {}
    for item in sorted(os.listdir(data_dir)):
        item_path = os.path.join(data_dir, item)
        if os.path.isdir(item_path):
            class_counts[item] = sum(
                1 for f in os.listdir(item_path) if f.lower().endswith(image_exts)
            )
            print(f"  - Class {item}: {class_counts[item]} images")

    if not class_counts:
        print("Error: No class directories found!")
        return None

    total_images = sum(class_counts.values())
    min_class_images = min(class_counts.values())
    print(f"Total classes: {len(class_counts)}")
    print(f"Total images: {total_images}")
    print(f"Minimum images per class: {min_class_images}")

    if total_images == 0:
        print("Error: No images found in the class directories!")
        return None

    # Auto-calculate batch_size and epochs if not provided
    if batch_size is None:
        if min_class_images <= 8:
            batch_size = 1
        elif min_class_images <= 16:
            batch_size = 4
        elif min_class_images <= 32:
            batch_size = 8
        elif min_class_images <= 64:
            batch_size = 16
        else:
            batch_size = 32
        print(f"Auto-selected batch_size = {batch_size}")

    if epochs is None:
        if total_images < 100:
            epochs = 30
        elif total_images < 500:
            epochs = 20
        elif total_images < 1000:
            epochs = 15
        else:
            epochs = 10
        print(f"Auto-selected epochs = {epochs}")

    # Load the data. Both subsets share the seed so the split is consistent.
    dataset_kwargs = dict(
        validation_split=0.2,
        seed=123,
        image_size=(64, 64),
        batch_size=batch_size,
        color_mode='grayscale',
        shuffle=True,
    )
    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir, subset="training", **dataset_kwargs
    )
    validation_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir, subset="validation", **dataset_kwargs
    )

    # Class names in the index order Keras assigned. Read them before any
    # .map() call, because the mapped dataset no longer has the attribute.
    class_names = list(train_ds.class_names)

    # Initialize the model and align it with the dataset: output index i must
    # mean class_names[i], and the output layer needs one unit per class.
    model = SignLanguageModel()
    if class_names != list(model.labels):
        needs_rebuild = len(class_names) != len(model.labels)
        model.labels = class_names
        if needs_rebuild:
            model._create_model()
        print(f"Class order used for training: {class_names}")
        print("Note: this order is not stored in the saved model file; "
              "assign it to `labels` on any SignLanguageModel that loads the file.")

    # Normalize data
    normalization_layer = tf.keras.layers.Rescaling(1./255)
    train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
    validation_ds = validation_ds.map(lambda x, y: (normalization_layer(x), y))

    # Data augmentation - adjust strength based on dataset size
    # NOTE(review): RandomRotation interprets its factor as a fraction of a
    # full turn, so 0.2-0.4 allows rotations of roughly +/-72 to +/-144
    # degrees - confirm that such large rotations are intended for hand signs.
    augmentation_strength = 0.2 if total_images > 1000 else (0.3 if total_images > 500 else 0.4)
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.RandomRotation(augmentation_strength),
        tf.keras.layers.RandomTranslation(augmentation_strength, augmentation_strength),
        tf.keras.layers.RandomZoom(augmentation_strength),
        tf.keras.layers.RandomContrast(augmentation_strength)
    ])

    # Apply augmentation to the training set only. training=True is passed
    # explicitly because the layers are called outside of fit(); without it
    # some Keras versions run them in inference mode, where they do nothing.
    train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

    # Optimize performance
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
    validation_ds = validation_ds.prefetch(buffer_size=AUTOTUNE)

    # Callbacks: more patience on small datasets, where validation is noisier.
    early_stopping_patience = 5 if total_images < 500 else 3
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            patience=early_stopping_patience,
            restore_best_weights=True,
            verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            factor=0.2,
            patience=2,
            verbose=1
        )
    ]

    # Train the model
    print("Starting model training...")
    model.model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=epochs,
        callbacks=callbacks,
        verbose=1
    )

    # Save the model. Create the target directory first so a missing folder
    # cannot discard a finished training run.
    save_dir = os.path.dirname(model_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    model.model.save(model_file)
    print(f"Model saved to {model_file}")

    return model

In [None]:
# Dataset visualization (preview sample images per class)
def visualize_dataset(data_dir='dataset', num_samples=3):
    """
    Show a few sample images per class from a dataset directory.

    One figure row is drawn per class sub-directory (sorted by name) with up
    to ``num_samples`` images per row, taken in sorted file-name order so the
    selection is reproducible.

    Args:
        data_dir: Dataset root containing one sub-directory per class.
        num_samples: Maximum number of images shown per class (>= 1).

    Returns:
        None. Prints a message and returns early when the directory is
        missing, has no class sub-directories, or ``num_samples`` is < 1.
    """
    if not os.path.isdir(data_dir):
        print(f"Thư mục {data_dir} không tồn tại.")
        return

    classes = sorted(d for d in os.listdir(data_dir)
                     if os.path.isdir(os.path.join(data_dir, d)))
    if not classes:
        print(f"Không tìm thấy lớp nào trong {data_dir}")
        return

    if num_samples < 1:
        print(f"num_samples must be >= 1, got {num_samples}")
        return

    image_exts = ('.jpg', '.jpeg', '.png')
    num_classes = len(classes)

    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(num_classes, num_samples,
                             figsize=(12, 2 * num_classes), squeeze=False)

    # Hide every cell up front so that slots without an image (short classes,
    # unreadable files) stay blank instead of showing empty axes.
    for ax in axes.ravel():
        ax.axis('off')

    for row, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        images = sorted(f for f in os.listdir(class_dir)
                        if f.lower().endswith(image_exts))

        if not images:
            print(f"Không tìm thấy ảnh nào trong lớp {class_name}")
            continue

        for col, image_name in enumerate(images[:num_samples]):
            img = cv2.imread(os.path.join(class_dir, image_name))
            if img is None:
                # Unreadable file: leave its slot blank.
                continue

            # OpenCV loads BGR; matplotlib expects RGB.
            if img.ndim == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            ax = axes[row, col]
            ax.imshow(img, cmap='gray' if img.ndim == 2 else None)
            ax.set_title(f"{class_name}")
            ax.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
# Dataset root with one sub-directory per class (train_model() treats each
# sub-directory as a class). Assumes Google Drive is mounted at /content/drive.
dataset_path = '/content/drive/MyDrive/Colab Notebooks/computer_vision/dataset'


# To preview a few images from the dataset, uncomment:
# visualize_dataset(dataset_path)


# Train the model on this dataset:
trained_model = train_model(dataset_path)


In [None]:
# Create the model and load the trained weights.
# NOTE(review): train_model() saves to the relative path "models/sign_model.h5"
# by default, which is not this Drive path - confirm the trained file is
# copied or saved here before running this cell.
model_path = '/content/drive/MyDrive/Colab Notebooks/models/sign_model.h5'

# SignLanguageModel falls back to a fresh, untrained network when the file
# cannot be loaded, so make a missing file impossible to overlook.
if not os.path.exists(model_path):
    print(f"Warning: model file not found at {model_path}. "
          "An untrained model will be used and its predictions will be meaningless.")

sign_model = SignLanguageModel(model_path=model_path)


# Re-compile the model after loading, with the same optimizer, loss and
# metrics that were used for training.
sign_model.model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])


print("Mô hình đã sẵn sàng để dự đoán.")


In [None]:
# Optional: upload a still image to Colab instead of reading it from Drive.
# uploaded = files.upload()


# Read the test image from Google Drive. cv2.imread returns None (it does not
# raise) when the file cannot be read.
# NOTE(review): the next cell reads the same file again into `image`, and
# `img` is not used by any later cell visible here - this read looks redundant.
img = cv2.imread("/content/drive/MyDrive/Colab Notebooks/computer_vision/chu A.jpg")




# print(f"Đã tải lên ảnh: {img}")


In [None]:
# Read the test image from file. The path is kept in a variable so the error
# message below can report it (cv2.imread returns None on failure).
image_path = "/content/drive/MyDrive/Colab Notebooks/computer_vision/chu A.jpg"
image = cv2.imread(image_path)


if image is None:
    print(f"Lỗi: Không thể đọc ảnh từ đường dẫn {image_path}")
else:
    # Extract the hand region. extract_hand_region draws the ROI and the
    # bounding box on the image it receives, so give it a copy.
    image_with_roi = image.copy()
    hand_region = extract_hand_region(image_with_roi)


    # Show the image with the ROI and, if a hand was found, its bounding box.
    print("Ảnh với vùng tay được phát hiện:")
    cv2_imshow(image_with_roi)


    if hand_region is not None and hand_region.size > 0:
        # A region was extracted: run the prediction on it.
        predicted_sign, confidence = sign_model.predict_sign(hand_region)


        # Report the prediction.
        print(f"\nKết quả dự đoán: {predicted_sign} (Độ tin cậy: {confidence:.2f})")


        # Draw the prediction on the original image. draw_prediction works on
        # its own copy, so `image` itself stays unmodified.
        image_with_prediction = draw_prediction(image, predicted_sign, confidence)
        print("\nẢnh gốc với kết quả dự đoán:")
        cv2_imshow(image_with_prediction)


    else:
        # No hand region could be extracted.
        print("\nKhông phát hiện được vùng tay trong ảnh.")
        # Show the "waiting" overlay on the original image.
        image_with_no_hand = draw_prediction(image, "?", 0.0)
        print("\nẢnh gốc với thông báo:")
        cv2_imshow(image_with_no_hand)
