<a href="https://colab.research.google.com/github/erencankur/Sign_Language_Detection/blob/main/Sign_Language_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Sign Language Detection**
Train a sign language detection model with TensorFlow, then use the trained model to make predictions.

## **Code Files:**
1. First Code: Data Collection
2. Second Code: Model Training
3. Third Code: Prediction

## **First Code: Data Collection**

In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp

# Root folder of the dataset; one sub-folder per class is created beneath it.
dataset_path = "dataset"

def create_dataset_folders(root=None):
    """Create the dataset root and one sub-folder per letter ``A``-``Z``.

    Calling this repeatedly is safe: folders that already exist are left
    untouched.

    Args:
        root: Directory in which the class folders are created. When omitted,
            the module-level ``dataset_path`` is used.
    """
    base_path = dataset_path if root is None else root

    # exist_ok=True replaces the check-then-create pattern, which can fail if
    # the folder appears between the existence check and the makedirs() call.
    os.makedirs(base_path, exist_ok=True)
    for code in range(ord("A"), ord("Z") + 1):
        os.makedirs(os.path.join(base_path, chr(code)), exist_ok=True)

    print("All folders created.")

def initialize_hand_detection():
    """Build the MediaPipe ``Hands`` detector used by the capture loop.

    Returns:
        A ``mp.solutions.hands.Hands`` instance created with
        ``static_image_mode=False``, at most one hand, and a 0.7 threshold
        for both detection and tracking confidence.
    """
    detector_options = {
        "static_image_mode": False,
        "max_num_hands": 1,
        "min_detection_confidence": 0.7,
        "min_tracking_confidence": 0.7,
    }
    return mp.solutions.hands.Hands(**detector_options)

def get_hand_boundaries(hand_landmarks, frame_shape):
    """Convert normalized landmarks to pixel points and their bounding box.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute iterates over
            items with ``x`` and ``y`` attributes; these are multiplied by
            the frame width and height respectively.
        frame_shape: Shape of the frame; only ``(height, width)`` is read.

    Returns:
        ``(points, (x_min, y_min, x_max, y_max))`` where ``points`` is a list
        of ``(x, y)`` integer tuples in landmark order.
    """
    frame_h, frame_w = frame_shape[:2]

    points = [
        (int(lm.x * frame_w), int(lm.y * frame_h))
        for lm in hand_landmarks.landmark
    ]
    xs = [px for px, _ in points]
    ys = [py for _, py in points]

    # The minima are seeded with the frame size and the maxima with 0, so the
    # box never starts beyond the far edge nor ends before the origin.
    left = min([frame_w] + xs)
    top = min([frame_h] + ys)
    right = max([0] + xs)
    bottom = max([0] + ys)

    return points, (left, top, right, bottom)

def create_hand_mask(points, frame_shape):
    """Draw a single-channel mask that covers the hand described by *points*.

    Args:
        points: Sequence of ``(x, y)`` pixel tuples; indices 0, 1, 5 and 17
            are used as palm corners (presumably MediaPipe's hand landmark
            ordering -- confirm against the caller).
        frame_shape: Shape of the frame; the mask uses its first two entries.

    Returns:
        A ``uint8`` array: the drawn mask after dilation with a 25x25 kernel
        and a 15x15 Gaussian blur.
    """
    mask = np.zeros(frame_shape[:2], dtype=np.uint8)

    # Fill the palm polygon, then trace its outline with a thick stroke.
    palm_ids = (0, 1, 5, 17)
    palm_polygon = np.array([points[idx] for idx in palm_ids])
    cv2.fillPoly(mask, [palm_polygon], 255)
    for start_id, end_id in zip(palm_ids, palm_ids[1:] + palm_ids[:1]):
        cv2.line(mask, points[start_id], points[end_id], 255, thickness=50)

    # Join consecutive points, skipping every segment whose start index is a
    # multiple of 4 (presumably the jumps from one finger to the next).
    for idx, (start, end) in enumerate(zip(points, points[1:])):
        if idx % 4:
            cv2.line(mask, start, end, 255, thickness=20)

    grown = cv2.dilate(mask, np.ones((25, 25), np.uint8))
    return cv2.GaussianBlur(grown, (15, 15), 0)

def get_square_boundaries(boundaries, frame_shape):
    """Expand a hand bounding box to a padded square that stays in the frame.

    The square is centered on the box, its side is the longer box edge plus
    100 pixels, and it is shifted back inside the frame when it crosses an
    edge. If the square is larger than the frame along an axis it is clipped
    to the frame on that axis, so the result may not be square in that case.

    Args:
        boundaries: ``(x_min, y_min, x_max, y_max)`` of the hand in pixels.
        frame_shape: Shape of the frame; only ``(height, width)`` is read.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` with both minima ``>= 0`` and the
        maxima no larger than the frame width / height.
    """
    x_min, y_min, x_max, y_max = boundaries
    height, width = frame_shape[:2]

    center_x = (x_min + x_max) // 2
    center_y = (y_min + y_max) // 2
    square_size = int(max(x_max - x_min, y_max - y_min) + 100)
    half = square_size // 2

    x_min, x_max = center_x - half, center_x + half
    y_min, y_max = center_y - half, center_y + half

    # Slide the square right/down if it sticks out past the top-left edges.
    if x_min < 0:
        x_max -= x_min
        x_min = 0
    if y_min < 0:
        y_max -= y_min
        y_min = 0

    # Slide it left/up if it sticks out past the bottom-right edges.
    if x_max > width:
        x_min -= x_max - width
        x_max = width
    if y_max > height:
        y_min -= y_max - height
        y_max = height

    # When the square is larger than the frame the shift above drives the
    # minimum negative; a negative index would make NumPy slicing wrap around
    # and crop the wrong region, so clamp to the frame origin.
    x_min = max(x_min, 0)
    y_min = max(y_min, 0)

    return x_min, y_min, x_max, y_max

def _pad_to_square(hand_square):
    """Center *hand_square* on a black square canvas sized to its longer edge."""
    side = max(hand_square.shape[0], hand_square.shape[1])
    canvas = np.zeros((side, side, 3), dtype=np.uint8)

    y_offset = (side - hand_square.shape[0]) // 2
    x_offset = (side - hand_square.shape[1]) // 2
    canvas[
        y_offset:y_offset + hand_square.shape[0],
        x_offset:x_offset + hand_square.shape[1],
    ] = hand_square
    return canvas


def main():
    """Capture masked 64x64 hand images from the webcam for one class.

    Prompts for a class name, then shows a live preview. Pressing "s" while a
    hand is detected saves the current masked crop into the class folder;
    pressing "q" ends the session.
    """
    create_dataset_folders()

    current_class = input("Enter the class: ").strip()
    class_path = os.path.join(dataset_path, current_class)

    # An empty answer would resolve to the dataset root itself, so reject it
    # together with names that have no folder.
    if not current_class or not os.path.isdir(class_path):
        print(f"Folder {current_class} not found.")
        return

    hands = initialize_hand_detection()
    cap = cv2.VideoCapture(0)

    print(f"Saving images for {current_class}. Press \"s\" to take a photo.")
    image_count = 0
    next_index = 1

    try:
        while True:
            success, frame = cap.read()
            if not success:
                print("Frame not available.")
                break

            frame = cv2.flip(frame, 1)
            result = np.zeros_like(frame)
            display_frame = frame.copy()
            hand_square = None

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    points, boundaries = get_hand_boundaries(hand_landmarks, frame.shape)
                    mask = create_hand_mask(points, frame.shape)
                    x_min, y_min, x_max, y_max = get_square_boundaries(boundaries, frame.shape)

                    cv2.rectangle(display_frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

                    result = cv2.bitwise_and(frame, frame, mask=mask)
                    hand_square = result[y_min:y_max, x_min:x_max]

            combined_view = cv2.addWeighted(display_frame, 0.1, result, 0.9, 0)
            cv2.imshow("Data Collection", combined_view)

            # Poll the keyboard exactly once per frame: every waitKey() call
            # consumes the pending key, so two separate polls could each
            # swallow the key the other one was waiting for.
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break

            if key == ord("s") and hand_square is not None and hand_square.size > 0:
                final_img = cv2.resize(_pad_to_square(hand_square), (64, 64))

                # Skip numbers already on disk so that a second session for
                # the same class does not overwrite earlier captures.
                image_name = f"{next_index}.jpg"
                image_path = os.path.join(class_path, image_name)
                while os.path.exists(image_path):
                    next_index += 1
                    image_name = f"{next_index}.jpg"
                    image_path = os.path.join(class_path, image_name)

                if cv2.imwrite(image_path, final_img):
                    print(f"{image_name} saved.")
                    image_count += 1
                    next_index += 1
                else:
                    print(f"Could not save {image_name}.")
    finally:
        # Release the camera and windows even if the loop raises.
        cap.release()
        cv2.destroyAllWindows()
        hands.close()

    print(f"\n{image_count} images saved for {current_class}.")

if __name__ == "__main__":
    main()

## **Second Code: Model Training**

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Folder scanned for the per-letter image sub-folders used as training data.
dataset_path = "dataset"
# File the trained model is saved to.
model_path = "model.keras"

def create_model():
    """Build and compile the CNN classifier.

    The network takes 64x64x3 inputs, applies two Conv2D + MaxPooling2D
    stages (16 then 32 filters), flattens, and ends in a 26-way softmax.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 5e-5,
        sparse categorical cross-entropy loss, accuracy metric).
    """
    stack = [Input(shape=(64, 64, 3))]
    for filters in (16, 32):
        stack.append(layers.Conv2D(filters, (3, 3), activation="relu"))
        stack.append(layers.MaxPooling2D((2, 2)))
    stack.append(layers.Flatten())
    stack.append(layers.Dense(26, activation="softmax"))

    model = Sequential(stack)

    optimizer = Adam(learning_rate=0.00005)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

def get_image_paths_and_labels(dataset_path):
    """Collect the ``.jpg`` files of every letter folder with their labels.

    Args:
        dataset_path: Directory expected to contain sub-folders ``A``..``Z``.
            Missing folders are skipped.

    Returns:
        ``(image_paths, labels)``: two parallel lists, where the label is the
        zero-based letter index (``A`` -> 0 ... ``Z`` -> 25).
    """
    image_paths = []
    labels = []

    for label in range(26):
        folder_path = os.path.join(dataset_path, chr(label + 65))
        if not os.path.isdir(folder_path):
            continue

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sample order, and so any seeded split of it, reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            if file_name.endswith(".jpg"):
                image_paths.append(os.path.join(folder_path, file_name))
                labels.append(label)

    return image_paths, labels

def load_images(image_paths, labels):
    """Read the images at *image_paths* and resize them to 64x64.

    Files that OpenCV cannot read are reported and skipped together with
    their label, so the two returned arrays always stay aligned.

    Args:
        image_paths: Paths of the images to load.
        labels: Labels parallel to ``image_paths``.

    Returns:
        ``(images, labels)`` as NumPy arrays containing only the readable
        images and their labels.
    """
    images = []
    kept_labels = []

    for img_path, label in zip(image_paths, labels):
        img = cv2.imread(img_path)
        # imread() signals failure by returning None rather than raising;
        # passing that on to resize() would fail with an opaque OpenCV error.
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        images.append(cv2.resize(img, (64, 64)))
        kept_labels.append(label)

    return np.array(images), np.array(kept_labels)

def create_data_generator():
    """Create the augmenting image generator used for the training data.

    Returns:
        An ``ImageDataGenerator`` that rescales pixels by 1/255 and applies
        rotation, shear, zoom and horizontal-flip augmentation with
        ``fill_mode="nearest"``.
    """
    augmentation = {
        "rotation_range": 5,
        "shear_range": 0.05,
        "zoom_range": 0.05,
        "horizontal_flip": True,
        "fill_mode": "nearest",
    }
    return ImageDataGenerator(rescale=1./255, **augmentation)


def plot_training_history(history):
    """Plot training/validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` mapping provides the ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss`` series.
    """
    fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 6))
    series = history.history

    # One entry per panel: axis, title, y-label, marker, then the
    # (history key, legend label, color) of each curve.
    panels = (
        (
            accuracy_ax, "Model Accuracy", "Accuracy", "o",
            (("accuracy", "Training Accuracy", "blue"),
             ("val_accuracy", "Validation Accuracy", "cyan")),
        ),
        (
            loss_ax, "Model Loss", "Loss", "x",
            (("loss", "Training Loss", "red"),
             ("val_loss", "Validation Loss", "orange")),
        ),
    )

    for axis, title, y_label, marker, curves in panels:
        for key, legend_label, color in curves:
            axis.plot(series[key], label=legend_label, marker=marker, color=color)
        axis.set_title(title)
        axis.set_xlabel("Epoch")
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid()

    plt.tight_layout()
    plt.show()

def main():
    """Train the classifier on the dataset, save it and plot the history.

    Loads every image under ``dataset_path``, holds out 20% for validation,
    trains with augmentation and early stopping on validation loss, then
    writes the model to ``model_path``.
    """
    image_paths, labels = get_image_paths_and_labels(dataset_path)
    x_data, y_data = load_images(image_paths, labels)

    # Without at least two samples the train/validation split below cannot
    # be made; stop with a clear message before building anything.
    if len(x_data) < 2:
        print(f"Not enough images in {dataset_path} to train a model.")
        return

    model = create_model()

    x_train, x_val, y_train, y_val = train_test_split(
        x_data, y_data, test_size=0.2, random_state=42
    )

    datagen = create_data_generator()
    datagen.fit(x_train)

    early_stopping = EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )

    history = model.fit(
        datagen.flow(x_train, y_train, batch_size=32),
        # The generator rescales training batches by 1/255, so the raw
        # validation images are scaled the same way here.
        validation_data=(x_val/255.0, y_val),
        epochs=100,
        callbacks=[early_stopping]
    )

    model.save(model_path)

    plot_training_history(history)

if __name__ == "__main__":
    main()

## **Third Code: Prediction**

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

# File the trained model is loaded from.
model_path = "model.keras"

def initialize_hand_detection():
    """Create the MediaPipe ``Hands`` detector used by the prediction loop.

    Returns:
        A ``mp.solutions.hands.Hands`` instance created with
        ``static_image_mode=False``, ``max_num_hands=1`` and 0.7 for both
        confidence thresholds.
    """
    confidence = 0.7
    settings = dict(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=confidence,
        min_tracking_confidence=confidence,
    )
    return mp.solutions.hands.Hands(**settings)

def load_prediction_model():
    """Load and return the model stored at the module-level ``model_path``."""
    trained_model = load_model(model_path)
    return trained_model

def get_class_mapping():
    """Return the class names ``"A"`` .. ``"Z"`` as a list indexed by class id."""
    first, last = ord("A"), ord("Z")
    return list(map(chr, range(first, last + 1)))

def process_hand_landmarks(hand_landmarks, frame_shape):
    """Scale normalized landmarks to pixels and compute their bounding box.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute iterates over
            items with ``x`` and ``y`` attributes; these are multiplied by
            the frame width and height respectively.
        frame_shape: Shape of the frame; only ``(height, width)`` is read.

    Returns:
        ``(points, (x_min, y_min, x_max, y_max))`` where ``points`` is a list
        of ``(x, y)`` integer tuples in landmark order.
    """
    frame_h, frame_w = frame_shape[:2]

    points = []
    for lm in hand_landmarks.landmark:
        points.append((int(lm.x * frame_w), int(lm.y * frame_h)))

    xs, ys = zip(*points) if points else ((), ())

    # Seeding with the frame size (minima) and 0 (maxima) reproduces the
    # starting values of a running min/max over the points.
    box = (
        min((frame_w, *xs)),
        min((frame_h, *ys)),
        max((0, *xs)),
        max((0, *ys)),
    )
    return points, box

def create_hand_mask(points, frame_shape):
    """Build a single-channel mask covering the hand described by *points*.

    Args:
        points: Sequence of ``(x, y)`` pixel tuples; indices 0, 1, 5 and 17
            serve as palm corners (presumably MediaPipe's hand landmark
            ordering -- confirm against the caller).
        frame_shape: Shape of the frame; the mask uses its first two entries.

    Returns:
        A ``uint8`` array: the drawn mask after a 25x25 dilation and a 15x15
        Gaussian blur.
    """
    white = 255
    mask = np.zeros(frame_shape[:2], dtype=np.uint8)

    # Palm: filled quadrilateral plus a thick stroke along each of its edges.
    corners = [points[0], points[1], points[5], points[17]]
    cv2.fillPoly(mask, [np.array(corners)], white)
    for position, corner in enumerate(corners):
        following = corners[(position + 1) % len(corners)]
        cv2.line(mask, corner, following, white, thickness=50)

    # Consecutive points are joined except where the start index is a
    # multiple of 4 (presumably the gaps between separate fingers).
    segment_starts = [i for i in range(len(points) - 1) if i % 4 != 0]
    for i in segment_starts:
        cv2.line(mask, points[i], points[i + 1], white, thickness=20)

    structuring_element = np.ones((25, 25), np.uint8)
    expanded = cv2.dilate(mask, structuring_element)
    softened = cv2.GaussianBlur(expanded, (15, 15), 0)
    return softened

def get_square_boundaries(boundaries, frame_shape):
    """Expand a hand bounding box to a padded square that stays in the frame.

    The square is centered on the box, its side is the longer box edge plus
    100 pixels, and it is shifted back inside the frame when it crosses an
    edge. If the square is larger than the frame along an axis it is clipped
    to the frame on that axis, so the result may not be square in that case.

    Args:
        boundaries: ``(x_min, y_min, x_max, y_max)`` of the hand in pixels.
        frame_shape: Shape of the frame; only ``(height, width)`` is read.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` with both minima ``>= 0`` and the
        maxima no larger than the frame width / height.
    """
    x_min, y_min, x_max, y_max = boundaries
    height, width = frame_shape[:2]

    square_size = int(max(x_max - x_min, y_max - y_min) + 100)
    half = square_size // 2

    def fit_axis(center, limit):
        """Place the span ``center +/- half`` inside ``[0, limit]``."""
        low, high = center - half, center + half
        if low < 0:
            high -= low
            low = 0
        if high > limit:
            low -= high - limit
            high = limit
        # A span longer than the frame leaves ``low`` negative after the
        # shift; a negative index would make NumPy slicing wrap around and
        # crop the wrong region, so clamp it to 0.
        return max(low, 0), high

    new_x_min, new_x_max = fit_axis((x_min + x_max) // 2, width)
    new_y_min, new_y_max = fit_axis((y_min + y_max) // 2, height)

    return new_x_min, new_y_min, new_x_max, new_y_max

def predict_hand_sign(model, hand_square, class_mapping):
    """Classify one hand crop and return the matching class name.

    Args:
        model: Object with a Keras-style ``predict(batch, verbose=...)``
            method.
        hand_square: Image of the hand; it is resized to 64x64 and divided by
            255 before prediction.
        class_mapping: Sequence mapping a class index to its name.

    Returns:
        The entry of ``class_mapping`` at the index of the highest score.
    """
    resized = cv2.resize(hand_square, (64, 64))
    batch = np.expand_dims(resized / 255.0, axis=0)

    # This runs once per video frame; verbose=0 suppresses the progress bar
    # that predict() would otherwise print on every call.
    predictions = model.predict(batch, verbose=0)
    predicted_class = int(np.argmax(predictions))
    return class_mapping[predicted_class]

def _pad_to_square(hand_square):
    """Center *hand_square* on a black square canvas sized to its longer edge."""
    side = max(hand_square.shape[0], hand_square.shape[1])
    canvas = np.zeros((side, side, 3), dtype=np.uint8)

    y_offset = (side - hand_square.shape[0]) // 2
    x_offset = (side - hand_square.shape[1]) // 2
    canvas[
        y_offset:y_offset + hand_square.shape[0],
        x_offset:x_offset + hand_square.shape[1],
    ] = hand_square
    return canvas


def main():
    """Run live sign prediction on the webcam until "q" is pressed.

    Each frame's masked hand crop is classified. A prediction that stays the
    same for ``stable_frames`` consecutive frames and differs from the last
    committed one is printed and appended to the text shown on screen.
    """
    no_prediction = "DNE"

    # Load the model first so that a missing model file fails before the
    # camera is opened.
    model = load_prediction_model()
    class_mapping = get_class_mapping()
    hands = initialize_hand_detection()
    cap = cv2.VideoCapture(0)

    prev_prediction = no_prediction
    prediction_counter = 0
    last_stable_prediction = no_prediction
    stable_frames = 20
    output_text = ""

    try:
        while True:
            success, frame = cap.read()
            if not success:
                print("Frame not available.")
                break

            frame = cv2.flip(frame, 1)
            predicted_character = no_prediction

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    points, boundaries = process_hand_landmarks(hand_landmarks, frame.shape)
                    mask = create_hand_mask(points, frame.shape)
                    x_min, y_min, x_max, y_max = get_square_boundaries(boundaries, frame.shape)

                    # NOTE(review): no bounding box is drawn because the shown
                    # frame is also the pixel source of the crop; draw it on
                    # `frame` after cropping if a visual guide is wanted.
                    masked_frame = cv2.bitwise_and(frame, frame, mask=mask)

                    hand_square = masked_frame[y_min:y_max, x_min:x_max]
                    if hand_square.size > 0:
                        predicted_character = predict_hand_sign(
                            model, _pad_to_square(hand_square), class_mapping
                        )

                if predicted_character == prev_prediction:
                    prediction_counter += 1
                    # The placeholder is excluded: a hand whose crop is empty
                    # for many frames must not add "DNE" to the output text.
                    if (
                        prediction_counter >= stable_frames
                        and predicted_character != last_stable_prediction
                        and predicted_character != no_prediction
                    ):
                        print(f"Stable Prediction: {predicted_character}")
                        last_stable_prediction = predicted_character
                        output_text += predicted_character
                else:
                    prediction_counter = 0

                prev_prediction = predicted_character

            else:
                prediction_counter = 0
                prev_prediction = no_prediction

            status_text = f"Predicted: {predicted_character}"
            if prediction_counter >= stable_frames:
                status_text += " (Stable)"

            cv2.putText(frame, status_text, (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 6)
            cv2.putText(frame, output_text, (30, 150), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 6)

            cv2.imshow("Sign Language Detection", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and windows even if the loop raises.
        cap.release()
        cv2.destroyAllWindows()
        hands.close()

if __name__ == "__main__":
    main()