# 🧠 DeafNet: Real-Time Sign Language Recognition

This project implements a Convolutional Neural Network (CNN) to recognize American Sign Language (ASL) gestures from a live webcam feed using Python, TensorFlow/Keras, and OpenCV.

---

## 1. ⚙️ Setup and Installation

Follow these steps to set up your environment and install the required dependencies.

### 1.1. Prerequisites

* Python 3.x (Recommended: 3.8+)
* A working webcam.

### 1.2. Installation Command

Use pip to install all necessary libraries: TensorFlow for the model, OpenCV for video processing, NumPy for array handling, and scikit-learn for splitting the dataset.

```bash
pip install tensorflow opencv-python numpy scikit-learn
```

## 2. 📊 Data Preparation

### 2.1. Load and Preprocess Data

Assuming you have image data (e.g., a folder of images or a CSV like Sign MNIST), load it, scale the pixel values to the range [0, 1], one-hot encode the labels, and split the result into training and test sets.

```python
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Placeholder: replace the commented-out call below with your own loader.
# X_data is expected to hold the images with shape (N, 28, 28, 1) and
# Y_data the integer class labels with shape (N,).
# X_data, Y_data = load_my_sign_data()

# 1. Normalize pixel values from 0-255 down to 0.0-1.0
X_data = X_data.astype('float32') / 255.0

# 2. One-hot encode the labels
num_classes = 26 # For A-Z
Y_data = to_categorical(Y_data, num_classes=num_classes)

# 3. Split into training and test sets (80% / 20%; the fixed random_state
#    makes the split reproducible). The names Y_train / Y_test are the ones
#    the training and evaluation code further down refers to.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=42
)

print(f"Training shapes: X={X_train.shape}, Y={Y_train.shape}")

```

## 3. 🧠 Model Building and Training

### 3.1. Define the CNN Architecture

We use a simple Sequential model with Convolutional layers for image feature extraction.

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Assemble the CNN layer by layer: two convolution + max-pooling stages
# extract image features, then a dense head turns them into class scores.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # dropout rate of 0.5 on the dense features
model.add(Dense(num_classes, activation='softmax'))  # one output per class

# Adam optimizer with categorical cross-entropy loss; accuracy is the
# metric reported during training and evaluation.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

```

```python
# Training hyperparameters
EPOCHS = 20      # full passes over the training set
BATCH_SIZE = 64  # samples processed per model update

# Fit the model. The test split is passed as validation data so that
# performance can be monitored while training runs; swap in a separate
# validation set if you have one.
history = model.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test, Y_test),
    verbose=1,  # show progress while training
)

```

```python
# Score the trained model on the test split (verbose=0 keeps it silent)
# and report the resulting loss and accuracy.
loss, accuracy = model.evaluate(
    X_test,
    Y_test,
    verbose=0,
)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy*100:.2f}%')

```

```python
# Persist the full model (architecture, weights and training
# configuration) as a single HDF5 file.
model.save(
    'deafnet_sign_recognition_cnn.h5'
)
print("\nModel saved successfully to 'deafnet_sign_recognition_cnn.h5'")

```

## 4. ▶️ Real-Time Prediction (Testing)

### 4.1. Run Live Detection Logic

This code loads the saved model and uses your webcam to recognize signs. *(Note: This requires a working webcam and may be unstable in some notebook environments).*

```python
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the model written by model.save() in the training section.
# The file name must match the one used there.
loaded_model = load_model('deafnet_sign_recognition_cnn.h5')

# Translate a class index into its letter (0 -> 'A', 1 -> 'B', ...)
def index_to_letter(index):
    """Return the character whose code point is ``ord('A') + index``.

    For indices 0-25 this is the corresponding uppercase letter A-Z.
    """
    first_letter_code = ord('A')
    return chr(first_letter_code + index)

# Region of Interest (ROI) where the user should place their hand.
# Adjust the coordinates to move the box. It is constant, so it is defined
# once here instead of on every frame.
roi_coords = (100, 100, 300, 300) # (x1, y1, x2, y2)

cap = cv2.VideoCapture(0) # Open the default camera

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of processing nothing
            break

        # 1. Crop the ROI and convert it to grayscale *before* drawing on
        #    the frame: the crop is a view into `frame`, so drawing the box
        #    first would leak its green border into the model input.
        roi = frame[roi_coords[1]:roi_coords[3], roi_coords[0]:roi_coords[2]]
        processed_img = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        cv2.rectangle(frame, roi_coords[:2], roi_coords[2:], (0, 255, 0), 2)

        # 2. Resize to the 28x28 input size of the CNN defined above
        processed_img = cv2.resize(processed_img, (28, 28))

        # 3. Add batch and channel axes -> (1, 28, 28, 1), scale to 0.0-1.0
        input_data = np.expand_dims(processed_img, axis=(0, -1)).astype('float32') / 255.0

        # 4. Predict (verbose=0 avoids a progress bar on every frame)
        prediction = loaded_model.predict(input_data, verbose=0)
        predicted_index = int(np.argmax(prediction))
        predicted_letter = index_to_letter(predicted_index)

        # 5. Display result
        cv2.putText(frame, f"Sign: {predicted_letter}", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        cv2.imshow('DeafNet Live Feed', frame)

        # Quit when the user presses 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if an error
    # (or Ctrl+C) interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()

```

```python
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# --- Load the model saved by model.save() in the training section ---
# The file name must match the one written there.
MODEL = load_model('deafnet_sign_recognition_cnn.h5')

# Map a class index onto its sign letter
def get_sign(index):
    """Return the letter for a class index: 0 -> 'A', 1 -> 'B', and so on."""
    letter_code = 65 + index  # 65 is the code point of 'A'
    return chr(letter_code)

# --- ROI (Region of Interest) for the hand: a fixed 200x200 box ---
x1, y1 = 100, 100
x2, y2 = 300, 300

# Spatial size the loaded model expects. For the channels-last CNN built in
# this guide, input_shape is (batch, height, width, channels). Reading it
# from the model keeps the resize below in step with the trained network
# instead of relying on a hard-coded size.
_, input_height, input_width, _ = MODEL.input_shape

cap = cv2.VideoCapture(0) # Start webcam capture

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of processing nothing
            break

        # --- Pre-process ROI (Grayscale, Resize, Normalize) ---
        # Crop and convert before drawing the box: the crop is a view into
        # `frame`, so drawing first would leak the green border into the
        # model input.
        roi = frame[y1:y2, x1:x2]
        processed_img = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # 1. Resize to the model's expected size (cv2 takes (width, height))
        processed_img = cv2.resize(processed_img, (input_width, input_height))

        # 2. Add channel dimension for TensorFlow: (H, W) -> (H, W, 1)
        processed_img = np.expand_dims(processed_img, axis=-1)

        # 3. Normalize the pixel values (0-255 to 0.0-1.0)
        processed_img = processed_img / 255.0

        # 4. Add batch dimension: (H, W, 1) -> (1, H, W, 1)
        processed_img = np.expand_dims(processed_img, axis=0)

        # --- Prediction (verbose=0 avoids a progress bar on every frame) ---
        predictions = MODEL.predict(processed_img, verbose=0)
        predicted_index = int(np.argmax(predictions))
        predicted_sign = get_sign(predicted_index)
        confidence = predictions[0][predicted_index] * 100

        # --- Display result just above the ROI box ---
        text = f'{predicted_sign} ({confidence:.2f}%)'
        cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # --- Show the video frame ---
        cv2.imshow('Sign Language Recognition', frame)

        # Exit loop on 'q' press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # --- Cleanup: runs even if an error (or Ctrl+C) interrupts the loop ---
    cap.release()
    cv2.destroyAllWindows()