## Data Augmentation

In [1]:
import os
import random
from PIL import Image, ImageEnhance, ImageOps
import numpy as np

In [2]:
# Define augmentation functions
def random_rotation(image):
    """Return `image` rotated by an angle drawn uniformly from [-30, 30] degrees.

    The rotation itself is delegated to ``image.rotate``; whatever that call
    returns is handed back unchanged.
    """
    max_degrees = 30
    return image.rotate(random.uniform(-max_degrees, max_degrees))

def random_flip(image):
    """Mirror `image` with a 50% chance; otherwise return it untouched.

    One ``random.choice`` draw decides the outcome. When the draw is True the
    result of ``ImageOps.mirror(image)`` is returned, otherwise the very same
    object that was passed in.
    """
    should_mirror = random.choice([True, False])
    return ImageOps.mirror(image) if should_mirror else image

def random_brightness(image):
    """Return `image` with its brightness scaled by a random factor.

    The factor is drawn uniformly from [0.7, 1.3] and applied through
    ``ImageEnhance.Brightness``.
    """
    # The enhancer is constructed before the factor is drawn, same as before.
    return ImageEnhance.Brightness(image).enhance(random.uniform(0.7, 1.3))

def random_contrast(image):
    """Return `image` with its contrast scaled by a random factor.

    The factor is drawn uniformly from [0.7, 1.3] and applied through
    ``ImageEnhance.Contrast``.
    """
    # The enhancer is constructed before the factor is drawn, same as before.
    return ImageEnhance.Contrast(image).enhance(random.uniform(0.7, 1.3))

def add_random_noise(image):
    """Return a copy of `image` with additive Gaussian noise (mean 0, std 25).

    The image is converted to a NumPy array, noise of the same shape is added,
    the sum is clipped to [0, 255] and cast to uint8, and the result is turned
    back into an image with ``Image.fromarray``.
    """
    pixels = np.array(image)
    # Noise is truncated to int16 before the addition, as in the original.
    gaussian = np.random.normal(loc=0, scale=25, size=pixels.shape).astype(np.int16)
    noisy_pixels = (pixels + gaussian).clip(0, 255).astype(np.uint8)
    return Image.fromarray(noisy_pixels)

def augment_image(image):
    """Apply the enabled augmentations to `image` and return the result.

    Currently enabled, in order: random rotation, then random horizontal flip.
    The brightness, contrast, and noise steps (random_brightness,
    random_contrast, add_random_noise) are defined in this notebook but are
    not applied here.
    """
    for transform in (random_rotation, random_flip):
        image = transform(image)
    return image


In [3]:
# Root folder whose image files are read by the augmentation loop below.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
input_folder = "C:/Users/User/OneDrive/Documents/SignLanguageApp/SLangDataset/data_est_white"
# Root folder that receives the augmented copies (the input's subfolder layout is mirrored under it).
output_folder = "C:/Users/User/OneDrive/Documents/SignLanguageApp/SLangDataset/LMARK_aug_data_est_white"
# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_folder, exist_ok=True)

In [4]:
# Iterate over all subfolders and images.
# Every image under `input_folder` is converted to greyscale, augmented once,
# and written to the same relative location under `output_folder`.

# Extensions include the leading dot so that only real file extensions match;
# a bare 'png' would also match an extension-less name such as "foopng",
# which then fails at save time because the output format cannot be inferred.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

for subdir, _, files in os.walk(input_folder):
    # Mirror the input directory layout under the output root.
    relative_path = os.path.relpath(subdir, input_folder)
    output_subdir = os.path.join(output_folder, relative_path)
    os.makedirs(output_subdir, exist_ok=True)

    for file in files:
        if not file.lower().endswith(IMAGE_EXTENSIONS):
            continue

        input_path = os.path.join(subdir, file)
        output_path = os.path.join(output_subdir, file)

        try:
            with Image.open(input_path) as img:
                grey_img = img.convert("L")  # Ensure greyscale (black and white)
                augmented_img = augment_image(grey_img)
                augmented_img.save(output_path)
        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error processing {input_path}: {e}")

print("Data augmentation completed!")


Data augmentation completed!


## Joining Augmented Data with Original Data

In [6]:
import os
import shutil

# The two source datasets that get merged, and the folder receiving the combined copy.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
dataset1 = r"C:\Users\User\OneDrive\Documents\SignLanguageApp\SLangDataset\LMARK_merged_data_est"
dataset2 = r"C:\Users\User\OneDrive\Documents\SignLanguageApp\SLangDataset\LMARK_merged_data_est_white"
output_dataset = r"C:\Users\User\OneDrive\Documents\SignLanguageApp\SLangDataset\LMARK_merged_data_est_CombinedBW"

# Create the output directory if it doesn't exist
os.makedirs(output_dataset, exist_ok=True)

# Function to merge datasets with renaming
def merge_datasets(source_dir, target_dir, suffix=""):
    """Copy every class folder's files from `source_dir` into `target_dir`.

    Each immediate subdirectory of `source_dir` is treated as a class folder
    and is recreated under `target_dir`. Every regular file inside it is
    copied with `suffix` inserted between the base name and the extension
    (``img.png`` -> ``img<suffix>.png``), so files from several sources can
    share one target folder without overwriting each other.

    Args:
        source_dir: Directory containing one subdirectory per class.
        target_dir: Directory that receives the class subdirectories.
        suffix: Text appended to each file's base name. Defaults to "".

    Entries of `source_dir` that are not directories, and entries of a class
    folder that are not regular files (e.g. nested folders), are skipped;
    previously a nested folder made ``shutil.copy2`` raise.
    """
    for class_name in os.listdir(source_dir):
        source_class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(source_class_path):
            continue

        # Create the class folder in the target if it doesn't exist
        target_class_path = os.path.join(target_dir, class_name)
        os.makedirs(target_class_path, exist_ok=True)

        for file_name in os.listdir(source_class_path):
            source_file_path = os.path.join(source_class_path, file_name)
            if not os.path.isfile(source_file_path):
                # copy2 only handles files; nested directories are ignored.
                continue

            # Add the specified suffix to the file name
            base_name, ext = os.path.splitext(file_name)
            target_file_path = os.path.join(
                target_class_path, f"{base_name}{suffix}{ext}"
            )

            # Copy the file (with metadata) to the target directory
            shutil.copy2(source_file_path, target_file_path)

# Merge the first dataset; every copied file gets a "_black" suffix
merge_datasets(dataset1, output_dataset, suffix="_black")

# Merge the second dataset into the same folders; every copied file gets a "_white" suffix
merge_datasets(dataset2, output_dataset, suffix="_white")


print(f"Datasets merged into: {output_dataset}")

Datasets merged into: C:\Users\User\OneDrive\Documents\SignLanguageApp\SLangDataset\LMARK_merged_data_est_CombinedBW


### Run Trainer3.py to train on the new data, then run the cells below to open the UI.

In [1]:
import tensorflow as tf
# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpu_devices))

Num GPUs Available: 1


In [2]:
import cv2
import mediapipe as mp
from cvzone.HandTrackingModule import HandDetector
from cvzone.ClassificationModule import Classifier
import numpy as np
import math
import tkinter as tk
from tkinter import scrolledtext
from PIL import Image, ImageTk

# ---------------------------
# Your existing processing code
# ---------------------------

def detect_skin(frame):
    """Build a smoothed skin mask for `frame` by thresholding in YCrCb space.

    `frame` is converted with ``cv2.COLOR_BGR2YCrCb``, its first channel is
    histogram-equalized, and pixels inside fixed bounds are kept. The mask is
    then cleaned with a morphological open and close, blurred, and finally
    AND-ed with a filled rendering of its own external contours.

    Returns:
        A single-channel mask with the same height and width as `frame`.
    """
    # Equalize the first (luminance) channel in place before thresholding.
    ycrcb_img = cv2.cvtColor(frame, cv2.COLOR_BGR2YCrCb)
    ycrcb_img[:, :, 0] = cv2.equalizeHist(ycrcb_img[:, :, 0])

    # First channel is unconstrained (0-255); the other two are limited to
    # 133-173 and 77-127. These may need retuning after equalization.
    skin_lo = np.array([0, 133, 77], dtype=np.uint8)
    skin_hi = np.array([255, 173, 127], dtype=np.uint8)
    skin_mask = cv2.inRange(ycrcb_img, skin_lo, skin_hi)

    # Noise reduction: open, then close, with a 5x5 elliptical kernel, then blur.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        skin_mask = cv2.morphologyEx(skin_mask, morph_op, ellipse)
    skin_mask = cv2.GaussianBlur(skin_mask, (5, 5), 0)

    # Fill ALL external contours (not only the largest one) and intersect the
    # filled shapes with the blurred mask.
    outlines, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filled = np.zeros_like(skin_mask)
    if outlines:
        cv2.drawContours(filled, outlines, -1, 255, thickness=cv2.FILLED)

    return cv2.bitwise_and(skin_mask, filled)

# ---------------------------
# Initialization
# ---------------------------
# Open capture device 0 and set up the hand detector (one hand at most)
# and the classifier loaded from the saved .h5 model.
# NOTE(review): the model path is absolute and machine-specific.
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
classifier = Classifier("C:/Users/User/OneDrive/Documents/SignLanguageApp/TrainedBinary2Model/MobileNetV2_model.h5")

# Padding (in pixels) added around the detected hand bounding box when cropping.
offset = 45
# Side length (in pixels) of the square image handed to the classifier.
imgSize = 250
# Class names indexed by the classifier's predicted index (25 entries, "A" through "Y").
labels = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y"]

mp_hands = mp.solutions.hands  # for landmark connections
# NOTE(review): update_frame reads mp_hands.HAND_CONNECTIONS directly; this alias is not used in the code shown here.
hand_connections = mp_hands.HAND_CONNECTIONS

# To store the history of predictions
prediction_history = []

# ---------------------------
# Tkinter UI Setup
# ---------------------------
# Main application window.
root = tk.Tk()
root.title("Sign Language Recognition")
root.geometry("1200x800")  # Adjust window size as needed

# Divide the window into two panels:
# Left panel (biggest) for the main camera feed.
left_frame = tk.Frame(root, width=800, height=800, bg="black")
left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

# Right panel for the additional images and prediction history.
right_frame = tk.Frame(root, width=400, height=800)
right_frame.pack(side=tk.RIGHT, fill=tk.Y)

# Label for the main camera feed (largest display)
main_image_label = tk.Label(left_frame)
main_image_label.pack(fill=tk.BOTH, expand=True)

# Labels for the processed binary image and the hand landmarks.
# Both are stacked top-to-bottom in the right panel and refreshed by update_frame.
binary_label = tk.Label(right_frame)
binary_label.pack(pady=5)

landmarks_label = tk.Label(right_frame)
landmarks_label.pack(pady=5)

# Scrolled text widget to show prediction history.
# It is kept in the 'disabled' state; update_frame switches it to 'normal'
# only while inserting a new line.
history_text = scrolledtext.ScrolledText(right_frame, width=40, height=20)
history_text.pack(pady=5)
history_text.configure(state='disabled')

# ---------------------------
# Update function for video frames and UI elements
# ---------------------------
# Maximum number of entries kept in prediction_history and in the history widget.
_HISTORY_LIMIT = 50
# Minimum classifier score required before a prediction is drawn and logged.
_MIN_CONFIDENCE = 0.75


def _draw_hand_skeleton(canvas, lm_list, origin, color):
    """Draw landmark dots and hand connections onto `canvas` in place.

    `lm_list` holds landmark coordinates in full-frame pixels; `origin` is the
    (x, y) of the crop's top-left corner, subtracted so the points land in
    crop coordinates. Connections that reference a landmark index outside
    `lm_list` are skipped.
    """
    origin_x, origin_y = origin
    points = [(lm[0] - origin_x, lm[1] - origin_y) for lm in lm_list]
    for point in points:
        cv2.circle(canvas, point, 4, color, -1)
    for start, end in mp_hands.HAND_CONNECTIONS:
        if start < len(points) and end < len(points):
            cv2.line(canvas, points[start], points[end], color, 2)


def _letterbox_to_square(binary_bgr, w, h):
    """Fit `binary_bgr` onto a black imgSize x imgSize greyscale canvas.

    The longer side of the hand bounding box (`w` x `h`, both > 0) is scaled
    to imgSize and the shorter side is scaled by the same factor; the resized
    image is centred on the canvas along the shorter axis.
    """
    canvas = np.zeros((imgSize, imgSize), np.uint8)
    if h / w > 1:
        # Taller than wide: fill the full height, pad left and right.
        scale = imgSize / h
        wCal = math.ceil(scale * w)
        resized = cv2.resize(binary_bgr, (wCal, imgSize))
        wGap = math.ceil((imgSize - wCal) / 2)
        canvas[:, wGap:wCal + wGap] = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    else:
        # Wider than tall (or square): fill the full width, pad top and bottom.
        scale = imgSize / w
        hCal = math.ceil(scale * h)
        resized = cv2.resize(binary_bgr, (imgSize, hCal))
        hGap = math.ceil((imgSize - hCal) / 2)
        canvas[hGap:hCal + hGap, :] = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    return canvas


def _show_on_label(label, rgb_image):
    """Display `rgb_image` (an RGB array) on a Tk label, or clear it when None."""
    if rgb_image is None:
        label.configure(image='')
        return
    photo = ImageTk.PhotoImage(image=Image.fromarray(rgb_image))
    # Keep a reference on the widget so the image object stays alive while shown.
    label.imgtk = photo
    label.configure(image=photo)


def update_frame():
    """Process one camera frame, refresh the three image panels, and reschedule.

    For each frame: detect a hand, crop around its bounding box (padded by
    `offset`), build a binary skin image with the landmarks overlaid, fit it
    onto an imgSize x imgSize canvas, and classify it. Predictions scoring
    above _MIN_CONFIDENCE are drawn on the main image and appended to the
    history, which is capped at _HISTORY_LIMIT entries both in
    `prediction_history` and in the on-screen text widget. The function
    re-arms itself with ``root.after(10, ...)``.

    NOTE(review): assumes ``detector.findHands(img, draw=False)`` returns a
    ``(hands, img)`` pair, as the unpacking below requires — confirm against
    the installed cvzone version.
    """
    global prediction_history
    success, img = cap.read()
    if not success:
        # No frame available; try again shortly.
        root.after(10, update_frame)
        return

    imgOutput = img.copy()
    hands, img = detector.findHands(img, draw=False)

    # Images for the right-panel display; they stay None when no hand is processed.
    imgWhite_for_display = None
    imgCrop_landmarked_for_display = None

    if hands:
        hand = hands[0]
        x, y, w, h = hand['bbox']
        # Pad the bounding box by `offset` and clamp it to the frame.
        y1, y2 = max(0, y - offset), min(img.shape[0], y + h + offset)
        x1, x2 = max(0, x - offset), min(img.shape[1], x + w + offset)
        imgCrop = img[y1:y2, x1:x2]

        # A degenerate bbox (zero width/height) would divide by zero or ask
        # cv2.resize for an empty target, so such frames are skipped.
        if imgCrop.shape[0] > 0 and imgCrop.shape[1] > 0 and w > 0 and h > 0:
            lm_list = hand['lmList'] if 'lmList' in hand else []

            # Draw hand landmarks on a copy of the crop (for visualization).
            imgCrop_landmarked = imgCrop.copy()
            _draw_hand_skeleton(imgCrop_landmarked, lm_list, (x1, y1), (0, 0, 255))

            # Create the binary image from the (un-annotated) cropped region.
            binaryMask = detect_skin(imgCrop)
            binary_result = np.zeros_like(imgCrop)
            binary_result[binaryMask > 0] = [255, 255, 255]

            # Overlay landmarks on the binary image in black.
            _draw_hand_skeleton(binary_result, lm_list, (x1, y1), (0, 0, 0))

            # Resize the binary image to a fixed size (while preserving aspect ratio).
            imgWhite = _letterbox_to_square(binary_result, w, h)

            # Prepare for classification: the classifier is given a 3-channel image.
            imgWhiteRGB = cv2.cvtColor(imgWhite, cv2.COLOR_GRAY2BGR)
            prediction, index = classifier.getPrediction(imgWhiteRGB, draw=False)

            # If the confidence is high enough, annotate the main image and record the prediction.
            if prediction[index] > _MIN_CONFIDENCE and 0 <= index < len(labels):
                cv2.rectangle(imgOutput, (x - offset, y - offset - 50),
                              (x - offset + 90, y - offset - 50 + 50), (255, 0, 255), cv2.FILLED)
                cv2.putText(imgOutput, labels[index], (x, y - 26),
                            cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)
                cv2.rectangle(imgOutput, (x - offset, y - offset),
                              (x + w + offset, y + h + offset), (255, 0, 255), 4)

                # Append the prediction with its probability to the history.
                pred_text = f"{labels[index]}: {prediction[index]:.2f}"
                prediction_history.append(pred_text)
                if len(prediction_history) > _HISTORY_LIMIT:
                    prediction_history = prediction_history[-_HISTORY_LIMIT:]

                history_text.configure(state='normal')
                history_text.insert(tk.END, pred_text + "\n")
                # Cap the widget as well; otherwise it grows by one line per
                # accepted frame for as long as the UI stays open.
                line_count = int(history_text.index('end-1c').split('.')[0]) - 1
                if line_count > _HISTORY_LIMIT:
                    history_text.delete('1.0', f'{line_count - _HISTORY_LIMIT + 1}.0')
                history_text.see(tk.END)
                history_text.configure(state='disabled')

            # Save images to display on the right panel.
            imgWhite_for_display = imgWhite
            imgCrop_landmarked_for_display = imgCrop_landmarked

    # ---------------------------
    # Convert OpenCV images to a format Tkinter can display.
    # Main image (imgOutput) is in BGR, so convert to RGB.
    _show_on_label(main_image_label, cv2.cvtColor(imgOutput, cv2.COLOR_BGR2RGB))

    # Update the processed binary image display (cleared when no hand was processed).
    binary_rgb = None
    if imgWhite_for_display is not None:
        binary_rgb = cv2.cvtColor(imgWhite_for_display, cv2.COLOR_GRAY2RGB)
    _show_on_label(binary_label, binary_rgb)

    # Update the hand landmarks display (cleared when no hand was processed).
    landmarks_rgb = None
    if imgCrop_landmarked_for_display is not None:
        landmarks_rgb = cv2.cvtColor(imgCrop_landmarked_for_display, cv2.COLOR_BGR2RGB)
    _show_on_label(landmarks_label, landmarks_rgb)

    root.after(10, update_frame)

# ---------------------------
# Handle window closing to release the camera
# ---------------------------
def on_closing():
    """Release the camera and close the Tk window.

    The window is destroyed in a ``finally`` clause so that an error while
    releasing the capture device cannot leave the window impossible to close.
    """
    try:
        cap.release()
    finally:
        root.destroy()

# Route the window's close button through on_closing so the camera is released.
root.protocol("WM_DELETE_WINDOW", on_closing)

# Start the update loop.
# update_frame reschedules itself with root.after; mainloop() then blocks
# until the window is destroyed.
update_frame()
root.mainloop()


ImportError: cannot import name 'builder' from 'google.protobuf.internal' (c:\Users\User\anaconda3\envs\tensorflow_env\lib\site-packages\google\protobuf\internal\__init__.py)