In [None]:
# Sign Language Detection - I
# Saving Data
import cv2
import numpy as np
import mediapipe as mp
import os
import csv

# Webcam capture plus the MediaPipe hand tracker used to collect samples.
cap = cv2.VideoCapture(0)
mp_hand = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hand = mp_hand.Hands(min_detection_confidence=0.7,min_tracking_confidence=0.7)

# Create the dataset folder if needed (no error when it already exists).
os.makedirs("asl_dataset", exist_ok=True)

file_path = "asl_dataset/landmarks.csv"
# Write the header row only when the CSV is created for the first time, so
# repeated runs keep appending samples to the same file.
if not os.path.isfile(file_path):
    with open(file_path,'w',newline='') as header_file:
        writer = csv.writer(header_file)
        writer.writerow([f"x{i}" for i in range(21)] + [f"y{i}" for i in range(21)] + ["label"])

# The stop key must lie outside a-z: every lowercase letter (including 'd')
# is a label key, so using a letter to stop would both record a sample for
# that letter and end the session.
ESC_KEY = 27

print("Press keys A-Z to record samples. Press ESC to stop.")

try:
    while cap.isOpened():
        r, f = cap.read()
        if not r:
            break
        # Mirror the frame so the preview behaves like a mirror.
        f = cv2.flip(f, 1)
        rgb = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)

        res = hand.process(rgb)
        key = cv2.waitKey(1) & 0xff

        if res.multi_hand_landmarks:
            # Frame size is constant within one frame; read it once.
            h, w, _ = f.shape
            for landmarks in res.multi_hand_landmarks:
                mp_draw.draw_landmarks(f, landmarks, mp_hand.HAND_CONNECTIONS)

                # Scale each landmark's lm.x / lm.y by the frame width / height
                # and truncate to integer pixel coordinates.
                x_list = [int(lm.x * w) for lm in landmarks.landmark]
                y_list = [int(lm.y * h) for lm in landmarks.landmark]

                # Lowercase a-z pressed: store this hand's landmarks under the
                # corresponding uppercase letter.
                if ord('a') <= key <= ord('z'):
                    label = chr(key).upper()
                    print(f"Saving label for : {label}")
                    with open(file_path, 'a', newline='') as f_csv:
                        writer = csv.writer(f_csv)
                        writer.writerow(x_list + y_list + [label])

        if key == ESC_KEY:
            print("Stopped recording samples.")
            break

        cv2.imshow("Sign Language Detection", f)
finally:
    # Always free the tracker, the camera and the preview window, even if the
    # loop above raised.
    hand.close()
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Sign Language Detection - II
# Training the Model

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def normalize_landmarks(df):
    """Express every landmark relative to landmark 0 of the same row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x0``..``x20`` and ``y0``..``y20``.
        Any other columns are passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``x{i}`` holds ``x{i} - x0`` and ``y{i}`` holds
        ``y{i} - y0``. The ``x0`` and ``y0`` columns (all zeros after the
        shift) are dropped. The input frame is not modified.

    Raises
    ------
    KeyError
        If any of the expected coordinate columns is missing.
    """
    x_cols = [f"x{i}" for i in range(21)]
    y_cols = [f"y{i}" for i in range(21)]

    # Subtract from the untouched input: updating column by column in place
    # would zero x0/y0 on the first step and turn every later subtraction
    # into a no-op.
    shifted = df.copy()
    shifted[x_cols] = df[x_cols].sub(df["x0"], axis=0)
    shifted[y_cols] = df[y_cols].sub(df["y0"], axis=0)

    return shifted.drop(["x0", "y0"], axis=1)


# Load the recorded samples: 42 coordinate columns plus a "label" column.
data = pd.read_csv("asl_dataset/landmarks.csv")

# Separate the target from the coordinates. `x` keeps the coordinates as read
# from disk; `features` is the copy that is renamed and normalised below.
y = data["label"]
x = data.drop(columns="label")
features = data.drop(columns="label")

# Rename columns to x0..x20 followed by y0..y20.
coord_names = [f"x{i}" for i in range(21)] + [f"y{i}" for i in range(21)]
features.columns = coord_names

# normalize_landmarks returns the frame without the x0 / y0 columns.
features = normalize_landmarks(features)

# Map the string labels to integer class ids.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
# Hyper-parameters in use: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}
# (presumably the best combination reported by the grid-search cell; re-check
# after the dataset changes).
model = RandomForestClassifier(n_estimators=200,max_depth=10,min_samples_split=2,random_state=42)
model.fit(x_train,y_train)

# Evaluate on the held-out split.
y_pred = model.predict(x_test)

ac = accuracy_score(y_test,y_pred)
print("Total Accuracy: ",ac)

# Decode the integer class ids back to the original labels so each report row
# is named after its sign instead of an opaque number. The metrics themselves
# are unchanged by the decoding.
y_test_labels = le.inverse_transform(y_test)
y_pred_labels = le.inverse_transform(y_pred)
print("Classification report :\n" ,classification_report(y_test_labels,y_pred_labels))

# Persist the model together with the encoder needed to decode its output.
joblib.dump(model,"asl_model.pkl")
joblib.dump(le,"label_encoder.pkl")


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for the random-forest hyper-parameters
# (2 x 3 x 2 = 12 combinations).
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5],
)

# Exhaustive search with 3-fold cross-validation on the training split.
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,
    verbose=2,
)
grid.fit(x_train, y_train)

print("Best Params:", grid.best_params_)


In [None]:
# Sign Language Detection - III
# Live Prediction with the Trained Model

import cv2
import numpy as np
import mediapipe as mp
import joblib

# Load the classifier and the label encoder from disk.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load files you produced yourself.
model = joblib.load("asl_model.pkl")
le = joblib.load("label_encoder.pkl")


cap = cv2.VideoCapture(0)
mp_hand = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hand.Hands(min_detection_confidence=0.8,min_tracking_confidence=0.8)

try:
    while cap.isOpened():
        r, f = cap.read()
        if not r:
            break
        # Mirror the frame so the preview behaves like a mirror.
        f = cv2.flip(f, 1)
        rgb = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
        res = hands.process(rgb)

        if res.multi_hand_landmarks:
            h, w, _ = f.shape
            for hand_idx, landmarks in enumerate(res.multi_hand_landmarks):
                mp_draw.draw_landmarks(f, landmarks, mp_hand.HAND_CONNECTIONS)

                # Scale each landmark's lm.x / lm.y by the frame width / height
                # and truncate to integer pixel coordinates.
                x_list = [int(lm.x * w) for lm in landmarks.landmark]
                y_list = [int(lm.y * h) for lm in landmarks.landmark]

                if len(x_list) == 21 and len(y_list) == 21:
                    # Offsets from landmark 0; landmark 0 itself is left out,
                    # giving x1..x20 followed by y1..y20 (40 features).
                    x0, y0 = x_list[0], y_list[0]
                    x_norm = [px - x0 for px in x_list[1:]]
                    y_norm = [py - y0 for py in y_list[1:]]
                    sample = np.array([x_norm + y_norm])

                    prediction = model.predict(sample)
                    label = le.inverse_transform(prediction)[0]
                    # One text line per detected hand so that two predictions
                    # are not drawn on top of each other.
                    text_origin = (10, 40 + 30 * hand_idx)
                    cv2.putText(f, f"Prediction: {label}", text_origin,
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1)

        cv2.imshow("Sign Language", f)
        # Press 'd' to stop the preview.
        if cv2.waitKey(25) & 0xff == ord('d'):
            break
finally:
    # Always free the tracker, the camera and the preview window, even if the
    # loop above raised.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()