<a href="https://colab.research.google.com/github/jianna4/Kenya-Sign-Language/blob/main/KSL_SIGN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: run this cell first so that the Kaggle data sources are imported;
# once that has happened the cell can be deleted.
# NOTE: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing here.
import kagglehub

# Download the landmark dataset and remember the path kagglehub reports for it.
joanwachuka_ksl_hand_landmarks_path = kagglehub.dataset_download(
    'joanwachuka/ksl-hand-landmarks'
)

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in files):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout ,Normalization , BatchNormalization, Bidirectional, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [None]:
# Configuration
# --- Data ---
DATA_PATH = "/kaggle/input/ksl-hand-landmarks/dataset"  # one sub-folder per label
LABELS = [  # Fixed known labels; list position is the integer class id
    'a',
    'is',
    'mother',
    'my',
    'this',
]
MAX_FRAMES = 60  # sequences are padded/truncated to this many frames
FEATURES = 126   # values per frame (the model's per-timestep input width)

# --- Training ---
EPOCHS = 50  # Reduced for faster training; adjust as needed
BATCH_SIZE = 4
LEARNING_RATE = 1e-3

In [None]:
# Load data (no augmentation)
# Each label has its own sub-folder under DATA_PATH; every .npy file in it is
# one recorded sequence. The label's position in LABELS is its class id.
X, y = [], []
for label_idx, label in enumerate(LABELS):
    label_dir = os.path.join(DATA_PATH, label)
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/val/test split) reproducible.
    for file in sorted(os.listdir(label_dir)):
        if file.endswith('.npy'):
            seq = np.load(os.path.join(label_dir, file))
            X.append(seq)
            y.append(label_idx)

# Fail with a clear message instead of an IndexError on X[0] below.
if not X:
    raise FileNotFoundError(f"No .npy sequences found under {DATA_PATH!r}")

print("Number of sequences:", len(X))
print("Example sequence shape:", X[0].shape)

# Integer class ids as an array (also used later for the stratified split).
y = np.array(y)

# Pad (or truncate) every sequence at the end so all share MAX_FRAMES frames.
# The list of per-sequence arrays is passed directly: wrapping it in an
# object-dtype ndarray first is unnecessary for pad_sequences.
X_padded = pad_sequences(X, maxlen=MAX_FRAMES, dtype='float32', padding='post', truncating='post')
y_onehot = to_categorical(y, num_classes=len(LABELS))
print("X shape after padding:", X_padded.shape)



In [None]:
# Stratified 70/15/15 train/validation/test split.
# Step 1: hold out 30% of the samples, keeping the class proportions of y.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_padded,
    y_onehot,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Step 2: halve the held-out part into validation and test sets. y_temp is
# one-hot, so integer class ids are recovered for stratification.
temp_class_ids = np.argmax(y_temp, axis=1)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=temp_class_ids,
)

for tag, split in (("X_train", X_train), ("X_test", X_test), ("x_val", X_val)):
    print(f"{tag} shape:", split.shape)

# Normalization layer over the last axis, with its statistics computed from
# the training split only.
norm = Normalization(axis=-1)
norm.adapt(X_train)

In [None]:


# MODEL
# Imported here so this cell is self-contained; Adam lets the configured
# LEARNING_RATE actually take effect instead of the string default "adam".
from tensorflow.keras.optimizers import Adam

model = Sequential([
    Input(shape=(MAX_FRAMES, FEATURES)),
    # `norm` was adapted on X_train in the split cell. Placing it inside the
    # model means the adapted statistics are applied during training and are
    # saved with the model, so callers can feed raw (padded) landmark arrays.
    norm,
    Bidirectional(LSTM(32, return_sequences=True, dropout=0.3)),
    BatchNormalization(),
    LSTM(16, dropout=0.3),
    BatchNormalization(),
    Dense(32, activation='relu'),
    Dropout(0.4),
    # One softmax output per entry in LABELS.
    Dense(len(LABELS), activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss="categorical_crossentropy",  # targets are one-hot encoded
    metrics=["accuracy"]
)

# Callbacks
callbacks = [
    # Stop once val_loss has not improved for 15 epochs and roll back to the
    # best weights seen.
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
    # Halve the learning rate after 8 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=8, min_lr=1e-6),
    # Keep the checkpoint with the highest validation accuracy on disk.
    ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max')
]

model.summary()

In [None]:
# ==============================
# TRAIN
# ==============================
# Fit on the training split, monitoring the validation split; the callbacks
# defined with the model handle early stopping, LR reduction and checkpointing.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

# Quick check on the held-out test split.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n✅ Test Accuracy: {test_acc:.4f}")

# ==============================
# SAVE
# ==============================
# Save final model & labels (once). The label array is stored next to the
# model so class indices can be mapped back to sign names.
model.save("ksl_sign_language_model.keras")
np.save("sign_labels.npy", np.array(LABELS))

print("Model and labels saved!")

In [None]:
# Evaluate
from sklearn.metrics import classification_report, confusion_matrix

print("\nEvaluating best model...")
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predict and convert both predictions and one-hot targets to class ids.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Pass the full list of class ids explicitly so the report and the matrix
# always cover every entry of LABELS, even if a class happens to be missing
# from the test split or from the predictions.
class_ids = list(range(len(LABELS)))

print("\nClassification Report:")
print(classification_report(
    y_true_classes,
    y_pred_classes,
    labels=class_ids,
    target_names=LABELS,  # was the undefined name `labels` (NameError)
))

print("\nConfusion Matrix:")
print(confusion_matrix(y_true_classes, y_pred_classes, labels=class_ids))