# AI Sign Language Translator - Training Notebook

## Step 1: Install Dependencies
Run this cell first. **If it tells you to Restart Runtime, please do so from the Runtime menu!**

In [None]:
!pip install mediapipe tensorflow tensorflowjs opencv-python pandas scikit-learn matplotlib

## Step 2: Restart Runtime
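In the Colab menu, choose **Runtime > Restart session** (labelled **Restart runtime** in some Colab versions) so that the freshly installed packages are loaded.
Then proceed to Step 3.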

## Step 3: Setup Data
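Upload `kaggle.json` (Option A) or your dataset zip (Option B) using the file browser on the left, then uncomment the lines for the option you chose in the cell below and run it.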

In [None]:
import os
# OPTION A: Using Kaggle API
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json
# !kaggle datasets download -d grassknoted/asl-alphabet
# !unzip -q asl-alphabet.zip

# OPTION B: Manual Upload (e.g. data.zip)
# !unzip -q data.zip

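## Step 4: Process Data

Detect the hand in each training image with MediaPipe and save the landmark coordinates, together with the class label, to `landmarks.csv`.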

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd

# ROBUST IMPORT STRATEGY
# Resolve `mp_hands` (the MediaPipe hands solution module) by trying three
# access paths in order and keeping the first one that works.
# NOTE(review): presumably needed because some MediaPipe builds do not expose
# `solutions` as an attribute of the top-level package — confirm.
try:
    # 1) Attribute access on the already-imported top-level package.
    mp_hands = mp.solutions.hands
except AttributeError:
    try:
        # 2) Import the submodule through its full dotted path.
        import mediapipe.python.solutions.hands as mp_hands
    except ImportError:
        # 3) Last resort: import `solutions` from the package explicitly.
        #    An error raised here is not caught and surfaces to the user.
        from mediapipe import solutions
        mp_hands = solutions.hands

def _landmarks_from_rgb(detector, rgb_image):
    """Run `detector` on an RGB image and flatten the first hand's landmarks, or return None."""
    results = detector.process(rgb_image)
    if not results.multi_hand_landmarks:
        return None
    first_hand = results.multi_hand_landmarks[0]
    return [coord for point in first_hand.landmark for coord in (point.x, point.y, point.z)]


def extract_landmarks(image_path, hands=None):
    """Return the flattened hand landmarks detected in one image.

    Args:
        image_path: Path of the image file to read with OpenCV.
        hands: Optional, already-constructed ``mp_hands.Hands`` detector to
            reuse. When omitted, a temporary detector (static image mode, one
            hand, 0.5 minimum detection confidence) is created for this call
            and closed afterwards. Passing a shared detector avoids rebuilding
            the detector for every image of a large batch.

    Returns:
        A flat list ``[x0, y0, z0, x1, y1, z1, ...]`` holding the coordinates
        of the first detected hand, or ``None`` when the image cannot be read,
        no hand is detected, or processing fails.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            # Unreadable or missing file: nothing to detect, and no detector
            # needs to be built for it.
            return None
        # The image is converted from BGR to RGB before detection.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if hands is not None:
            return _landmarks_from_rgb(hands, rgb_image)

        with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as detector:
            return _landmarks_from_rgb(detector, rgb_image)
    except Exception as exc:
        # Best effort: one bad image must not abort a whole extraction run, but
        # the failure is reported so that systematic problems (for example a
        # broken MediaPipe install) do not silently yield an empty dataset.
        print(f"Warning: failed to process {image_path}: {exc!r}")
        return None

data = []
DATA_DIR = "asl_alphabet_train/asl_alphabet_train" # CHECK THIS PATH matches your unzipped folder
MAX_IMAGES_PER_CLASS = 100  # Limited for speed. Set to None to process every image.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def list_class_images(class_dir, limit=MAX_IMAGES_PER_CLASS):
    """Return up to `limit` image file names found directly in `class_dir`.

    Extensions are matched case-insensitively (so `.JPG` is accepted) and the
    names are sorted: os.listdir returns entries in arbitrary order, so slicing
    the unsorted listing would pick a different subset from run to run.
    `limit=None` returns every matching name.
    """
    names = sorted(
        name for name in os.listdir(class_dir)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )
    return names if limit is None else names[:limit]


if not os.path.exists(DATA_DIR):
    print(f"Warning: {DATA_DIR} not found. please check the file browser on the left.")
else:
    # Only sub-directories are classes; stray files next to them are ignored so
    # the reported class count is accurate.
    classes = sorted(
        entry for entry in os.listdir(DATA_DIR)
        if os.path.isdir(os.path.join(DATA_DIR, entry))
    )
    print(f"Processing {len(classes)} classes")

    for label in classes:
        class_dir = os.path.join(DATA_DIR, label)
        images = list_class_images(class_dir, limit=MAX_IMAGES_PER_CLASS)
        detected = 0
        for img_name in images:
            path = os.path.join(class_dir, img_name)
            lm = extract_landmarks(path)
            if lm:
                # One row per image: the landmark coordinates followed by the label.
                data.append(lm + [label])
                detected += 1
        print(f"  {label}: hand detected in {detected}/{len(images)} images")

    # Column order mirrors the row layout: x_0, y_0, z_0, x_1, ... then the label.
    columns = [f'{c}_{i}' for i in range(21) for c in ['x', 'y', 'z']] + ['label']
    df = pd.DataFrame(data, columns=columns)
    df.to_csv('landmarks.csv', index=False)
    if df.empty:
        print("Warning: no hand landmarks were extracted; landmarks.csv contains no rows.")
    else:
        print("Data extracted to landmarks.csv")

## Step 5: Train Model

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

SEED = 42  # Fixed seed so the train/test split and the training run are repeatable.

if not os.path.exists('landmarks.csv'):
    print("Error: landmarks.csv not found. Did Step 4 run?")
else:
    df = pd.read_csv('landmarks.csv')

    if df.empty:
        # A header-only file would otherwise fail inside train_test_split with
        # an error that does not point at the real cause.
        print("Error: landmarks.csv contains no rows. Check DATA_DIR and re-run Step 4.")
    else:
        # Seeds Python's `random`, NumPy and TensorFlow in one call.
        tf.keras.utils.set_random_seed(SEED)

        # Every column except the last is a feature; the last column is the label.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        # Map string labels to integer ids; `classes` keeps the id -> label order
        # that the model's output units follow.
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)
        classes = label_encoder.classes_
        print("Classes:", classes)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y_encoded, test_size=0.2, random_state=SEED
        )

        # Small fully connected classifier. The input width is taken from the
        # data instead of being hard-coded.
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(128, activation='relu', input_shape=(X.shape[1],)),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(len(classes), activation='softmax')
        ])

        # Integer class ids are used directly, hence the sparse loss.
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))
        model.save('model.h5')

## Step 6: Download Model

In [None]:
# Convert the Keras model file model.h5 into TensorFlow.js format, writing the
# converted files into the tfjs_model directory.
!tensorflowjs_converter --input_format=keras model.h5 tfjs_model
# Bundle the converted model directory into a single archive.
!zip -r tfjs_model.zip tfjs_model
# NOTE(review): google.colab is presumably only importable inside Google Colab;
# elsewhere this import is expected to fail and the archive must be copied manually.
from google.colab import files
# Hand the archive over for download.
files.download('tfjs_model.zip')