## Akuisisi Data

In [None]:
# DATA BISA DIAKUISISI DENGAN MENGGUNAKAN KAGGLE API ATAU 
# MENGUNDUH MANUAL DARI HALAMAN KAGGLE:
# https://www.kaggle.com/datasets/kapillondhe/american-sign-language

# UNZIP DATASET LALU LETAKKAN DATASET DI DIREKTORI YANG SAMA 
# DENGAN FILE INI

In [None]:
# JIKA TIDAK INGIN MENDOWNLOAD DATASET,
# LINK MODEL DAN DATA SIAP TRAIN TERSEDIA DI instruksi.txt

## Import Library

In [1]:
import os
import cv2
import mediapipe as mp
import pickle
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical





## Init Awal

In [2]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [3]:
TRAIN_DIR = 'ASL_Dataset/Train'
TEST_DIR  = 'ASL_Dataset/Test'

## Function Ekstrak Keypoints

In [5]:

from tqdm import tqdm  # progress bar

def extract_landmarks_with_progress(data_dir, hands):
    """
    Ekstrak landmark Mediapipe dari semua gambar di folder dataset

    :param data_dir: folder dataset (Train atau Test)
    :param hands: instance mp_hands.Hands
    :return: np.array(data), np.array(labels)
    """
    data = []
    labels = []

    labels_list = sorted([l for l in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, l))])
    total_images = sum(len(os.listdir(os.path.join(data_dir, l))) for l in labels_list)
    print(f"Total labels: {len(labels_list)}, Total images: {total_images}\n")

    image_count = 0

    for label in labels_list:
        label_path = os.path.join(data_dir, label)
        img_files = os.listdir(label_path)
        print(f"Processing label '{label}' with {len(img_files)} images...")

        for img_name in tqdm(img_files, desc=f"{label}", unit="img"):
            img_path = os.path.join(label_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue

            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = hands.process(img_rgb)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    data_aux = []
                    for lm in hand_landmarks.landmark:
                        data_aux.extend([lm.x, lm.y])

                    data.append(data_aux)
                    labels.append(label)

            image_count += 1

    print(f"\nExtraction completed. Total images processed: {image_count}")
    return np.array(data), np.array(labels)


## Visualisasi Awal

In [None]:
import os
import matplotlib.pyplot as plt


TRAIN_DIR = 'ASL_Dataset/Train'

with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5
) as hands:

    for label in sorted(os.listdir(TRAIN_DIR)):
        label_path = os.path.join(TRAIN_DIR, label)

        if not os.path.isdir(label_path):
            continue

        # ambil 1 gambar saja per kelas
        img_files = os.listdir(label_path)
        if len(img_files) == 0:
            continue

        img_path = os.path.join(label_path, img_files[0])
        img = cv2.imread(img_path)
        if img is None:
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    img_rgb,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

        plt.figure(figsize=(4, 4))
        plt.title(label)
        plt.imshow(img_rgb)
        plt.axis('off')

plt.show()


## Preprocessing

Ekstraksi Landmark -> vektor fitur & label -> siap training

In [15]:
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.9
) as hands:
    X_train, y_train = extract_landmarks_with_progress(TRAIN_DIR, hands)


Total labels: 28, Total images: 165670

Processing label 'A' with 5996 images...


A: 100%|██████████| 5996/5996 [05:12<00:00, 19.16img/s]


Processing label 'B' with 5996 images...


B: 100%|██████████| 5996/5996 [05:11<00:00, 19.27img/s]


Processing label 'C' with 5996 images...


C: 100%|██████████| 5996/5996 [04:29<00:00, 22.25img/s]


Processing label 'D' with 5996 images...


D: 100%|██████████| 5996/5996 [03:26<00:00, 29.10img/s]


Processing label 'E' with 5996 images...


E: 100%|██████████| 5996/5996 [05:22<00:00, 18.60img/s]


Processing label 'F' with 5996 images...


F: 100%|██████████| 5996/5996 [05:08<00:00, 19.45img/s]


Processing label 'G' with 5996 images...


G: 100%|██████████| 5996/5996 [05:08<00:00, 19.41img/s]


Processing label 'H' with 5996 images...


H: 100%|██████████| 5996/5996 [05:05<00:00, 19.60img/s]


Processing label 'I' with 5996 images...


I: 100%|██████████| 5996/5996 [05:04<00:00, 19.72img/s]


Processing label 'J' with 5996 images...


J: 100%|██████████| 5996/5996 [05:07<00:00, 19.49img/s]


Processing label 'K' with 5996 images...


K: 100%|██████████| 5996/5996 [04:34<00:00, 21.82img/s]


Processing label 'L' with 5996 images...


L: 100%|██████████| 5996/5996 [04:45<00:00, 20.97img/s]


Processing label 'M' with 5996 images...


M: 100%|██████████| 5996/5996 [03:47<00:00, 26.31img/s]


Processing label 'N' with 5996 images...


N: 100%|██████████| 5996/5996 [02:57<00:00, 33.71img/s]


Processing label 'Nothing' with 5996 images...


Nothing: 100%|██████████| 5996/5996 [02:47<00:00, 35.83img/s]


Processing label 'O' with 5996 images...


O: 100%|██████████| 5996/5996 [04:10<00:00, 23.96img/s]


Processing label 'P' with 5996 images...


P: 100%|██████████| 5996/5996 [03:57<00:00, 25.27img/s]


Processing label 'Q' with 5996 images...


Q: 100%|██████████| 5996/5996 [04:06<00:00, 24.29img/s]


Processing label 'R' with 5966 images...


R: 100%|██████████| 5966/5966 [05:25<00:00, 18.35img/s]


Processing label 'S' with 5996 images...


S: 100%|██████████| 5996/5996 [04:58<00:00, 20.11img/s]


Processing label 'Space' with 5886 images...


Space: 100%|██████████| 5886/5886 [03:40<00:00, 26.70img/s]


Processing label 'T' with 5648 images...


T: 100%|██████████| 5648/5648 [04:56<00:00, 19.08img/s]


Processing label 'U' with 4542 images...


U: 100%|██████████| 4542/4542 [04:04<00:00, 18.57img/s]


Processing label 'V' with 5996 images...


V: 100%|██████████| 5996/5996 [05:23<00:00, 18.54img/s]


Processing label 'W' with 5996 images...


W: 100%|██████████| 5996/5996 [05:19<00:00, 18.78img/s]


Processing label 'X' with 5996 images...


X: 100%|██████████| 5996/5996 [05:18<00:00, 18.80img/s]


Processing label 'Y' with 5720 images...


Y: 100%|██████████| 5720/5720 [05:01<00:00, 19.00img/s]


Processing label 'Z' with 5996 images...


Z: 100%|██████████| 5996/5996 [04:05<00:00, 24.40img/s]



Extraction completed. Total images processed: 165670


In [14]:
with mp_hands.Hands( 
    static_image_mode=True, 
    max_num_hands=1, 
    min_detection_confidence=0.9 
) as hands:
    X_test, y_test = extract_landmarks_with_progress(TEST_DIR, hands)

Total labels: 28, Total images: 112

Processing label 'A' with 4 images...


A: 100%|██████████| 4/4 [00:00<00:00, 15.82img/s]


Processing label 'B' with 4 images...


B: 100%|██████████| 4/4 [00:00<00:00, 18.32img/s]


Processing label 'C' with 4 images...


C: 100%|██████████| 4/4 [00:00<00:00, 18.51img/s]


Processing label 'D' with 4 images...


D: 100%|██████████| 4/4 [00:00<00:00, 28.17img/s]


Processing label 'E' with 4 images...


E: 100%|██████████| 4/4 [00:00<00:00, 18.41img/s]


Processing label 'F' with 4 images...


F: 100%|██████████| 4/4 [00:00<00:00, 18.52img/s]


Processing label 'G' with 4 images...


G: 100%|██████████| 4/4 [00:00<00:00, 18.74img/s]


Processing label 'H' with 4 images...


H: 100%|██████████| 4/4 [00:00<00:00, 17.48img/s]


Processing label 'I' with 4 images...


I: 100%|██████████| 4/4 [00:00<00:00, 17.80img/s]


Processing label 'J' with 4 images...


J: 100%|██████████| 4/4 [00:00<00:00, 17.66img/s]


Processing label 'K' with 4 images...


K: 100%|██████████| 4/4 [00:00<00:00, 18.27img/s]


Processing label 'L' with 4 images...


L: 100%|██████████| 4/4 [00:00<00:00, 18.58img/s]


Processing label 'M' with 4 images...


M: 100%|██████████| 4/4 [00:00<00:00, 18.74img/s]


Processing label 'N' with 4 images...


N: 100%|██████████| 4/4 [00:00<00:00, 31.27img/s]


Processing label 'Nothing' with 4 images...


Nothing: 100%|██████████| 4/4 [00:00<00:00, 34.25img/s]


Processing label 'O' with 4 images...


O: 100%|██████████| 4/4 [00:00<00:00, 33.94img/s]


Processing label 'P' with 4 images...


P: 100%|██████████| 4/4 [00:00<00:00, 34.66img/s]


Processing label 'Q' with 4 images...


Q: 100%|██████████| 4/4 [00:00<00:00, 34.82img/s]


Processing label 'R' with 4 images...


R: 100%|██████████| 4/4 [00:00<00:00, 18.16img/s]


Processing label 'S' with 4 images...


S: 100%|██████████| 4/4 [00:00<00:00, 18.01img/s]


Processing label 'Space' with 4 images...


Space: 100%|██████████| 4/4 [00:00<00:00, 28.21img/s]


Processing label 'T' with 4 images...


T: 100%|██████████| 4/4 [00:00<00:00, 18.24img/s]


Processing label 'U' with 4 images...


U: 100%|██████████| 4/4 [00:00<00:00, 18.84img/s]


Processing label 'V' with 4 images...


V: 100%|██████████| 4/4 [00:00<00:00, 18.06img/s]


Processing label 'W' with 4 images...


W: 100%|██████████| 4/4 [00:00<00:00, 18.27img/s]


Processing label 'X' with 4 images...


X: 100%|██████████| 4/4 [00:00<00:00, 18.38img/s]


Processing label 'Y' with 4 images...


Y: 100%|██████████| 4/4 [00:00<00:00, 17.94img/s]


Processing label 'Z' with 4 images...


Z: 100%|██████████| 4/4 [00:00<00:00, 23.76img/s]


Extraction completed. Total images processed: 112





## Simpan hasil ekstraksi

In [16]:
import pickle

# Simpan data train
with open('train_data.pickle', 'wb') as f:
    pickle.dump({'data': X_train, 'labels': y_train}, f)

# Simpan data test
with open('test_data.pickle', 'wb') as f:
    pickle.dump({'data': X_test, 'labels': y_test}, f)


## Load data siap training

Data train dan test yang sudah siap di train: https://drive.google.com/drive/folders/1-CYIYzcI2VOMf6ESV_ZW5BKrJ6mu2uZ5?usp=sharing



In [None]:
## Load data train dan test
# with open('trWain_data.pickle', 'rb') as f:
#     train = pickle.load(f)
# X_train = train['data']
# y_train = train['labels']

# with open('test_data.pickle', 'rb') as f:
#     test = pickle.load(f)
# X_test = test['data']
# y_test = test['labels']

In [5]:
with open('train_data.pickle', 'rb') as f:
    train = pickle.load(f)
X_full = train['data']
y_full = train['labels']

## Training Model - Arsitektur MLP

Encoding

In [6]:
# Encode label ke one-hot
le = LabelEncoder()
y_full_enc = to_categorical(le.fit_transform(y_full))

In [7]:
# Simpan label encoder
with open('label_encoder.pickle', 'wb') as f:
    pickle.dump(le, f)

Split Train/Test

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X_full, y_full_enc,
    test_size=0.15,
    random_state=42,
    stratify=y_full_enc.argmax(axis=1)  # jaga proporsi kelas
)

Inisialisasi Model

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [None]:
input_dim = X_train.shape[1]  # 42 (21 landmark * 2)
n_classes = y_train.shape[1]

In [11]:
model_nn = Sequential([
    Dense(128, activation='relu', input_shape=(input_dim,)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax')
])




In [12]:
model_nn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)




In [13]:
model_nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               5504      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 27)                1755      
                                                                 
Total params: 15515 (60.61 KB)
Trainable params: 15515 (60.61 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Training

In [14]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor='val_accuracy', 
    patience=3,     # jika val_accuracy tidak naik 3 epoch berturut-turut, stop
    restore_best_weights=True
)

In [15]:
# train the model
history = model_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,       # tetap set max epoch tinggi
    batch_size=16,
    callbacks=[early_stop]
)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


Evaluasi

In [16]:
# Eval
loss, acc = model_nn.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

Test Accuracy: 99.96%


In [17]:

# ground truth & prediksi
y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(model_nn.predict(X_test, verbose=0), axis=1)

print("Classification Report:")
print(classification_report(
    y_true,
    y_pred,
    target_names=le.classes_  
))


Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00       877
           B       1.00      1.00      1.00       887
           C       1.00      1.00      1.00       635
           D       1.00      1.00      1.00       239
           E       1.00      1.00      1.00       865
           F       1.00      1.00      1.00       872
           G       1.00      1.00      1.00       869
           H       1.00      1.00      1.00       873
           I       1.00      1.00      1.00       869
           J       1.00      1.00      1.00       886
           K       1.00      1.00      1.00       898
           L       1.00      1.00      1.00       748
           M       0.99      1.00      1.00       371
           N       1.00      0.97      0.99        77
           O       1.00      1.00      1.00       525
           P       1.00      1.00      1.00       435
           Q       1.00      1.00      1.00       417
    

## Simpan Model

In [None]:
model_nn.save('model_nn.h5')

  saving_api.save_model(


## Deteksi Realtime

Model yang sudah ditrain dan label encoder (wajib):
https://drive.google.com/drive/folders/1bLMTPYfIp5zfRJIPQUcq3ueDLU05xeTU?usp=sharing


In [4]:
# load model untuk prediksi 
from tensorflow.keras.models import load_model
model_nn = load_model('model_nn.h5')




In [5]:
# load label encoder agar bisa decode prediksi
with open("label_encoder.pickle", "rb") as f:
    le = pickle.load(f)

In [None]:
# run prediksi dummy awal agar model ter-load dengan benar 
# dan graph internal dibangun sebelum dipakai realtime 
dummy_input = np.zeros((1, 42))
model_nn.predict(dummy_input, verbose=0)

array([[7.5987242e-03, 1.5704582e-06, 9.2116881e-10, 9.2272643e-07,
        2.8171509e-03, 2.7851065e-08, 3.7160673e-04, 2.0923621e-06,
        1.0361982e-02, 6.8443827e-02, 1.3001333e-07, 2.5886871e-08,
        4.4728269e-05, 2.7605607e-05, 5.0693921e-05, 7.4636861e-05,
        3.6073860e-05, 5.6500890e-06, 9.0478027e-01, 1.0578526e-06,
        3.0797406e-08, 8.9296082e-05, 9.4686579e-09, 3.5365172e-10,
        5.1047425e-03, 5.7884140e-06, 1.8139531e-04]], dtype=float32)

pencet 'q' untuk keluar

In [8]:
cap = cv2.VideoCapture(0)

frame_count = 0
SKIP_FRAMES = 3  # prediksi tiap 3 frame
last_prediction = ""
last_confidence = 0.0

# Mediapipe Hands (Mode Realtime)
with mp_hands.Hands(
    min_detection_confidence=0.6,
    min_tracking_confidence=0.9
) as hands:

    while cap.isOpened():
        
        # inisialisasi variabel
        data_aux = [] # menyimpan landmark x dan y sementara
        x_ = [] # menyimpan koordinat x
        y_ = [] # menyimpan koordinat y

        # Baca Frame Webcam
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1 

        H, W, _ = frame.shape # dimensi frame

        # Preprocessing Frame
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_rgb = cv2.flip(frame_rgb, 1)
        frame_rgb.flags.writeable = False
        results = hands.process(frame_rgb)
        frame_rgb.flags.writeable = True
        frame_rgb = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

        # dijalankan jika tangan terdeteksi
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame_rgb,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(28, 255, 3), thickness=2, circle_radius=3),
                    mp_drawing.DrawingSpec(color=(236, 255, 3), thickness=2, circle_radius=3)
                )
                
                # Ekstraksi Fitur
                for lm in hand_landmarks.landmark:
                    data_aux.extend([lm.x, lm.y])
                    x_.append(lm.x)
                    y_.append(lm.y)

            # bounding box
            margin = 20
            x1 = max(int(min(x_) * W) - margin, 0)
            y1 = max(int(min(y_) * H) - margin, 0)
            x2 = min(int(max(x_) * W) + margin, W)
            y2 = min(int(max(y_) * H) + margin, H)

            # ===== PREDIKSI DENGAN FRAME SKIPPING =====
            # Mengubah landmark menjadi format input model
            data_input = np.array(data_aux)[0:42].reshape(1, -1)

            # Prediksi hanya dilakukan setiap SKIP_FRAMES
            if frame_count % SKIP_FRAMES == 0:
                proba_all = model_nn.predict(data_input, verbose=0)[0]
                class_index = np.argmax(proba_all)

                last_prediction = le.inverse_transform([class_index])[0]
                last_confidence = proba_all[class_index]

            prediction = last_prediction
            confidence = last_confidence

            # tampilkan hasil
            cv2.rectangle(frame_rgb, (x1, y1), (x2, y2), (255, 99, 173), 4)
            cv2.putText(
                frame_rgb,
                f"{prediction} ({confidence:.2f})",
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_DUPLEX,
                1.8,
                (255, 0, 0),
                2,
                cv2.LINE_AA
            )

        cv2.imshow("frame", frame_rgb)
        # keluar dngan menekan 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


