# Akuisisi Data

In [None]:
# DATA BISA DIAKUISISI DENGAN MENGGUNAKAN KAGGLE API ATAU 
# MENGUNDUH MANUAL DARI HALAMAN KAGGLE:
# https://www.kaggle.com/datasets/kapillondhe/american-sign-language

# UNZIP DATASET LALU LETAKKAN DATASET DI DIREKTORI YANG SAMA 
# DENGAN FILE INI

# Import Library

In [None]:
import os
import cv2
import mediapipe as mp
import pickle
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Init Awal

In [None]:
# Short aliases for the MediaPipe Hands solution and its drawing helpers;
# the visualisation, extraction and realtime cells below refer to these names.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [4]:
# Dataset locations, relative to the notebook's working directory.
# The extraction code treats every sub-folder of these directories as one class label.
TRAIN_DIR = 'ASL_Dataset/Train'
TEST_DIR  = 'ASL_Dataset/Test'

# Fungsi Ekstrak Keypoints

In [None]:

from tqdm import tqdm  # progress bar

def extract_landmarks_with_progress(data_dir, hands):
    """
    Extract MediaPipe hand landmarks from every image in a dataset folder.

    Every sub-directory of ``data_dir`` is treated as one class label and
    every file inside it as a candidate image. Labels and files are visited
    in sorted order so the returned rows are reproducible across runs and
    file systems. Files OpenCV cannot decode, and images in which no hand is
    detected, contribute no row; both cases are counted and reported.

    :param data_dir: dataset folder (Train or Test) with one sub-folder per label
    :param hands: ``mp_hands.Hands`` instance used to detect the landmarks
    :return: ``(data, labels)`` where ``data`` is an array with one row of
             ``[x0, y0, x1, y1, ...]`` landmark coordinates per detected hand
             and ``labels`` holds the matching folder name for each row.
             When nothing is detected ``data`` has shape ``(0, 42)``.
    """
    # 21 hand landmarks x (x, y); only used to give an empty result a 2-D shape.
    n_features = 42

    data = []
    labels = []

    # List each label directory once and reuse the listing for both the
    # total count and the processing loop.
    label_files = {}
    for entry in sorted(os.listdir(data_dir)):
        entry_path = os.path.join(data_dir, entry)
        if os.path.isdir(entry_path):
            label_files[entry] = sorted(os.listdir(entry_path))

    total_images = sum(len(files) for files in label_files.values())
    print(f"Total labels: {len(label_files)}, Total images: {total_images}\n")

    image_count = 0   # images that were decoded and passed to MediaPipe
    unreadable = 0    # files cv2.imread could not decode
    no_hand = 0       # decoded images in which no hand was detected

    for label, img_files in label_files.items():
        label_path = os.path.join(data_dir, label)
        print(f"Processing label '{label}' with {len(img_files)} images...")

        for img_name in tqdm(img_files, desc=f"{label}", unit="img"):
            img = cv2.imread(os.path.join(label_path, img_name))
            if img is None:
                unreadable += 1
                continue

            # OpenCV loads BGR; the image is converted to RGB before detection.
            results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            if results.multi_hand_landmarks:
                # One sample per detected hand, flattened as x0, y0, x1, y1, ...
                for hand_landmarks in results.multi_hand_landmarks:
                    data.append(
                        [coord for lm in hand_landmarks.landmark for coord in (lm.x, lm.y)]
                    )
                    labels.append(label)
            else:
                no_hand += 1

            image_count += 1

    print(f"\nExtraction completed. Total images processed: {image_count}")
    if unreadable or no_hand:
        print(f"Unreadable files skipped: {unreadable}, images without a detected hand: {no_hand}")

    features = np.array(data)
    if features.size == 0:
        # Keep the feature matrix 2-D even when nothing was detected.
        features = features.reshape(0, n_features)
    return features, np.array(labels)


# Visualisasi Awal

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

TRAIN_DIR = 'ASL_Dataset/Train'

# Preview one annotated image per class. The samples are collected first and
# drawn in a single grid figure: opening one figure per class would exceed
# Matplotlib's default open-figure warning threshold (20) for this dataset.
samples = []  # (label, RGB image with landmarks drawn on it)

with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5
) as hands:

    for label in sorted(os.listdir(TRAIN_DIR)):
        label_path = os.path.join(TRAIN_DIR, label)

        if not os.path.isdir(label_path):
            continue

        # Take a single image per class; sorting makes the choice reproducible.
        img_files = sorted(os.listdir(label_path))
        if not img_files:
            continue

        img = cv2.imread(os.path.join(label_path, img_files[0]))
        if img is None:
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        # The image is shown even when no hand is found, just without an overlay.
        for hand_landmarks in results.multi_hand_landmarks or ():
            mp_drawing.draw_landmarks(
                img_rgb,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )

        samples.append((label, img_rgb))

if samples:
    n_cols = min(7, len(samples))
    n_rows = -(-len(samples) // n_cols)  # ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(2.5 * n_cols, 2.5 * n_rows),
        squeeze=False,
    )

    # Hide every axis first so unused grid cells stay blank.
    for ax in axes.flat:
        ax.axis('off')

    for ax, (label, img_rgb) in zip(axes.flat, samples):
        ax.set_title(label)
        ax.imshow(img_rgb)

    fig.tight_layout()
    plt.show()


# Ekstraksi

In [15]:
# Build the training features from TRAIN_DIR.
# NOTE(review): min_detection_confidence=0.9 is stricter than the 0.5 used in
# the visualisation cell; images in which no hand is detected contribute no
# sample, so the number of rows can be smaller than the number of images.
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.9
) as hands:
    X_train, y_train = extract_landmarks_with_progress(TRAIN_DIR, hands)


Total labels: 28, Total images: 165670

Processing label 'A' with 5996 images...


A: 100%|██████████| 5996/5996 [05:12<00:00, 19.16img/s]


Processing label 'B' with 5996 images...


B: 100%|██████████| 5996/5996 [05:11<00:00, 19.27img/s]


Processing label 'C' with 5996 images...


C: 100%|██████████| 5996/5996 [04:29<00:00, 22.25img/s]


Processing label 'D' with 5996 images...


D: 100%|██████████| 5996/5996 [03:26<00:00, 29.10img/s]


Processing label 'E' with 5996 images...


E: 100%|██████████| 5996/5996 [05:22<00:00, 18.60img/s]


Processing label 'F' with 5996 images...


F: 100%|██████████| 5996/5996 [05:08<00:00, 19.45img/s]


Processing label 'G' with 5996 images...


G: 100%|██████████| 5996/5996 [05:08<00:00, 19.41img/s]


Processing label 'H' with 5996 images...


H: 100%|██████████| 5996/5996 [05:05<00:00, 19.60img/s]


Processing label 'I' with 5996 images...


I: 100%|██████████| 5996/5996 [05:04<00:00, 19.72img/s]


Processing label 'J' with 5996 images...


J: 100%|██████████| 5996/5996 [05:07<00:00, 19.49img/s]


Processing label 'K' with 5996 images...


K: 100%|██████████| 5996/5996 [04:34<00:00, 21.82img/s]


Processing label 'L' with 5996 images...


L: 100%|██████████| 5996/5996 [04:45<00:00, 20.97img/s]


Processing label 'M' with 5996 images...


M: 100%|██████████| 5996/5996 [03:47<00:00, 26.31img/s]


Processing label 'N' with 5996 images...


N: 100%|██████████| 5996/5996 [02:57<00:00, 33.71img/s]


Processing label 'Nothing' with 5996 images...


Nothing: 100%|██████████| 5996/5996 [02:47<00:00, 35.83img/s]


Processing label 'O' with 5996 images...


O: 100%|██████████| 5996/5996 [04:10<00:00, 23.96img/s]


Processing label 'P' with 5996 images...


P: 100%|██████████| 5996/5996 [03:57<00:00, 25.27img/s]


Processing label 'Q' with 5996 images...


Q: 100%|██████████| 5996/5996 [04:06<00:00, 24.29img/s]


Processing label 'R' with 5966 images...


R: 100%|██████████| 5966/5966 [05:25<00:00, 18.35img/s]


Processing label 'S' with 5996 images...


S: 100%|██████████| 5996/5996 [04:58<00:00, 20.11img/s]


Processing label 'Space' with 5886 images...


Space: 100%|██████████| 5886/5886 [03:40<00:00, 26.70img/s]


Processing label 'T' with 5648 images...


T: 100%|██████████| 5648/5648 [04:56<00:00, 19.08img/s]


Processing label 'U' with 4542 images...


U: 100%|██████████| 4542/4542 [04:04<00:00, 18.57img/s]


Processing label 'V' with 5996 images...


V: 100%|██████████| 5996/5996 [05:23<00:00, 18.54img/s]


Processing label 'W' with 5996 images...


W: 100%|██████████| 5996/5996 [05:19<00:00, 18.78img/s]


Processing label 'X' with 5996 images...


X: 100%|██████████| 5996/5996 [05:18<00:00, 18.80img/s]


Processing label 'Y' with 5720 images...


Y: 100%|██████████| 5720/5720 [05:01<00:00, 19.00img/s]


Processing label 'Z' with 5996 images...


Z: 100%|██████████| 5996/5996 [04:05<00:00, 24.40img/s]



Extraction completed. Total images processed: 165670


In [14]:
# Build the test features from TEST_DIR with the same detector settings as
# the training extraction.
# NOTE(review): images without a detected hand yield no sample. In the recorded
# run the classification report further down shows fewer than 4 samples for
# some classes (e.g. D, Space) and omits others (e.g. N, O, P, Q).
with mp_hands.Hands( 
    static_image_mode=True, 
    max_num_hands=1, 
    min_detection_confidence=0.9 
) as hands:
    X_test, y_test = extract_landmarks_with_progress(TEST_DIR, hands)

Total labels: 28, Total images: 112

Processing label 'A' with 4 images...


A: 100%|██████████| 4/4 [00:00<00:00, 15.82img/s]


Processing label 'B' with 4 images...


B: 100%|██████████| 4/4 [00:00<00:00, 18.32img/s]


Processing label 'C' with 4 images...


C: 100%|██████████| 4/4 [00:00<00:00, 18.51img/s]


Processing label 'D' with 4 images...


D: 100%|██████████| 4/4 [00:00<00:00, 28.17img/s]


Processing label 'E' with 4 images...


E: 100%|██████████| 4/4 [00:00<00:00, 18.41img/s]


Processing label 'F' with 4 images...


F: 100%|██████████| 4/4 [00:00<00:00, 18.52img/s]


Processing label 'G' with 4 images...


G: 100%|██████████| 4/4 [00:00<00:00, 18.74img/s]


Processing label 'H' with 4 images...


H: 100%|██████████| 4/4 [00:00<00:00, 17.48img/s]


Processing label 'I' with 4 images...


I: 100%|██████████| 4/4 [00:00<00:00, 17.80img/s]


Processing label 'J' with 4 images...


J: 100%|██████████| 4/4 [00:00<00:00, 17.66img/s]


Processing label 'K' with 4 images...


K: 100%|██████████| 4/4 [00:00<00:00, 18.27img/s]


Processing label 'L' with 4 images...


L: 100%|██████████| 4/4 [00:00<00:00, 18.58img/s]


Processing label 'M' with 4 images...


M: 100%|██████████| 4/4 [00:00<00:00, 18.74img/s]


Processing label 'N' with 4 images...


N: 100%|██████████| 4/4 [00:00<00:00, 31.27img/s]


Processing label 'Nothing' with 4 images...


Nothing: 100%|██████████| 4/4 [00:00<00:00, 34.25img/s]


Processing label 'O' with 4 images...


O: 100%|██████████| 4/4 [00:00<00:00, 33.94img/s]


Processing label 'P' with 4 images...


P: 100%|██████████| 4/4 [00:00<00:00, 34.66img/s]


Processing label 'Q' with 4 images...


Q: 100%|██████████| 4/4 [00:00<00:00, 34.82img/s]


Processing label 'R' with 4 images...


R: 100%|██████████| 4/4 [00:00<00:00, 18.16img/s]


Processing label 'S' with 4 images...


S: 100%|██████████| 4/4 [00:00<00:00, 18.01img/s]


Processing label 'Space' with 4 images...


Space: 100%|██████████| 4/4 [00:00<00:00, 28.21img/s]


Processing label 'T' with 4 images...


T: 100%|██████████| 4/4 [00:00<00:00, 18.24img/s]


Processing label 'U' with 4 images...


U: 100%|██████████| 4/4 [00:00<00:00, 18.84img/s]


Processing label 'V' with 4 images...


V: 100%|██████████| 4/4 [00:00<00:00, 18.06img/s]


Processing label 'W' with 4 images...


W: 100%|██████████| 4/4 [00:00<00:00, 18.27img/s]


Processing label 'X' with 4 images...


X: 100%|██████████| 4/4 [00:00<00:00, 18.38img/s]


Processing label 'Y' with 4 images...


Y: 100%|██████████| 4/4 [00:00<00:00, 17.94img/s]


Processing label 'Z' with 4 images...


Z: 100%|██████████| 4/4 [00:00<00:00, 23.76img/s]


Extraction completed. Total images processed: 112





# Simpan hasil ekstraksi dengan pickle

In [16]:
import pickle

# Save the training data so the slow extraction step does not have to be
# repeated; features and labels are stored under the 'data' / 'labels' keys.
with open('train_data.pickle', 'wb') as f:
    pickle.dump({'data': X_train, 'labels': y_train}, f)

# Save the test data in the same layout.
with open('test_data.pickle', 'wb') as f:
    pickle.dump({'data': X_test, 'labels': y_test}, f)


Data train dan test hasil ekstraksi yang sudah siap digunakan untuk training:


In [None]:
## Load the train and test data written by the save cell above.
# The files hold a dict with 'data' (features) and 'labels' entries.
with open('train_data.pickle', 'rb') as f:
    train = pickle.load(f)
X_train = train['data']
y_train = train['labels']

with open('test_data.pickle', 'rb') as f:
    test = pickle.load(f)
X_test = test['data']
y_test = test['labels']


# Training Model

In [18]:
# Initialise the model: a random forest with 200 trees and a fixed seed.
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42
)

In [19]:
# Fit the classifier on the extracted training landmarks and their labels.
print("Training model...")
model.fit(X_train, y_train)

Training model...


In [20]:
# Predict a label for every test sample.
y_pred = model.predict(X_test)

In [21]:
# Evaluate overall accuracy on the test samples.
# NOTE(review): X_test only contains images in which a hand was detected during
# extraction, so images without a detection are not reflected in this score.
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc*100:.2f}%")

Accuracy: 100.00%


In [22]:

# Per-class precision / recall / F1; the "support" column shows how many test
# samples each class actually has.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00         4
           B       1.00      1.00      1.00         4
           C       1.00      1.00      1.00         4
           D       1.00      1.00      1.00         1
           E       1.00      1.00      1.00         4
           F       1.00      1.00      1.00         4
           G       1.00      1.00      1.00         4
           H       1.00      1.00      1.00         4
           I       1.00      1.00      1.00         4
           J       1.00      1.00      1.00         4
           K       1.00      1.00      1.00         4
           L       1.00      1.00      1.00         4
           M       1.00      1.00      1.00         4
           R       1.00      1.00      1.00         4
           S       1.00      1.00      1.00         4
       Space       1.00      1.00      1.00         1
           T       1.00      1.00      1.00         4
   

# Simpan Model

In [23]:
# Persist the trained classifier; it is wrapped in a dict under the 'model'
# key, which is the layout the loading cell below reads back.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

# Deteksi Tangan Realtime

Model yang sudah dilatih dapat diunduh di:
https://drive.google.com/file/d/1fKEbXuT95EkuR8lrdrfD_TIDb_5KvNQX/view?usp=sharing

Lalu letakkan file model (`model.p`) di folder proyek.

In [24]:
# Load the trained classifier stored under the 'model' key of model.p.
# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file.
# Only load a model.p that you produced yourself or obtained from a source you
# trust (the notebook text above points to a downloadable copy).
with open('model.p', 'rb') as f:
    model_dict = pickle.load(f)
model_tes1 = model_dict['model']


In [None]:
# Real-time sign prediction from the default webcam (device 0).
# Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)

try:
    with mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.9) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera busy or unplugged): stop cleanly
                # instead of failing on frame.shape with frame being None.
                break
            H, W, _ = frame.shape

            # Convert BGR -> RGB for MediaPipe and mirror the image horizontally.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_rgb = cv2.flip(frame_rgb, 1)
            # Read-only while processing, as in the MediaPipe sample code
            # (presumably lets it avoid copying the frame).
            frame_rgb.flags.writeable = False
            results = hands.process(frame_rgb)
            frame_rgb.flags.writeable = True
            # Back to BGR, the channel order OpenCV uses for drawing and display.
            frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame_bgr,  # image to draw on
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(28, 255, 3), thickness=2, circle_radius=3),
                        mp_drawing.DrawingSpec(color=(236, 255, 3), thickness=2, circle_radius=3)
                    )

                # The model is trained on one hand (42 values), so only the
                # first detected hand is classified. The bounding box is built
                # from that same hand so the box and the label always agree.
                first_hand = results.multi_hand_landmarks[0]
                x_ = [lm.x for lm in first_hand.landmark]
                y_ = [lm.y for lm in first_hand.landmark]
                data_aux = [coord for lm in first_hand.landmark for coord in (lm.x, lm.y)]

                # Landmarks are normalised to [0, 1]; scale to pixels, pad by a
                # margin and clamp to the frame.
                margin = 20  # 20 or 30
                x1 = max(int(min(x_) * W) - margin, 0)
                y1 = max(int(min(y_) * H) - margin, 0)
                x2 = min(int(max(x_) * W) + margin, W)
                y2 = min(int(max(y_) * H) + margin, H)
                prediction = model_tes1.predict([np.array(data_aux)])[0]

                cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (255, 99, 173), 6)
                cv2.putText(frame_bgr, str(prediction), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 5, (255, 0, 0), 5, cv2.LINE_AA)

            cv2.imshow('frame', frame_bgr)
            key = cv2.waitKey(1)
            if key == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error or a
    # kernel interrupt; otherwise the webcam stays locked for the next run.
    cap.release()
    cv2.destroyAllWindows()

