In [None]:
# ==========================================
# 3_train_model.ipynb
# Training model pengenalan bahasa isyarat
# ==========================================

import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Load every saved sequence from the processed dataset folder.
# Layout read below: <data_dir>/<label>/<sample>.npy, where the name of each
# sub-folder is used as the class label of the samples inside it.
X, y = [], []
data_dir = "../../dataset/processed_words"

# os.listdir() returns entries in arbitrary order; sorting both listings makes
# the sample order (and everything derived from it) reproducible across runs
# and machines.
for label in sorted(os.listdir(data_dir)):
    folder = os.path.join(data_dir, label)
    if not os.path.isdir(folder):
        # Ignore stray files sitting next to the class folders.
        continue
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".npy"):
            continue
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code from a malicious file. Only load
        # files produced by this project. It is kept here for compatibility
        # with the existing dataset; if the files hold plain numeric arrays,
        # allow_pickle=False is the safer choice (TODO confirm).
        data = np.load(os.path.join(folder, file), allow_pickle=True)
        if len(data) == 0:
            # An empty recording carries no frames to learn from.
            continue
        X.append(data)
        y.append(label)

print(f"✅ Total samples: {len(X)}")
print(f"📂 Total classes: {len(set(y))}")

# Encode the labels: map each class name to an integer id, then expand the
# ids into one-hot rows as expected by the categorical cross-entropy loss.
le = LabelEncoder()
le.fit(y)
class_ids = le.transform(y)
y_encoded = to_categorical(class_ids)

# Bring every sequence to exactly 30 time steps so the samples can be stacked
# into a single float32 array: longer sequences are cut at the end, shorter
# ones are padded at the end.
X_padded = pad_sequences(
    X,
    maxlen=30,
    dtype='float32',
    padding='post',
    truncating='post',
)

# Build the model: two stacked LSTM layers followed by a softmax classifier
# with one output unit per class known to the label encoder.
# The input shape is read from the padded data instead of repeating the
# padding length as a literal, so the two can never drift apart.
timesteps, n_features = X_padded.shape[1], X_padded.shape[2]
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(timesteps, n_features)),
    LSTM(128, activation='relu'),
    Dense(len(le.classes_), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model.
# Keras carves validation_split off the *last* rows of the arrays, before any
# shuffling. The samples were appended class folder by class folder, so
# without reordering the validation set would come entirely from the final
# class(es). Shuffle once, with a fixed seed for reproducibility, so both the
# training and validation parts can contain a mix of classes.
shuffle_idx = np.random.default_rng(42).permutation(len(X_padded))
model.fit(
    X_padded[shuffle_idx],
    y_encoded[shuffle_idx],
    epochs=30,
    batch_size=8,
    validation_split=0.2,
)

# Save the trained model in the native .keras format.
# A single variable holds the output folder so the paths and the success
# message below cannot disagree with each other.
models_dir = "../../models"
os.makedirs(models_dir, exist_ok=True)
model.save(os.path.join(models_dir, "sign_word_model.keras"))

# Save the fitted label encoder as well, so inference code can map predicted
# class indices back to the original label names.
import pickle
with open(os.path.join(models_dir, "label_word_encoder.pkl"), "wb") as f:
    pickle.dump(le, f)

# Report the folder that was actually written to (the previous message
# named '../models/', which is not where the files are saved).
print(f"✅ Model dan label encoder berhasil disimpan di folder '{models_dir}/'")


✅ Total samples: 10
📂 Total classes: 2
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 30, 64)            74240     
                                                                 
 lstm_3 (LSTM)               (None, 128)               98816     
                                                                 
 dense_1 (Dense)             (None, 2)                 258       
                                                                 
Total params: 173314 (677.01 KB)
Trainable params: 173314 (677.01 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21