# 🤟 BISINDO Sign Language Recognition - Training (Optimized)

Notebook ini sudah dioptimasi untuk mengatasi masalah akurasi rendah.

**Perubahan dari versi sebelumnya:**
1. Learning rate lebih kecil (0.0001)
2. Batch size lebih kecil (16)
3. Label smoothing untuk mencegah overconfidence
4. Data augmentation
5. Better model architecture

## 1. Setup & Mount Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pickle
import json
import os
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model, regularizers
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
    LearningRateScheduler
)
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.utils.class_weight import compute_class_weight

import matplotlib.pyplot as plt
import seaborn as sns

# Report the runtime environment before any work is done.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

# Seed the TensorFlow and NumPy global random generators for reproducibility.
tf.random.set_seed(42)
np.random.seed(42)

## 2. Load Data

In [None]:
# ============================================
# ADJUST THESE PATHS
# ============================================
DATA_DIR = "/content/drive/MyDrive/BISINDO/processed"
MODEL_DIR = "/content/drive/MyDrive/BISINDO/models"

# Create the output directory up front; later cells write artifacts into it.
os.makedirs(MODEL_DIR, exist_ok=True)

# Load the pre-split feature and label arrays.
print("Loading data...")
X_train, y_train = (
    np.load(f"{DATA_DIR}/X_train.npy"),
    np.load(f"{DATA_DIR}/y_train.npy"),
)
X_test, y_test = (
    np.load(f"{DATA_DIR}/X_test.npy"),
    np.load(f"{DATA_DIR}/y_test.npy"),
)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# a label_encoder.pkl that comes from a trusted source.
with open(f"{DATA_DIR}/label_encoder.pkl", 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"Classes: {len(label_encoder.classes_)}")

# Dimensions reused by the model builders and the label encoding below.
# Axis 1 of X_train is taken as the sequence length, axis 2 as the feature count.
num_classes = len(label_encoder.classes_)
sequence_length, num_features = X_train.shape[1], X_train.shape[2]

## 3. Data Preprocessing & Augmentation

In [None]:
def add_noise(X, noise_factor=0.01):
    """Return ``X`` plus zero-mean Gaussian noise.

    The noise has the same shape as ``X`` and standard deviation
    ``noise_factor``; it is drawn from NumPy's global random state. ``X``
    itself is left unchanged.
    """
    jitter = np.random.normal(loc=0, scale=noise_factor, size=X.shape)
    return X + jitter

def time_shift(X, shift_max=5):
    """Shift every sequence along axis 1 by a random number of steps.

    For each sample one integer offset is drawn from
    ``[-shift_max, shift_max]`` using NumPy's global random state. A
    positive offset moves the frames towards later positions, a negative
    offset towards earlier ones. Frames pushed past either end are dropped
    and the vacated positions are left at zero. The result has the same
    shape and dtype as ``X``.
    """
    shifted = np.zeros_like(X)
    for row, sequence in enumerate(X):
        # One draw per sample, so each sequence gets its own offset.
        offset = np.random.randint(-shift_max, shift_max + 1)
        if offset == 0:
            shifted[row] = sequence
        elif offset > 0:
            shifted[row, offset:] = sequence[:-offset]
        else:
            shifted[row, :offset] = sequence[-offset:]
    return shifted

def scale_landmarks(X, scale_range=(0.9, 1.1)):
    """Multiply each sample by its own random scale factor.

    One factor per sample is drawn uniformly between ``scale_range[0]`` and
    ``scale_range[1]`` from NumPy's global random state. The factors have
    shape ``(len(X), 1, 1)``, so the same factor is broadcast over the
    remaining two axes of a sample.
    """
    low, high = scale_range[0], scale_range[1]
    factors = np.random.uniform(low, high, size=(len(X), 1, 1))
    return X * factors

# Build three augmented copies of the training set: noisy, time-shifted, scaled.
print("Augmenting training data...")

X_train_aug1 = add_noise(X_train, noise_factor=0.02)
X_train_aug2 = time_shift(X_train, shift_max=3)
X_train_aug3 = scale_landmarks(X_train)

# Stack the original first, then the augmented copies; the labels are repeated
# once per copy in the same order so rows and labels stay aligned.
stacked_copies = (X_train, X_train_aug1, X_train_aug2, X_train_aug3)
X_train_combined = np.concatenate(stacked_copies, axis=0)
y_train_combined = np.concatenate([y_train] * len(stacked_copies), axis=0)

# Shuffle features and labels with one shared permutation.
shuffle_idx = np.random.permutation(len(X_train_combined))
X_train_combined = X_train_combined[shuffle_idx]
y_train_combined = y_train_combined[shuffle_idx]

print(f"Original training size: {len(X_train)}")
print(f"Augmented training size: {len(X_train_combined)}")

# Clip values to [0, 1]; noise and scaling can push values outside that range.
# NOTE(review): X_test is not clipped in this cell - confirm the stored
# features already lie in [0, 1].
X_train_combined = X_train_combined.clip(0, 1)

# One-hot encode the integer labels for the categorical cross-entropy loss.
y_train_cat = to_categorical(y_train_combined, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

In [None]:
# Compute balanced class weights to compensate for class imbalance.
present_classes = np.unique(y_train_combined)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=y_train_combined
)

# Key each weight by its actual class index. Enumerating the weights would
# shift them onto the wrong classes whenever a label is missing from the
# training data. Classes with no training samples get a neutral weight of 1.0
# so that every class index from 0 to num_classes - 1 has an entry.
class_weight_dict = {class_id: 1.0 for class_id in range(num_classes)}
class_weight_dict.update(
    (int(class_id), float(weight))
    for class_id, weight in zip(present_classes, class_weights)
)
print("Class weights computed.")

## 4. Model Architecture (Improved)

In [None]:
def build_improved_lstm(sequence_length, num_features, num_classes):
    """
    Build the regularised, stacked bidirectional-LSTM classifier.

    Layer order:
    - batch normalization applied directly to the input
    - three bidirectional LSTM blocks (128, 64 and 32 units), each followed
      by batch normalization and dropout 0.4; the first two return full
      sequences, the last returns a single vector
    - Dense(128, relu) + batch normalization + dropout 0.5
    - Dense(64, relu) + dropout 0.3
    - softmax output with ``num_classes`` units

    Every LSTM kernel and hidden dense kernel carries an L2(0.001) penalty;
    the first two LSTM blocks also penalise their recurrent kernels.

    Returns the uncompiled ``Model``.
    """
    def l2_penalty():
        # A fresh regularizer instance per weight, as in a hand-written stack.
        return regularizers.l2(0.001)

    def bilstm_block(tensor, units, return_sequences, regularize_recurrent):
        # Bidirectional LSTM -> batch normalization -> dropout 0.4.
        lstm_kwargs = {'kernel_regularizer': l2_penalty()}
        if regularize_recurrent:
            lstm_kwargs['recurrent_regularizer'] = l2_penalty()
        recurrent = layers.LSTM(
            units, return_sequences=return_sequences, **lstm_kwargs
        )
        tensor = layers.Bidirectional(recurrent)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Dropout(0.4)(tensor)

    inputs = layers.Input(shape=(sequence_length, num_features))

    # Batch normalization on the input.
    x = layers.BatchNormalization()(inputs)

    # (units, return_sequences, regularize_recurrent) for each LSTM block.
    lstm_blocks = (
        (128, True, True),
        (64, True, True),
        (32, False, False),
    )
    for units, return_sequences, regularize_recurrent in lstm_blocks:
        x = bilstm_block(x, units, return_sequences, regularize_recurrent)

    # Dense head.
    x = layers.Dense(128, activation='relu', kernel_regularizer=l2_penalty())(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)

    x = layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty())(x)
    x = layers.Dropout(0.3)(x)

    # Output layer: one softmax unit per class.
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return Model(inputs, outputs)


def build_simple_lstm(sequence_length, num_features, num_classes):
    """
    Build the small baseline LSTM classifier.

    Layer order: input -> batch normalization -> LSTM(64, full sequence) ->
    dropout 0.3 -> LSTM(32, last output only) -> dropout 0.3 ->
    Dense(64, relu) -> dropout 0.3 -> softmax over ``num_classes``.

    Returns the uncompiled ``keras.Sequential`` model.
    """
    stack = [
        layers.Input(shape=(sequence_length, num_features)),
        layers.BatchNormalization(),
    ]

    # Two stacked LSTMs; only the first one hands a full sequence onwards.
    for units, keep_sequence in ((64, True), (32, False)):
        stack.append(layers.LSTM(units, return_sequences=keep_sequence))
        stack.append(layers.Dropout(0.3))

    # Dense head and softmax output.
    stack.append(layers.Dense(64, activation='relu'))
    stack.append(layers.Dropout(0.3))
    stack.append(layers.Dense(num_classes, activation='softmax'))

    return keras.Sequential(stack)

In [None]:
# ============================================
# CHOOSE MODEL
# ============================================
USE_SIMPLE_MODEL = False  # Set to True to train the simple baseline model

# Pair each builder with the name used as prefix for the saved artifacts.
model_builder, model_name = (
    (build_simple_lstm, "bisindo_simple_lstm")
    if USE_SIMPLE_MODEL
    else (build_improved_lstm, "bisindo_improved_lstm")
)
model = model_builder(sequence_length, num_features, num_classes)

model.summary()

## 5. Training Configuration

In [None]:
# Learning rate schedule
def lr_schedule(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    For ``epoch < 10`` the rate ramps linearly up to 1e-4 (warmup). It then
    stays at 1e-4 while ``epoch < 50``, drops to 5e-5 while ``epoch < 100``
    and is 1e-5 afterwards. The incoming ``lr`` is ignored.
    """
    warmup_epochs = 10
    base_rate = 0.0001

    if epoch < warmup_epochs:
        # Warmup
        return base_rate * (epoch + 1) / warmup_epochs

    # (exclusive upper epoch bound, rate) pairs checked in ascending order.
    for upper_bound, rate in ((50, 0.0001), (100, 0.00005)):
        if epoch < upper_bound:
            return rate
    return 0.00001

# Compile
# Label smoothing is listed among this notebook's optimisations: it softens
# the one-hot targets to discourage over-confident predictions. The string
# loss 'categorical_crossentropy' applies no smoothing, so the loss object is
# built explicitly.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=['accuracy']
)

# Callbacks
callbacks = [
    # Stop once validation accuracy has not improved by at least 0.001 for
    # 30 epochs, and roll back to the best weights seen.
    EarlyStopping(
        monitor='val_accuracy',
        patience=30,
        restore_best_weights=True,
        verbose=1,
        min_delta=0.001
    ),

    # Keep a copy of the model with the best validation accuracy on disk.
    ModelCheckpoint(
        filepath=f"{MODEL_DIR}/{model_name}_best.keras",
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),

    # The learning rate is driven by lr_schedule alone. ReduceLROnPlateau is
    # deliberately not combined with it: LearningRateScheduler sets the rate
    # at the start of every epoch from a schedule that ignores the current
    # value, so any plateau reduction would be overwritten before it could
    # affect a single training step.
    LearningRateScheduler(lr_schedule, verbose=0)
]

print("Model compiled!")

## 6. Training

In [None]:
# ============================================
# TRAINING
# ============================================
EPOCHS = 150
BATCH_SIZE = 16
VALIDATION_FRACTION = 0.2

# Hold out whole source samples for validation.
# X_train_combined is [original, noisy, shifted, scaled] stacked and then
# shuffled with shuffle_idx. Passing validation_split=0.2 would slice off the
# tail of that shuffled array, so validation rows would be augmented copies of
# samples the model also trains on. That leakage inflates val_accuracy, which
# drives both early stopping and checkpoint selection.
# Instead, every copy of a held-out source sample is removed from training and
# only its un-augmented row is used for validation.
num_source_samples = len(X_train)
source_ids = shuffle_idx % num_source_samples       # source sample of each row
is_unaugmented = shuffle_idx < num_source_samples   # rows from the original block

num_val_sources = max(1, int(num_source_samples * VALIDATION_FRACTION))
# Dedicated, fixed-seed generator so the hold-out set does not depend on how
# much of the global random stream earlier cells consumed.
val_source_ids = np.random.RandomState(42).permutation(num_source_samples)[:num_val_sources]
belongs_to_val_source = np.isin(source_ids, val_source_ids)

train_mask = ~belongs_to_val_source
val_mask = belongs_to_val_source & is_unaugmented

X_fit, y_fit = X_train_combined[train_mask], y_train_cat[train_mask]
X_val, y_val = X_train_combined[val_mask], y_train_cat[val_mask]

print(f"Training {model_name}...")
print(f"  Epochs: {EPOCHS}")
print(f"  Batch size: {BATCH_SIZE}")
print(f"  Training samples: {len(X_fit)}")
print(f"  Validation samples: {len(X_val)}")
print("=" * 50)

history = model.fit(
    X_fit, y_fit,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    class_weight=class_weight_dict,
    verbose=1
)

## 7. Evaluation

In [None]:
# Plot training history: accuracy on the left panel, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (training key, validation key, panel title, y-axis label) per panel.
panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for axis, (train_key, val_key, title, y_label) in zip(axes, panels):
    axis.plot(history.history[train_key], label='Train')
    axis.plot(history.history[val_key], label='Validation')
    axis.set_title(title)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.legend()
    axis.grid(True)

plt.tight_layout()
# Save before show() so the figure written to disk is the fully drawn one.
plt.savefig(f"{MODEL_DIR}/{model_name}_history.png", dpi=150)
plt.show()

In [None]:
# Score the trained model on the test split (original, non-augmented data).
# NOTE(review): the training arrays were clipped to [0, 1] while X_test is
# used as loaded - confirm the stored features already lie in that range.
print("Evaluating on test set...")

# With a single 'accuracy' metric, evaluate() yields the loss and the accuracy.
evaluation = model.evaluate(X_test, y_test_cat, verbose=0)
test_loss, test_accuracy = evaluation
print(
    f"\nTest Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)",
    sep="\n",
)

In [None]:
# Predictions: take the most probable class for every test sample.
y_pred = np.argmax(model.predict(X_test), axis=1)

# Pin the report to every known class index. Without `labels`, scikit-learn
# infers the classes from y_test and y_pred and raises when their number
# differs from len(target_names), e.g. when a class never occurs in the test
# split and is never predicted.
all_class_ids = np.arange(len(label_encoder.classes_))
class_names = [str(name) for name in label_encoder.classes_]

print("\n" + "=" * 60)
print("CLASSIFICATION REPORT")
print("=" * 60)
print(classification_report(
    y_test,
    y_pred,
    labels=all_class_ids,
    target_names=class_names,
    # Classes without predictions or support score 0 without a warning.
    zero_division=0,
))

In [None]:
# Confusion Matrix
# Fix the row/column order to every known class index. Without `labels`, the
# matrix only covers classes that occur in y_test or y_pred, so it can end up
# smaller than the list of tick labels and the labels would be misaligned.
class_names = label_encoder.classes_
cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(class_names)))

plt.figure(figsize=(14, 12))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names
)
plt.title(f'Confusion Matrix - {model_name}\nAccuracy: {test_accuracy*100:.2f}%')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
# Save before show() so the figure written to disk is the fully drawn one.
plt.savefig(f"{MODEL_DIR}/{model_name}_confusion_matrix.png", dpi=150)
plt.show()

## 8. Save Model

In [None]:
import shutil

# Save final model (the weights held by `model` after training finished).
model.save(f"{MODEL_DIR}/{model_name}_final.keras")
print(f"Model saved: {MODEL_DIR}/{model_name}_final.keras")

# Save info: metadata needed to interpret and reuse the saved model.
info = {
    'model_name': model_name,
    'test_accuracy': float(test_accuracy),
    'test_loss': float(test_loss),
    'epochs_trained': len(history.history['loss']),
    'sequence_length': sequence_length,
    'num_features': num_features,
    'num_classes': num_classes,
    # tolist() converts NumPy scalars to native Python values; json.dump
    # cannot serialise NumPy integer labels left behind by list(...).
    'classes': np.asarray(label_encoder.classes_).tolist(),
    'augmentation': True,
    'training_samples': len(X_train_combined)
}

with open(f"{MODEL_DIR}/{model_name}_info.json", 'w') as f:
    json.dump(info, f, indent=2)

# Copy label encoder next to the model so both can be loaded from one place.
shutil.copy(f"{DATA_DIR}/label_encoder.pkl", f"{MODEL_DIR}/label_encoder.pkl")

print("\nAll files saved!")
print(f"  - {model_name}_final.keras")
print(f"  - {model_name}_best.keras")
print(f"  - {model_name}_info.json")
print(f"  - label_encoder.pkl")

---

## 🎯 Tips Jika Akurasi Masih Rendah

1. **Coba `USE_SIMPLE_MODEL = True`** di cell model selection
2. **Tambah epochs** ke 200-300
3. **Kurangi batch size** ke 8
4. **Cek data augmentation** - bisa disable dulu untuk baseline