In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping 
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:

# ================= CONFIGURATION =================
# Path to the training CSV. The original absolute path is kept as the default
# so existing setups behave the same; set the GAZE_DATASET_FILE environment
# variable to point at a different location without editing the notebook.
DATASET_FILE = os.environ.get('GAZE_DATASET_FILE') or (
    '/home/ahmad/Documents/Prata/Code/ML/gesture-ml-project_V1/src/dataset/eye_dataset_wheelchair_continuous.csv'
)
# Output artifacts: trained model, fitted feature scaler, fitted label encoder
MODEL_FILENAME = 'wheelchair_gaze_model.h5'
SCALER_FILENAME = 'gaze_scaler.pkl'
ENCODER_FILENAME = 'label_encoder.pkl'

# Feature columns used as model input
FEATURES = ['dx_rel', 'dy_rel', 'ear_left', 'ear_right']

# Sequence length (time window): number of consecutive rows per sample
SEQUENCE_LENGTH = 20

In [None]:

# ================= 1. LOAD & PREPROCESS DATA =================

def load_data(filepath, min_dist_cm=20, max_dist_cm=80):
    """Read the dataset CSV and keep only rows inside a distance band.

    Args:
        filepath: Path to the CSV file. The file must provide at least the
            ``dist_cm`` and ``label`` columns.
        min_dist_cm: Exclusive lower bound applied to ``dist_cm``.
        max_dist_cm: Exclusive upper bound applied to ``dist_cm``.

    Returns:
        A new, independent DataFrame with the rows where
        ``min_dist_cm < dist_cm < max_dist_cm``. The original row index is
        preserved (it is not reset).

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        KeyError: If ``dist_cm`` or ``label`` is missing from the file.
    """
    print(f"Loading dataset: {filepath}...")
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"File {filepath} tidak ditemukan!")

    df = pd.read_csv(filepath)

    # Fail with a readable message instead of a bare KeyError further down.
    missing = [col for col in ('dist_cm', 'label') if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Drop distance outliers. The explicit copy() detaches the result from the
    # unfiltered frame so callers can add or overwrite columns without pandas
    # raising SettingWithCopyWarning.
    in_band = (df['dist_cm'] > min_dist_cm) & (df['dist_cm'] < max_dist_cm)
    df = df[in_band].copy()

    print(f"Total data bersih: {len(df)} baris")
    print(f"Distribusi Label:\n{df['label'].value_counts()}")
    return df

def create_sequences(data, labels, time_steps=1):
    """Slice aligned features and labels into fixed-length, single-label windows.

    A window of ``time_steps`` consecutive rows is kept only when every row in
    it carries the same label, so no sample straddles a label transition.
    Rows are matched by position, not by index.

    Args:
        data: Feature rows (DataFrame or 2-D array-like), one row per frame.
        labels: Per-row labels (Series or 1-D array-like), positionally aligned
            with ``data``.
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` stacks the kept windows as
        ``(n_windows, time_steps, n_features)`` and ``y`` holds one label per
        window. Both arrays are empty when no window qualifies.
    """
    # Convert once up front; slicing NumPy arrays in the loop avoids the
    # per-window pandas iloc/nunique overhead.
    features = np.asarray(data)
    targets = np.asarray(labels)
    n_rows = min(len(features), len(targets))

    Xs, ys = [], []
    if time_steps < 1:
        return np.array(Xs), np.array(ys)

    # "+ 1" so the last full window (the one ending on the final row) is
    # included; without it the final window is silently dropped and an input
    # of exactly `time_steps` rows yields nothing.
    for start in range(n_rows - time_steps + 1):
        stop = start + time_steps
        window_labels = targets[start:stop]
        # Keep the window only if the label is constant across it
        # (a NaN label never equals itself, so such windows are skipped).
        if (window_labels == window_labels[0]).all():
            Xs.append(features[start:stop])
            ys.append(window_labels[-1])

    return np.array(Xs), np.array(ys)

# Main Process
if __name__ == "__main__":
    try:
        df = load_data(DATASET_FILE)
    except Exception as e:
        print(f"Error: {e}")
        # exit() is the interactive-shell helper; raising SystemExit stops
        # execution right here and reports a non-zero status.
        raise SystemExit(1) from e

    # A. Label encoding: map each string label to an integer id
    encoder = LabelEncoder()
    df['label_encoded'] = encoder.fit_transform(df['label'])
    labels_map = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
    print(f"Label Mapping: {labels_map}")

    # B. Normalisation (scaling)
    # NOTE(review): the scaler is fitted on every row before the split, so
    # validation/test rows influence the min/max used for training data.
    scaler = MinMaxScaler()
    df[FEATURES] = scaler.fit_transform(df[FEATURES])

    # C. Build fixed-length sequences
    print("Membuat Sequence Data...")
    X, y = create_sequences(df[FEATURES], df['label_encoded'], SEQUENCE_LENGTH)

    if len(X) == 0:
        print("ERROR: Data tidak cukup untuk membuat sequence.")
        raise SystemExit(1)

    # D. One-hot encoding
    # Pin the width to the encoder's class count. Without num_classes the
    # width is max(y) + 1, which is too narrow when the highest-numbered class
    # produced no valid window, and the model output would no longer line up
    # with encoder.classes_.
    y_cat = to_categorical(y, num_classes=len(encoder.classes_))

    # E. 3-WAY SPLIT (Train 70%, Val 15%, Test 15%)
    # NOTE(review): windows are produced with a stride of one row, so
    # neighbouring windows share most of their rows. A random split therefore
    # puts near-duplicate windows in train and test, and test metrics are
    # likely optimistic. Consider splitting by recording/time before windowing.
    # Step 1: separate Train (70%) from the remainder (30%)
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y_cat, test_size=0.3, random_state=42, stratify=y
    )

    # Step 2: halve the remainder into Val and Test
    # 0.5 * 0.3 = 0.15 (15% of the total each)
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42, stratify=np.argmax(y_temp, axis=1)
    )

    print(f"Data Training   : {X_train.shape[0]} samples")
    print(f"Data Validation : {X_val.shape[0]} samples")
    print(f"Data Testing    : {X_test.shape[0]} samples")


In [None]:

    # ================= 2. BUILD THE BiLSTM MODEL =================

    # Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
    # dropout masks and batch shuffling repeat between runs (42 matches the
    # random_state already used for the data split).
    tf.keras.utils.set_random_seed(42)

    model = Sequential()
    # Declare the input shape (time steps, features) with an explicit Input
    # object rather than an `input_shape` kwarg on the Bidirectional wrapper,
    # which recent Keras releases warn against.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
    # Bidirectional layer returns the full sequence so the next LSTM can consume it
    model.add(Bidirectional(LSTM(64, return_sequences=True)))
    model.add(Dropout(0.3))
    # Second LSTM collapses the sequence to its final state
    model.add(LSTM(32, return_sequences=False))
    model.add(Dropout(0.3))
    # One softmax unit per one-hot column of y_cat
    model.add(Dense(y_cat.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    

In [None]:

    # ================= 3. TRAINING =================

    # Halt when val_loss has gone 10 epochs without improving;
    # restore_best_weights asks Keras to put the best weights back.
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Write the model to MODEL_FILENAME each time val_loss hits a new minimum.
    checkpoint = ModelCheckpoint(
        MODEL_FILENAME, monitor='val_loss', mode='min', save_best_only=True, verbose=1
    )

    print("\nMulai Training...")
    # Validation uses the explicit hold-out split rather than validation_split.
    fit_options = dict(
        epochs=50,
        batch_size=32,
        validation_data=(X_val, y_val),
        callbacks=[checkpoint, early_stop],
        verbose=1,
    )
    history = model.fit(X_train, y_train, **fit_options)


In [None]:

    # ================= 4. COMPREHENSIVE EVALUATION =================

    print("\n" + "="*30)
    print("EVALUASI MODEL (DATA TESTING)")
    print("="*30)

    # Predict on the test split and collapse probabilities / one-hot rows to class ids
    y_pred_prob = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred_prob, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

    # Every class id the encoder knows, in encoded order. Passing this as
    # `labels` keeps the report rows and confusion-matrix axes aligned with
    # encoder.classes_ even when a class is absent from the test labels and
    # predictions (otherwise classification_report raises on the mismatch).
    class_ids = np.arange(len(encoder.classes_))

    # 1. Basic metrics (zero_division=0 scores an undefined class as 0 without a warning)
    acc = accuracy_score(y_true_classes, y_pred_classes)
    prec = precision_score(y_true_classes, y_pred_classes, average='weighted', zero_division=0)
    rec = recall_score(y_true_classes, y_pred_classes, average='weighted', zero_division=0)
    f1 = f1_score(y_true_classes, y_pred_classes, average='weighted', zero_division=0)

    print(f"\nOverall Accuracy  : {acc*100:.2f}%")
    print(f"Weighted Precision: {prec*100:.2f}%")
    print(f"Weighted Recall   : {rec*100:.2f}%")
    print(f"Weighted F1-Score : {f1*100:.2f}%")

    # 2. Classification report (per-label detail)
    print("\nDetail Per Kelas:")
    print(classification_report(
        y_true_classes, y_pred_classes,
        labels=class_ids, target_names=encoder.classes_, zero_division=0,
    ))

    # 3. Confusion matrix (drawn in the figure below)
    cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_ids)

    # Persist the fitted scaler and encoder alongside the model
    joblib.dump(scaler, SCALER_FILENAME)
    joblib.dump(encoder, ENCODER_FILENAME)
    print("\n[INFO] Scaler & Encoder disimpan.")

    # Evaluation plots. Plotting is best-effort: a failure here is reported
    # but does not undo the metrics and artifacts produced above.
    try:
        fig, (ax_history, ax_cm, ax_metrics) = plt.subplots(1, 3, figsize=(15, 5))

        # Plot A: training vs validation accuracy per epoch
        ax_history.plot(history.history['accuracy'], label='Train Acc')
        ax_history.plot(history.history['val_accuracy'], label='Val Acc')
        ax_history.set_title('Training History')
        ax_history.set_xlabel('Epoch')
        ax_history.set_ylabel('Accuracy')
        ax_history.legend()

        # Plot B: confusion matrix
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=encoder.classes_, yticklabels=encoder.classes_,
                    ax=ax_cm)
        ax_cm.set_title('Confusion Matrix')
        ax_cm.set_xlabel('Predicted')
        ax_cm.set_ylabel('Actual')

        # Plot C: bar chart of the summary metrics
        metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
        values = [acc, prec, rec, f1]
        bars = ax_metrics.bar(metrics, values, color=['#4285F4', '#34A853', '#FBBC05', '#EA4335'])
        ax_metrics.set_title('Model Performance Metrics')
        ax_metrics.set_ylim(0, 1.1)
        # Print each value just above its bar
        for bar in bars:
            height = bar.get_height()
            ax_metrics.text(bar.get_x() + bar.get_width()/2., height,
                            f'{height:.2f}', ha='center', va='bottom')

        fig.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Gagal menampilkan plot: {e}")