In [1]:
# ================= TUNING LOOP (Random Search, no GridCV, no K-Fold) =================
# Persyaratan:
# - Variabel berikut sudah ada di lingkungan: 
#   X_signal_tensor, X_image_tensor, y_tensor, image_channels, num_classes, device
# - Fungsi util: create_multimodal_dataloader, run_epoch_multimodal
# - Model class: HybridTransformerEncoderCNN
# - Paket: numpy, pandas, torch, sklearn, matplotlib sudah diimpor

import os
import random
import shutil
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.utils.class_weight import compute_class_weight

# ---------- Tuning configuration ----------
# Number of random-search trials to run.
N_TRIALS = 10
# Fraction of the samples held out for validation.
VAL_SIZE = 0.2
# Upper bound on the number of epochs in a trial.
EPOCHS = 20
# Mini-batch sizes for the training and validation dataloaders.
BATCH_TRAIN = 32
BATCH_VAL = 32
# Early-stopping patience in epochs (set to 0 to disable early stopping).
EARLY_STOP_PATIENCE = 7

# Hyperparameter search space; one candidate per key is drawn at random.
# NOTE(review): every list currently holds a single candidate, so each draw
# yields the same hyperparameter set.
SEARCH_SPACE = dict(
    d_model=[256],
    num_heads=[8],
    num_layers=[4],
    d_ff=[1536],
    dropout=[0.3],
    fusion_method=["attention"],
    feature_dim=[192],
    lr=[3e-4],
    weight_decay=[3e-5],
)
# Fixed (not searched) maximum length, following the existing setup.
MAX_LEN_FIXED = 200

def sample_hparams():
    """Draw one hyperparameter set from ``SEARCH_SPACE``.

    Each searched key gets one value picked with ``random.choice`` from its
    candidate list; the fixed ``MAX_LEN_FIXED`` is added under ``"max_len"``.

    Returns:
        dict mapping hyperparameter name to the sampled value.
    """
    searched_keys = (
        "d_model",
        "num_heads",
        "num_layers",
        "d_ff",
        "dropout",
        "fusion_method",
        "feature_dim",
        "lr",
        "weight_decay",
    )
    # Keys are visited in this fixed order so the RNG is consumed the same
    # way on every call.
    drawn = {key: random.choice(SEARCH_SPACE[key]) for key in searched_keys}
    drawn["max_len"] = MAX_LEN_FIXED
    return drawn

def train_val_split_stratified(X_sig, X_img, y, val_size=0.2, random_state=42):
    """Return train/validation indices from a single stratified shuffle split.

    Args:
        X_sig: Signal samples. Unused; kept so existing callers keep working.
        X_img: Image samples. Unused; kept so existing callers keep working.
        y: Class labels, one per sample. May be a torch tensor (on any
            device) or any array-like accepted by ``np.asarray``.
        val_size: Validation share, forwarded as ``test_size`` to
            ``StratifiedShuffleSplit``.
        random_state: Seed forwarded to ``StratifiedShuffleSplit``.

    Returns:
        Tuple ``(tr_idx, va_idx)`` of index arrays into the samples.
    """
    # Tensor.numpy() raises for CUDA / grad-tracking tensors, so normalise
    # the labels to a host-side NumPy array first.
    if torch.is_tensor(y):
        labels = y.detach().cpu().numpy()
    else:
        labels = np.asarray(y)

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=val_size, random_state=random_state)
    # The splitter needs an X argument with one row per sample; a column of
    # sample indices is passed in place of the real features.
    placeholder_X = np.arange(len(labels)).reshape(-1, 1)
    tr_idx, va_idx = next(splitter.split(placeholder_X, labels))
    return tr_idx, va_idx

def _snapshot_state_dict(model):
    """Return a copy of ``model.state_dict()`` made of independent tensor clones."""
    # state_dict() hands back references to the live parameters; dict.copy()
    # would keep aliasing them, so each tensor is cloned explicitly.
    return {key: value.detach().clone() for key, value in model.state_dict().items()}


def _timed_validation_pass(model, val_loader, device):
    """Run the model over ``val_loader`` and collect predictions, timings and GPU memory.

    Returns:
        Tuple ``(y_true, y_pred, inference_times, memory_usage)`` where
        ``inference_times`` holds seconds per sample for each batch and
        ``memory_usage`` holds allocated GPU memory in MB for each batch
        (empty when the model does not run on CUDA).
    """
    # Only touch the CUDA APIs when the model really runs on a CUDA device;
    # torch.cuda.memory_allocated() rejects non-CUDA devices.
    on_cuda = torch.cuda.is_available() and torch.device(device).type == "cuda"

    y_true, y_pred = [], []
    inference_times, memory_usage = [], []

    model.eval()
    with torch.no_grad():
        for signal_batch, image_batch, label_batch in val_loader:
            signal_batch = signal_batch.to(device)
            image_batch = image_batch.to(device)
            if signal_batch.ndim == 2:
                # Add a trailing axis to 2-D signal batches before the forward pass.
                signal_batch = signal_batch.unsqueeze(2)

            # Synchronise around the forward pass so asynchronous CUDA
            # kernels are included in the measured wall-clock time.
            if on_cuda:
                torch.cuda.synchronize()
            started = time.perf_counter()
            logits = model(signal_batch, image_batch)
            if on_cuda:
                torch.cuda.synchronize()
            elapsed = time.perf_counter() - started

            inference_times.append(elapsed / signal_batch.size(0))
            if on_cuda:
                memory_usage.append(torch.cuda.memory_allocated(device) / 1024**2)

            y_pred.extend(torch.argmax(logits, dim=1).cpu().numpy())
            y_true.extend(label_batch.cpu().numpy())

    return y_true, y_pred, inference_times, memory_usage


def _per_class_specificity(cm):
    """Return TN / (TN + FP) for every class of a square confusion matrix."""
    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp
    fn = cm.sum(axis=1) - tp
    tn = cm.sum() - (tp + fp + fn)
    # The small epsilon avoids division by zero when TN + FP is zero.
    return tn / (tn + fp + 1e-8)


def _save_confusion_matrix_plot(cm, display_labels, trial_id, save_dir):
    """Render the confusion matrix of a trial and save it as a PNG in ``save_dir``."""
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    fig, ax = plt.subplots(figsize=(5.5, 5))
    disp.plot(ax=ax, cmap="Blues", colorbar=False, values_format="d")
    ax.set_title(f"Confusion Matrix Hybrid Transformer-Encoder-CNN - Trial {trial_id}")
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, f"hybrid_encoder_confusion_matrix_trial{trial_id}.png"), dpi=150)
    plt.close(fig)


def _save_training_curves(train_accuracies, val_accuracies, train_losses, val_losses,
                          trial_id, save_dir):
    """Plot per-epoch accuracy and loss curves of a trial and save them as a PNG."""
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(15, 6))

    ax_acc.plot(train_accuracies, label="Train Accuracy", linewidth=2)
    ax_acc.plot(val_accuracies, label="Validation Accuracy", linewidth=2)
    ax_acc.set_title(f"Accuracy - Trial {trial_id}", fontsize=12)
    ax_acc.set_xlabel("Epoch")
    ax_acc.set_ylabel("Accuracy")
    ax_acc.legend()
    ax_acc.grid(True, alpha=0.3)

    ax_loss.plot(train_losses, label="Train Loss", linewidth=2)
    ax_loss.plot(val_losses, label="Validation Loss", linewidth=2)
    ax_loss.set_title(f"Loss - Trial {trial_id}", fontsize=12)
    ax_loss.set_xlabel("Epoch")
    ax_loss.set_ylabel("Loss")
    ax_loss.legend()
    ax_loss.grid(True, alpha=0.3)

    fig.suptitle(f"Training Progress - Trial {trial_id} (Hybrid Transformer-Encoder-CNN)", fontsize=14)
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, f"hybrid_encoder_training_curves_trial{trial_id}.png"), dpi=150)
    plt.close(fig)


def train_one_trial(trial_id, save_dir, X_signal_tensor, X_image_tensor, y_tensor, hps):
    """Train and evaluate one hyperparameter configuration on a single split.

    The data is split once (stratified, seeded with ``42 + trial_id``), a
    ``HybridTransformerEncoderCNN`` is trained for up to ``EPOCHS`` epochs
    with class-weighted cross-entropy and AdamW, and the weights of the epoch
    with the highest validation accuracy are saved to ``save_dir``. The
    restored model is then timed and scored on the validation split, and a
    confusion-matrix plot plus accuracy/loss curves are written to ``save_dir``.

    NOTE(review): the same validation split is used both for selecting the
    best epoch and for the reported metrics, so the metrics are optimistic.

    Args:
        trial_id: 1-based trial number; used in logs, file names and the seed.
        save_dir: Existing directory that receives the checkpoint and plots.
        X_signal_tensor: Signal inputs, indexable by an index array.
        X_image_tensor: Image inputs, indexable by an index array.
        y_tensor: Integer class labels as a tensor.
        hps: Hyperparameter dict with the keys ``d_model``, ``num_heads``,
            ``num_layers``, ``d_ff``, ``dropout``, ``fusion_method``,
            ``feature_dim``, ``lr``, ``weight_decay`` and ``max_len``.

    Returns:
        dict with the trial id, accuracy, weighted precision / sensitivity /
        F1, mean per-class specificity, average inference time and GPU
        memory, and every hyperparameter under an ``hp_`` prefix.
    """
    print(f"\n========== Trial {trial_id}/{N_TRIALS} ==========")
    print("Hyperparameters:", hps)

    # Single stratified split (no k-fold); the seed varies with the trial.
    tr_idx, va_idx = train_val_split_stratified(
        X_signal_tensor, X_image_tensor, y_tensor, VAL_SIZE, random_state=42 + trial_id
    )
    X_sig_tr, X_sig_va = X_signal_tensor[tr_idx], X_signal_tensor[va_idx]
    X_img_tr, X_img_va = X_image_tensor[tr_idx], X_image_tensor[va_idx]
    y_tr, y_va = y_tensor[tr_idx], y_tensor[va_idx]

    train_loader = create_multimodal_dataloader(X_sig_tr, X_img_tr, y_tr, batch_size=BATCH_TRAIN, shuffle=True)
    val_loader = create_multimodal_dataloader(X_sig_va, X_img_va, y_va, batch_size=BATCH_VAL, shuffle=False)

    model = HybridTransformerEncoderCNN(
        signal_input_dim=1,
        image_input_channels=image_channels,
        d_model=hps["d_model"],
        num_heads=hps["num_heads"],
        num_layers=hps["num_layers"],
        d_ff=hps["d_ff"],
        dropout=hps["dropout"],
        num_classes=num_classes,
        fusion_method=hps["fusion_method"],
        feature_dim=hps["feature_dim"],
        max_len=hps["max_len"],
    ).to(device)

    total_params = sum(p.numel() for p in model.parameters())
    print(f"Model parameters: {total_params:,}")

    # Class-balanced loss weights computed from the training labels only.
    y_tr_np = y_tr.cpu().numpy()
    class_weights = compute_class_weight('balanced', classes=np.unique(y_tr_np), y=y_tr_np)
    class_weights = torch.tensor(class_weights, dtype=torch.float32, device=device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = torch.optim.AdamW(model.parameters(), lr=hps["lr"], weight_decay=hps["weight_decay"])

    best_val_acc = -1.0
    best_state = None
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    epochs_no_improve = 0

    # Cosmetic: print a placeholder progress line, then return the cursor to
    # the start of the line so the first real epoch line overwrites it.
    print(f"Epoch 01/{EPOCHS}", end="")
    print("\r", end="")

    for epoch in range(1, EPOCHS + 1):
        # Passing optimizer=None presumably puts run_epoch_multimodal in
        # evaluation mode - TODO confirm against its definition.
        tr_loss, tr_acc, _, _ = run_epoch_multimodal(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc, _, _ = run_epoch_multimodal(model, val_loader, criterion, None, device)

        train_losses.append(tr_loss)
        val_losses.append(val_loss)
        train_accuracies.append(tr_acc)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch:02d}/{EPOCHS} | "
              f"Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

        # Keep the weights of the best epoch of this trial.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = _snapshot_state_dict(model)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Optional early stopping (disabled when the patience is 0).
        if EARLY_STOP_PATIENCE and epochs_no_improve >= EARLY_STOP_PATIENCE:
            print(f"⏹️  Early stopping (no improve {EARLY_STOP_PATIENCE} epochs).")
            break

    # Restore the best weights (if any epoch ran) and save the trial checkpoint.
    if best_state is not None:
        model.load_state_dict(best_state)
    ckpt_name = f"hybrid_transformer_encoder_cnn_Tuning_trial{trial_id}.pth"
    torch.save(model.state_dict(), os.path.join(save_dir, ckpt_name))
    print(f"✅ Model saved: {ckpt_name}")

    # Computational evaluation + predictions on the validation split.
    y_true, y_pred, inference_times, memory_usage = _timed_validation_pass(model, val_loader, device)

    avg_inference_time = float(np.mean(inference_times)) if inference_times else 0.0
    avg_memory_usage = float(np.mean(memory_usage)) if memory_usage else 0.0
    print(f"⚡ Avg Inference Time: {avg_inference_time:.6f} s/sample")
    print(f"💾 Avg GPU Memory: {avg_memory_usage:.2f} MB")

    cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))

    # NOTE: specificity is the unweighted mean over classes, whereas the
    # precision / sensitivity / F1 values below are support-weighted.
    specificity_mean = float(np.mean(_per_class_specificity(cm)))

    # Fall back to numeric labels when the class count is not the expected 4,
    # so the plot labels always match the confusion matrix.
    class_names = ["N", "S", "V", "Q"]
    if len(class_names) != num_classes:
        class_names = [str(i) for i in range(num_classes)]

    # Per-trial visualisations (optional; can be dropped to run faster).
    _save_confusion_matrix_plot(cm, class_names, trial_id, save_dir)
    _save_training_curves(train_accuracies, val_accuracies, train_losses, val_losses,
                          trial_id, save_dir)

    summary = {
        "Trial": trial_id,
        "Accuracy":   float(accuracy_score(y_true, y_pred)),
        "Precision":  float(precision_score(y_true, y_pred, average="weighted", zero_division=0)),
        "Sensitivity": float(recall_score(y_true, y_pred, average="weighted", zero_division=0)),
        "Specificity": specificity_mean,
        "F1-Score":   float(f1_score(y_true, y_pred, average="weighted", zero_division=0)),
        "Avg_Inference_Time(s)": avg_inference_time,
        "Avg_Memory_Usage(MB)":  avg_memory_usage,
        # Store the hyperparameters so the trial can be reproduced.
        **{f"hp_{k}": v for k, v in hps.items()},
    }

    # Release the model and loaders before the next trial allocates its own.
    del model, optimizer, train_loader, val_loader, best_state
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return summary

def tune_hybrid_encoder_model(save_dir="checkpoints_hybrid_transformer_encoder_cnn"):
    """Run ``N_TRIALS`` random-search trials and persist a summary.

    For every trial a hyperparameter set is sampled and ``train_one_trial``
    is run on the module-level ``X_signal_tensor`` / ``X_image_tensor`` /
    ``y_tensor``. The per-trial summaries are sorted by accuracy, an
    "Average" row is appended, the table is written as CSV into ``save_dir``
    and the checkpoint of the most accurate trial is copied to
    ``hybrid_transformer_encoder_cnn_Tuning_best.pth``.

    Args:
        save_dir: Output directory; created if it does not exist.

    Returns:
        pandas.DataFrame with one row per trial (best first) followed by the
        "Average" row. When no trial is run, an empty frame with the metric
        columns is returned and nothing is written.
    """
    print("\n🚀 Training Hybrid Transformer-Encoder-CNN Model...")
    print("=" * 60)
    os.makedirs(save_dir, exist_ok=True)

    metric_cols = ["Accuracy", "Precision", "Sensitivity", "Specificity", "F1-Score",
                   "Avg_Inference_Time(s)", "Avg_Memory_Usage(MB)"]

    all_trials = []
    best_val_acc = -1.0
    best_trial_id = None
    best_ckpt = None

    for trial in range(1, N_TRIALS + 1):
        hps = sample_hparams()
        summary = train_one_trial(trial, save_dir, X_signal_tensor, X_image_tensor, y_tensor, hps)
        all_trials.append(summary)

        # Strict '>' keeps the earliest trial when accuracies tie.
        if summary["Accuracy"] > best_val_acc:
            best_val_acc = summary["Accuracy"]
            best_trial_id = trial
            best_ckpt = os.path.join(save_dir, f"hybrid_transformer_encoder_cnn_Tuning_trial{trial}.pth")

    if not all_trials:
        # With zero trials there is no "Accuracy" column to sort on.
        print("⚠️  No trials were run; nothing to summarise.")
        return pd.DataFrame(columns=["Trial", *metric_cols])

    # Per-trial table, best accuracy first.
    df_trials = pd.DataFrame(all_trials)
    df_trials = df_trials.sort_values(by="Accuracy", ascending=False).reset_index(drop=True)

    # Average row over the metric columns only; the hp_* columns stay empty.
    avg_row = {"Trial": "Average", **df_trials[metric_cols].mean().to_dict()}
    df_out = pd.concat([df_trials, pd.DataFrame([avg_row])], ignore_index=True)

    csv_path = os.path.join(save_dir, "hybrid_transformer_encoder_cnn_Tuning_trials_summary.csv")
    df_out.to_csv(csv_path, index=False)

    # Mark the winner by copying its checkpoint to ..._best.pth.
    if best_ckpt is not None:
        best_out = os.path.join(save_dir, "hybrid_transformer_encoder_cnn_Tuning_best.pth")
        # copyfile copies in chunks instead of reading the whole file into memory.
        shutil.copyfile(best_ckpt, best_out)

    print(f"\n✅ Semua model, grafik, dan metrik Hybrid Transformer-Encoder-CNN (Tuning) disimpan di folder: {save_dir}")
    print("\n📊 Final Results Summary (Hybrid Transformer-Encoder-CNN Tuning):")
    print("=" * 80)
    print(df_out[["Trial", *metric_cols]].to_string(index=False))

    print("\n🏆 Best Trial:", best_trial_id, "| Best Accuracy:", f"{best_val_acc:.6f}")
    if best_ckpt is not None:
        print("💾 Best checkpoint:", "hybrid_transformer_encoder_cnn_Tuning_best.pth")

    return df_out

# ================= EXECUTION =================
# Print the banner, run the tuning sweep, and keep the summary table in
# `tuning_results` for later inspection.
print("🚀 Hybrid Transformer-Encoder-CNN untuk Input Multimodal (Sinyal + Gambar)")
print("=" * 70)

tuning_results = tune_hybrid_encoder_model()

# Static informational text printed after tuning, grouped as
# (heading, width of the "=" rule, bullet lines).
# NOTE(review): these lines are hard-coded and not derived from the tuning
# configuration or results (e.g. "8 attention heads") - keep them in sync
# by hand.
_REPORT_SECTIONS = (
    (
        "\n🔧 Hybrid Transformer Encoder Model Features:",
        60,
        (
            "✅ Transformer Encoder Branch: Standard transformer encoder for temporal patterns",
            "✅ ECG Embedding: Direct projection of ECG signals to embedding space",
            "✅ Multi-Head Attention: 8 attention heads for pattern recognition",
            "✅ Positional Encoding: Sinusoidal position-aware embeddings",
            "✅ CNN Branch: Spatial feature extraction from GAF images",
            "✅ Multimodal Fusion: Concatenation/Addition/Attention fusion options",
            "✅ Balanced Class Weights: Better performance on minority classes",
            "✅ Gradient Clipping: Training stability",
            "✅ AdamW Optimizer: Effective weight decay",
            "✅ Global Average Pooling: Sequence-to-vector conversion",
            "✅ ReLU Activation: Standard activation for stability",
            "✅ Layer Normalization: Training stability and faster convergence",
        ),
    ),
    (
        "\n🔍 Transformer Encoder vs BERT Differences:",
        50,
        (
            "✅ Simpler Architecture: Direct embedding vs patch tokenization",
            "✅ Standard Attention: Multi-head self-attention without bidirectional context",
            "✅ Encoder-Only: No decoder component, focused on representation learning",
            "✅ Direct Processing: No special tokens (CLS, SEP) required",
            "✅ Computational Efficiency: Lower complexity than BERT",
            "✅ Faster Training: Simpler architecture enables faster convergence",
        ),
    ),
    (
        "\n⚡ Performance Characteristics:",
        40,
        (
            "✅ Balanced Complexity: Not too simple, not too complex",
            "✅ Good Generalization: Standard transformer principles",
            "✅ Efficient Processing: Direct signal-to-embedding mapping",
            "✅ Stable Training: Well-established architecture patterns",
            "✅ Moderate Parameters: Balanced model size for dataset",
        ),
    ),
)

for _heading, _rule_width, _bullets in _REPORT_SECTIONS:
    print(_heading)
    print("=" * _rule_width)
    for _bullet in _bullets:
        print(_bullet)

# Closing lines.
for _closing_line in (
    "\n✅ HYBRID TRANSFORMER ENCODER TUNING COMPLETED!",
    "Check 'checkpoints_hybrid_transformer_encoder_cnn' folder for results.",
    "🎯 Expected: Balanced performance between simplicity and capability",
    "🚀 Standard Transformer Encoder provides robust baseline performance!",
):
    print(_closing_line)


🚀 Hybrid Transformer-Encoder-CNN untuk Input Multimodal (Sinyal + Gambar)

🚀 Training Hybrid Transformer-Encoder-CNN Model...

Model parameters: 5,902,724
Epoch 01/20 | Train Loss: 0.3157 Acc: 0.8820 | Val Loss: 0.3489 Acc: 0.8772
Epoch 02/20 | Train Loss: 0.1802 Acc: 0.9459 | Val Loss: 0.2240 Acc: 0.9359
Epoch 03/20 | Train Loss: 0.1503 Acc: 0.9555 | Val Loss: 0.2290 Acc: 0.9525
Epoch 04/20 | Train Loss: 0.1317 Acc: 0.9616 | Val Loss: 0.1387 Acc: 0.9652
Epoch 05/20 | Train Loss: 0.1914 Acc: 0.9593 | Val Loss: 0.1863 Acc: 0.9571
Epoch 06/20 | Train Loss: 0.1132 Acc: 0.9676 | Val Loss: 0.1499 Acc: 0.9647
Epoch 07/20 | Train Loss: 0.1054 Acc: 0.9709 | Val Loss: 0.1579 Acc: 0.9696
Epoch 08/20 | Train Loss: 0.0969 Acc: 0.9733 | Val Loss: 0.1466 Acc: 0.9652
Epoch 09/20 | Train Loss: 0.0918 Acc: 0.9742 | Val Loss: 0.1589 Acc: 0.9632
Epoch 10/20 | Train Loss: 0.0804 Acc: 0.9778 | Val Loss: 0.1162 Acc: 0.9734
Epoch 11/20 | Train Loss: 0.0796 Acc: 0.9789 | Val Loss: 0.1701 Acc: 0.9650
Epoch 12/