In [None]:
import os, random, math, time, json
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchaudio

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# Sample rates (Hz) to run experiments at, normalised to sorted unique ints.
SR_LIST = sorted({int(rate) for rate in [16000]})
# Default sample rate: the largest entry of SR_LIST.
SR = max(SR_LIST)

# Noise flavour used for augmentation ("pink" or "white").
NOISE_COLOR = "white"
# MFCC front-end sizes: cepstral coefficients kept and mel filterbanks used.
N_MFCC, N_MELS = 30, 40

def mfcc_params_for_sr(sr: int):
    """Return the STFT settings used for MFCC extraction at sample rate *sr*.

    16 kHz and 8 kHz use fixed presets. Any other rate gets a 25 ms window,
    a 10 ms hop and an FFT size equal to the smallest power of two that is
    not below the window length.

    Returns:
        dict with keys ``"n_fft"``, ``"win_length"`` and ``"hop_length"``.
    """
    presets = {
        16000: (512, 400, 160),
        8000: (256, 200, 80),
    }
    rate = int(sr)
    if rate in presets:
        n_fft, win_length, hop_length = presets[rate]
    else:
        win_length = int(rate * 0.025)
        hop_length = int(rate * 0.010)
        # Smallest power of two >= win_length (1 when win_length <= 1).
        n_fft = 1 << (max(win_length, 1) - 1).bit_length()
    return {"n_fft": n_fft, "win_length": win_length, "hop_length": hop_length}

# Default to verbose (non-commit) mode unless COMMIT_MODE is already defined.
COMMIT_MODE = globals().get("COMMIT_MODE", False)


def set_seed(seed: int = 42, deterministic: bool = True):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    Args:
        seed: value handed to every seeding function.
        deterministic: when true, cuDNN is put in deterministic mode and its
            benchmark autotuner is switched off; when false, the reverse.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    want_deterministic = bool(deterministic)
    torch.backends.cudnn.deterministic = want_deterministic
    torch.backends.cudnn.benchmark = not want_deterministic

# Run on the GPU when one is visible; pinned host memory and non-blocking
# copies are only switched on together with CUDA.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda") if USE_CUDA else torch.device("cpu")
PIN_MEMORY = NON_BLOCK = bool(USE_CUDA)

if not COMMIT_MODE:
    print("Device:", device)

# Dataset root and the folder / file names of the noise recordings.
ROOT_DIR = "/kaggle/input/speech-commands"
NOISE_DIR = "/kaggle/input/speech-commands/_background_noise_"
WHITE_NOISE_FILE = "white_noise.wav"
PINK_NOISE_FILE = "pink_noise.wav"

# Classes used in the experiment and their integer labels (list order).
SELECTED_CLASSES = ["down", "left", "right", "up"]
CLASS_TO_IDX = dict(zip(SELECTED_CLASSES, range(len(SELECTED_CLASSES))))

# Clip length in samples: SR samples, i.e. one second at the default rate.
MAX_LENGTH = SR

# Training schedule.
BATCH_SIZE, EPOCHS, N_SPLITS = 64, 10, 5
SEEDS = [36, 38, 42]

# Noise augmentation: probability per training sample and SNR range in dB.
NOISE_PROB = 0.30
SNR_RANGE = (5, 20)

# Half of the reported CPU count, but never fewer than two workers.
NUM_WORKERS = max(2, (os.cpu_count() or 2) // 2)

In [None]:
def resolve_noise_path(noise_dir: str, filename: str):
    """Return ``noise_dir/filename`` if that path exists, otherwise ``None``.

    An empty or ``None`` *filename* also yields ``None``.
    """
    if filename:
        candidate = os.path.join(noise_dir, filename)
        if os.path.exists(candidate):
            return candidate
    return None

# Resolve both candidate noise files (None when a file is absent).
WHITE_PATH = resolve_noise_path(NOISE_DIR, WHITE_NOISE_FILE)
PINK_PATH = resolve_noise_path(NOISE_DIR, PINK_NOISE_FILE)

# Anything other than "pink" (case-insensitive) selects the white-noise file.
ACTIVE_NOISE_NAME = "pink" if str(NOISE_COLOR).lower() == "pink" else "white"
ACTIVE_NOISE_PATH = PINK_PATH if ACTIVE_NOISE_NAME == "pink" else WHITE_PATH

if not COMMIT_MODE:
    print(f"Using noise: {ACTIVE_NOISE_NAME} -> {ACTIVE_NOISE_PATH}")

# Decoded noise recordings keyed by (path, sample rate). Each process (e.g. each
# DataLoader worker) fills its own copy, so a file is decoded once per process
# instead of once per augmented sample.
_NOISE_CACHE = {}


def _load_noise_mono(noise_path: str, target_sr: int) -> torch.Tensor:
    """Load *noise_path* as a single-channel tensor at *target_sr*, with caching."""
    key = (noise_path, int(target_sr))
    cached = _NOISE_CACHE.get(key)
    if cached is None:
        noise, sr = torchaudio.load(noise_path)
        if noise.dim() == 1:
            noise = noise.unsqueeze(0)
        if noise.shape[0] > 1:
            # Down-mix multi-channel recordings to mono.
            noise = noise.mean(dim=0, keepdim=True)
        if sr != target_sr:
            noise = torchaudio.functional.resample(noise, sr, target_sr)
        cached = _NOISE_CACHE[key] = noise
    return cached


def add_specific_noise(
    waveform: torch.Tensor,
    noise_path: str,
    snr_db: float,
    target_sr: int = SR
):
    """Mix the recording at *noise_path* into *waveform* at the given SNR.

    The noise is down-mixed to mono, resampled to *target_sr*, tiled if it is
    shorter than the waveform, and its leading ``T`` samples are scaled so that
    ``signal_power / noise_power`` equals ``10 ** (snr_db / 10)``.

    Args:
        waveform: tensor indexed as ``(channels, samples)``.
        noise_path: path of the noise recording.
        snr_db: target signal-to-noise ratio in dB.
        target_sr: sample rate the noise is resampled to.

    Returns:
        The noisy waveform clamped to ``[-1, 1]``. The input is returned
        unchanged when *noise_path* is ``None``, missing on disk, or empty.

    Note:
        The decoded noise is cached per process; changes to the file on disk
        after the first call are not picked up.
    """
    if (noise_path is None) or (not os.path.exists(noise_path)):
        return waveform

    # The cached tensor is never modified below: every step makes a new tensor or a view.
    noise = _load_noise_mono(noise_path, target_sr)
    if noise.shape[1] == 0:
        # An empty recording cannot be tiled (would divide by zero below).
        return waveform

    T = waveform.shape[1]
    if noise.shape[1] < T:
        repeat = math.ceil(T / noise.shape[1])
        noise = noise.repeat(1, repeat)
    noise = noise[:, :T]

    noise = noise.to(waveform.device, dtype=waveform.dtype)

    # Clamp both powers away from zero so silent inputs do not divide by zero.
    sig_power = waveform.pow(2).mean().clamp(min=1e-12)
    noise_power = noise.pow(2).mean().clamp(min=1e-12)
    snr_linear = 10.0 ** (snr_db / 10.0)
    scale = torch.sqrt(sig_power / (snr_linear * noise_power))

    noisy = waveform + scale * noise
    return torch.clamp(noisy, -1.0, 1.0)

In [None]:
class VoiceCommandDatasetWithNoise(Dataset):
    """Audio dataset yielding ``(mfcc_features, label)`` pairs.

    Each item is loaded from disk, down-mixed to mono, resampled to ``sr``,
    right-padded with zeros or truncated to ``max_length`` samples, optionally
    mixed with noise (training only), and converted to MFCCs.

    Args:
        samples: sequence of ``(path, class_name)`` pairs.
        class_to_idx: mapping from class name to integer label.
        white_noise_path: noise recording used for augmentation; falsy disables it.
        is_training: augmentation is only applied when this is true.
        max_length: clip length in samples.
        noise_prob: probability of adding noise to a training item.
        snr_range: ``(low, high)`` bounds in dB for the sampled SNR.
        sr: target sample rate.
        n_mfcc, n_fft, hop_length, win_length, n_mels: MFCC settings;
            ``n_mels`` falls back to ``n_mfcc`` when ``None``.
    """

    def __init__(self, samples, class_to_idx,
                 white_noise_path=None, is_training=True,
                 max_length=MAX_LENGTH, noise_prob=0.0, snr_range=(5, 20),
                 sr=SR, n_mfcc=N_MFCC,
                 n_fft=400, hop_length=160, win_length=400, n_mels=None):
        self.samples = samples
        self.class_to_idx = class_to_idx
        self.white_noise_path = white_noise_path
        self.is_training = is_training
        self.max_length = max_length
        self.noise_prob = float(noise_prob)
        self.snr_range = snr_range
        self.sr = sr

        mel_bins = n_mfcc if n_mels is None else n_mels
        self.mfcc = torchaudio.transforms.MFCC(
            sample_rate=sr,
            n_mfcc=n_mfcc,
            melkwargs=dict(
                n_fft=n_fft,
                win_length=win_length,
                hop_length=hop_length,
                n_mels=mel_bins,
            ),
        )

        if COMMIT_MODE:
            return
        if not is_training:
            print("[Val/Test] NO noise")
        elif white_noise_path:
            print(f"[Train] white noise={os.path.basename(white_noise_path)} | p={self.noise_prob} | SNR={snr_range}")

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.samples)

    def __getitem__(self, idx: int):
        """Load sample *idx* and return ``(features, label)``.

        The features are the MFCC output with its leading dimension squeezed
        and the remaining two axes swapped (time-major, assuming torchaudio's
        ``(channel, n_mfcc, time)`` layout).
        """
        path, cname = self.samples[idx]
        label = int(self.class_to_idx[cname])

        wav, file_sr = torchaudio.load(path)
        if wav.dim() == 1:
            wav = wav.unsqueeze(0)
        if wav.shape[0] > 1:
            # Average the channels to obtain mono audio.
            wav = wav.mean(dim=0, keepdim=True)

        if file_sr != self.sr:
            wav = torchaudio.functional.resample(wav, file_sr, self.sr)

        # Zero-pad on the right or cut so every clip has max_length samples.
        missing = self.max_length - wav.shape[1]
        if missing > 0:
            wav = F.pad(wav, (0, missing))
        else:
            wav = wav[:, :self.max_length]

        # The random draw only happens for training sets with a noise file.
        if self.is_training and self.white_noise_path:
            if random.random() < self.noise_prob:
                snr_db = random.uniform(*self.snr_range)
                wav = add_specific_noise(wav, self.white_noise_path, snr_db, self.sr)

        coeffs = self.mfcc(wav.to(dtype=torch.float32))
        return coeffs.squeeze(0).transpose(0, 1).contiguous(), label

In [None]:
from pathlib import Path
from collections import Counter

def collect_samples_4classes(root_dir: str, selected_classes, exts=(".wav", ".WAV")):
    """Collect ``(file_path, class_name)`` pairs for the selected class folders.

    Args:
        root_dir: directory that contains one sub-folder per class.
        selected_classes: class (folder) names to scan; missing folders are
            skipped, with a notice unless ``COMMIT_MODE`` is on.
        exts: accepted file suffixes, matched case-insensitively. A single
            string is treated as one suffix.

    Returns:
        List of ``(path_string, class_name)`` tuples sorted by path.
    """
    if isinstance(exts, str):
        exts = (exts,)
    # Compare lower-cased suffixes so ".wav" and ".WAV" are equivalent.
    allowed = {str(ext).lower() for ext in exts}

    root = Path(root_dir)
    samples = []

    for cname in selected_classes:
        cdir = root / cname
        if not cdir.is_dir():
            if not COMMIT_MODE:
                print(f"Folder kelas tidak ada: {cdir}")
            continue

        samples.extend(
            (str(ent), cname)
            for ent in cdir.iterdir()
            if ent.is_file() and ent.suffix.lower() in allowed
        )

    # Sort by path so the sample order does not depend on directory listing order.
    samples.sort(key=lambda x: x[0])
    return samples

# Scan the dataset once; every fold is later drawn from this list.
all_samples = collect_samples_4classes(ROOT_DIR, SELECTED_CLASSES)

if not COMMIT_MODE:
    # Report the total and the per-class file counts.
    per_class = Counter(cname for _, cname in all_samples)
    print(f"Total files (classes={len(SELECTED_CLASSES)}): {len(all_samples)}")
    print("Per-class counts:", dict(per_class))

In [None]:
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Fall back to the white-noise file when no active noise has been selected yet.
if "ACTIVE_NOISE_PATH" not in globals():
    ACTIVE_NOISE_PATH = WHITE_PATH
    ACTIVE_NOISE_NAME = "white"

if not COMMIT_MODE:
    print("[MFCC] Will compute MFCC params per SR using rules function during loader build")

def build_kfold_loaders_generic(
    samples, class_to_idx,
    sr=None, n_splits=5, seed=42,
    batch_size=64, num_workers=2, pin_memory=True,
    noise_path=None,
    max_length=None,
    noise_prob=0.0, snr_range=(5,20),
    augment_mode="file_noise",
    norm_mode="none",
    crop_mode="left"
):
    """Build stratified K-fold train/validation DataLoaders of MFCC features.

    Args:
        samples: sequence of ``(path, class_name)`` pairs.
        class_to_idx: mapping from class name to integer label.
        sr: target sample rate; defaults to the last entry of the global
            ``SR_LIST`` (16000 when that global does not exist).
        n_splits, seed: configuration of the shuffled ``StratifiedKFold``;
            *seed* also derives the shuffle generator of each train loader.
        batch_size, num_workers, pin_memory: DataLoader settings. Memory is
            only pinned when CUDA is available.
        noise_path: noise recording; required and must exist when
            ``augment_mode == "file_noise"``.
        max_length: clip length in samples; defaults to ``sr``.
        noise_prob, snr_range: forwarded to the training dataset.
        augment_mode: ``"file_noise"`` enables noise mixing on the training
            split; any other value disables augmentation.
        norm_mode, crop_mode: accepted for call compatibility; not used by
            this function.

    Returns:
        List with one dict per fold:
        ``{"fold": int, "train_loader": DataLoader, "val_loader": DataLoader}``.

    Raises:
        ValueError: ``augment_mode == "file_noise"`` without a *noise_path*.
        FileNotFoundError: *noise_path* does not exist.
    """
    sr = int(sr if sr is not None else (SR_LIST[-1] if 'SR_LIST' in globals() else 16000))
    max_length = int(max_length if max_length is not None else sr)

    _mfcc_cfg = mfcc_params_for_sr(sr)
    WIN_LENGTH = _mfcc_cfg["win_length"]
    HOP_LENGTH = _mfcc_cfg["hop_length"]
    N_FFT      = _mfcc_cfg["n_fft"]
    if not COMMIT_MODE:
        print(f"[MFCC] SR={sr} -> n_fft={N_FFT} win_length={WIN_LENGTH} hop_length={HOP_LENGTH}")

    y_all = np.array([class_to_idx[c] for _, c in samples], dtype=np.int64)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=int(seed))

    use_cuda = torch.cuda.is_available()
    pm = bool(pin_memory and use_cuda)

    dl_args = dict(
        batch_size=int(batch_size),
        num_workers=int(num_workers),
        pin_memory=pm,
        persistent_workers=True if int(num_workers) > 0 else False,
        drop_last=False,
    )

    use_file_noise = augment_mode == "file_noise"
    if use_file_noise:
        if noise_path is None:
            raise ValueError("augment_mode='file_noise' membutuhkan noise_path yang valid")
        if not os.path.exists(noise_path):
            raise FileNotFoundError(f"noise_path invalid: {noise_path}")

    # Describe the noise source actually used by this call (not a global).
    noise_label = os.path.basename(noise_path) if use_file_noise else "none"

    # Each fold gets its own shuffle seed derived from the run seed.
    g_base = int(seed) * 1_000_003
    folds = []
    for fold_id, (tr_idx, va_idx) in enumerate(skf.split(samples, y_all), start=1):
        tr_s = [samples[i] for i in tr_idx]
        va_s = [samples[i] for i in va_idx]

        ds_tr = VoiceCommandDatasetWithNoise(tr_s, class_to_idx,
            white_noise_path=noise_path if use_file_noise else None,
            is_training=True,
            sr=sr, max_length=max_length,
            n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH, n_mels=N_MELS, noise_prob=noise_prob, snr_range=snr_range,
        )
        # Validation data is never augmented.
        ds_va = VoiceCommandDatasetWithNoise(va_s, class_to_idx,
            white_noise_path=None, is_training=False,
            sr=sr, max_length=max_length,
            n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH, n_mels=N_MELS, noise_prob=0.0, snr_range=snr_range,
        )

        g = torch.Generator(device="cpu"); g.manual_seed(g_base + fold_id)
        dl_tr = DataLoader(ds_tr, shuffle=True,  generator=g, **dl_args)
        dl_va = DataLoader(ds_va, shuffle=False,                 **dl_args)

        if not COMMIT_MODE:
            print(f"[SR={sr} | seed={seed} | fold={fold_id}] "
                  f"train={len(tr_s)} val={len(va_s)} noise={noise_label} (workers={num_workers}, pin_memory={pm})")

        folds.append({"fold": fold_id, "train_loader": dl_tr, "val_loader": dl_va})

    return folds

def build_kfold_loaders_noise(
    samples, class_to_idx, n_splits=5, seed=42,
    batch_size=64, num_workers=2, pin_memory=True,
    noise_path=None, max_length=None,
    noise_prob=NOISE_PROB, snr_range=SNR_RANGE, sr=None
):
    """Build K-fold loaders with file-based noise augmentation enabled.

    Convenience wrapper around ``build_kfold_loaders_generic`` that fixes
    ``augment_mode="file_noise"`` and substitutes the global
    ``ACTIVE_NOISE_PATH`` when *noise_path* is ``None``.
    """
    chosen_noise = ACTIVE_NOISE_PATH if noise_path is None else noise_path
    return build_kfold_loaders_generic(
        samples,
        class_to_idx,
        sr=sr,
        n_splits=n_splits,
        seed=seed,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        noise_path=chosen_noise,
        max_length=max_length,
        noise_prob=noise_prob,
        snr_range=snr_range,
        augment_mode="file_noise",
        norm_mode="none",
        crop_mode="left",
    )

In [None]:
class MFCC_LSTM(nn.Module):
    """(Bi)LSTM classifier over MFCC sequences.

    The LSTM is built with ``batch_first=True`` and ``input_size=n_mfcc``, so
    inputs are ``(batch, time, n_mfcc)``. A summary vector of the sequence is
    passed through LayerNorm -> Dropout -> Linear to produce class logits.

    Args:
        n_mfcc: number of features per frame.
        hidden: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
        bidirectional: run the LSTM in both directions.
        dropout: dropout in the head, and between LSTM layers when
            ``num_layers > 1``.
    """

    def __init__(
        self,
        n_mfcc: int = N_MFCC,
        hidden: int = 128,
        num_layers: int = 2,
        num_classes: int = 4,
        bidirectional: bool = True,
        dropout: float = 0.2,
    ):
        super().__init__()
        # Only pass inter-layer dropout to nn.LSTM when there is more than one layer.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=n_mfcc,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=lstm_dropout,
        )
        out_dim = hidden * (2 if bidirectional else 1)
        self.head = nn.Sequential(
            nn.LayerNorm(out_dim),
            nn.Dropout(dropout),
            nn.Linear(out_dim, num_classes),
        )
        self._init_weights()

    def _init_weights(self):
        """Orthogonal recurrent weights, Xavier input/output weights, zero biases."""
        for name, param in self.lstm.named_parameters():
            if "weight_hh" in name:
                nn.init.orthogonal_(param)
            elif "weight_ih" in name:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.zeros_(param)
        lin = self.head[-1]
        nn.init.xavier_uniform_(lin.weight)
        nn.init.zeros_(lin.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``."""
        if x.is_cuda:
            self.lstm.flatten_parameters()
        out, _ = self.lstm(x)
        if self.lstm.bidirectional:
            # The output concatenates [forward, backward] states per time step.
            # The forward state has seen the whole sequence at the last step,
            # the backward state at the first step. Taking both halves at the
            # last step would give the backward half a single frame of context.
            h = self.lstm.hidden_size
            summary = torch.cat((out[:, -1, :h], out[:, 0, h:]), dim=1)
        else:
            summary = out[:, -1, :]
        logits = self.head(summary)
        return logits

In [None]:
from contextlib import nullcontext
from torch.nn.utils import clip_grad_norm_

def train_one_epoch(model, loader, optimizer, criterion, scheduler=None, grad_clip_norm=None):
    """Run one training pass over *loader* and return the mean loss per sample.

    Mixed precision is used only when a non-``None`` global ``SCALER`` exists
    and the global ``device`` is CUDA. A *scheduler* is stepped after every
    batch unless it is a ``ReduceLROnPlateau``. When *grad_clip_norm* is given,
    gradients are clipped to that norm (after unscaling under AMP).

    The returned value is the sample-weighted loss sum divided by
    ``len(loader.dataset)``.
    """
    model.train()

    scaler = globals().get("SCALER")
    use_amp = scaler is not None and device.type == "cuda"
    amp_ctx = torch.autocast(device_type="cuda") if use_amp else nullcontext()

    plateau_cls = torch.optim.lr_scheduler.ReduceLROnPlateau
    step_per_batch = not (scheduler is None or isinstance(scheduler, plateau_cls))

    running_loss = 0.0
    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device, non_blocking=NON_BLOCK).float()
        batch_y = batch_y.to(device, non_blocking=NON_BLOCK).long()

        optimizer.zero_grad(set_to_none=True)

        with amp_ctx:
            loss = criterion(model(batch_x), batch_y)

        # Backward pass (scaled under AMP).
        if use_amp:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        # Optional clipping; AMP gradients must be unscaled first.
        if grad_clip_norm is not None:
            if use_amp:
                scaler.unscale_(optimizer)
            clip_grad_norm_(model.parameters(), max_norm=float(grad_clip_norm))

        # Parameter update.
        if use_amp:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()

        if step_per_batch:
            scheduler.step()

        running_loss += loss.item() * batch_y.size(0)

    return float(running_loss) / float(len(loader.dataset))


@torch.no_grad()
def evaluate(model, loader, criterion=None):
    """Evaluate *model* on *loader* without gradient tracking.

    Returns:
        ``(accuracy, macro_f1, avg_loss, y_true, y_pred)`` where ``avg_loss``
        is ``None`` when no *criterion* is given, and ``y_true`` / ``y_pred``
        are int64 NumPy arrays of labels and argmax predictions.
    """
    model.eval()
    targets, predictions = [], []
    loss_sum = 0.0

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device, non_blocking=NON_BLOCK).float()
        batch_y = batch_y.to(device, non_blocking=NON_BLOCK).long()

        logits = model(batch_x)
        if criterion is not None:
            # Weight the batch loss by batch size to get a per-sample mean later.
            loss_sum += criterion(logits, batch_y).item() * batch_y.size(0)

        targets += batch_y.tolist()
        predictions += logits.argmax(dim=1).tolist()

    acc = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions, average="macro")
    if criterion is None:
        avg_loss = None
    else:
        avg_loss = float(loss_sum) / float(len(loader.dataset))

    return (
        float(acc),
        float(f1),
        avg_loss,
        np.array(targets, dtype=np.int64),
        np.array(predictions, dtype=np.int64),
    )

In [None]:
# Switch to quiet "commit" mode: code guarded by `if not COMMIT_MODE` no longer prints.
COMMIT_MODE = True

import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Turn off pyplot's interactive mode.
plt.ioff()

from tqdm import tqdm as _tqdm

def tqdm(*args, **kwargs):
    """Call the real ``tqdm``, forcing ``disable=True`` while ``COMMIT_MODE`` is on."""
    forced = {"disable": True} if COMMIT_MODE else {}
    return _tqdm(*args, **{**kwargs, **forced})

import pandas as pd
# Keep DataFrame displays compact.
for _opt_name, _opt_value in (
    ("display.max_rows", 20),
    ("display.max_columns", 20),
    ("display.max_colwidth", 120),
):
    pd.set_option(_opt_name, _opt_value)
del _opt_name, _opt_value

if COMMIT_MODE:
    def _no_show(*_args, **_kwargs):
        """Do-nothing replacement for ``plt.show``."""
        return None
    # In commit mode figures are not displayed.
    plt.show = _no_show

In [None]:
import io, sys, contextlib, os, time, numpy as np, pandas as pd, shutil, json
from pathlib import Path
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Re-normalise the sample-rate list to sorted unique ints.
SR_LIST = sorted({int(rate) for rate in SR_LIST})


def _outdirs_for(SR: int):
    root = Path(f"/kaggle/working/kfold_outputs_sr{SR}")
    d_fold = root / "per_seed_and_fold"   
    d_sum  = root / "summary"             
    d_fold.mkdir(parents=True, exist_ok=True)
    d_sum.mkdir(parents=True, exist_ok=True)
    return root, d_fold, d_sum

@contextlib.contextmanager
def mute_outputs(active: bool):
    """Context manager that discards stdout and stderr while *active* is true.

    When *active* is false the body runs with the streams untouched.
    """
    with contextlib.ExitStack() as stack:
        if active:
            # Both streams share one throw-away in-memory buffer.
            sink = io.StringIO()
            stack.enter_context(contextlib.redirect_stdout(sink))
            stack.enter_context(contextlib.redirect_stderr(sink))
        yield

set_seed(0)

# Re-derive the device flags for the training cell.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda") if USE_CUDA else torch.device("cpu")
PIN_MEMORY = NON_BLOCK = bool(USE_CUDA)
if not COMMIT_MODE:
    print("Device:", device)

# Mixed precision: bfloat16 on compute capability >= 8, otherwise float16
# with a gradient scaler. No AMP at all without CUDA.
AMP_ENABLE = USE_CUDA
AMP_DTYPE = None
SCALER = None
if AMP_ENABLE:
    major_cc = torch.cuda.get_device_capability()[0]
    AMP_DTYPE = torch.float16 if major_cc < 8 else torch.bfloat16
    try:
        if AMP_DTYPE is torch.float16:
            SCALER = torch.amp.GradScaler(device="cuda")
        else:
            SCALER = None
    except Exception:
        # Fall back to the torch.cuda.amp scaler if the call above fails.
        SCALER = torch.cuda.amp.GradScaler(enabled=(AMP_DTYPE is torch.float16))

# Accumulators filled by the training loop.
results, summary_rows, efficiency_rows_global = [], [], []

# Cross-validated training driver.
# For every seed and sample rate: build the K-fold loaders, train a fresh
# MFCC_LSTM per fold, keep the weights of the epoch with the best validation
# accuracy, and write the checkpoints, history CSV, plots, ROC/PR summaries
# and efficiency statistics into the per-run output folder.
# NOTE: everything printed inside this block (including the [WARN] messages)
# is discarded when COMMIT_MODE is on, because both streams are redirected.
with mute_outputs(COMMIT_MODE):
    for seed in SEEDS:
        if not COMMIT_MODE:
            print("\n" + "="*70)
            print(f"Running SEED = {seed}")
            print("="*70)
        set_seed(seed)

        for sr in SR_LIST:
            SR = int(sr)
            ROOT_OUT, OUT_FOLD, OUT_SUMM = _outdirs_for(SR)
            MAX_LENGTH = SR

            folds = build_kfold_loaders_noise(
                all_samples, CLASS_TO_IDX,
                n_splits=N_SPLITS, seed=seed,
                batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY,
                noise_path=ACTIVE_NOISE_PATH,
                max_length=MAX_LENGTH, noise_prob=NOISE_PROB, snr_range=SNR_RANGE,
                sr=SR
            )

            for fd in folds:
                fold_id     = fd["fold"]
                train_loader= fd["train_loader"]
                val_loader  = fd["val_loader"]

                # One output folder per (seed, fold, SR) run.
                RUN_NAME = f"lstm_seed{seed}_fold{fold_id}_sr{SR}"
                OUT_SUB  = str((ROOT_OUT / "per_seed_and_fold" / RUN_NAME).resolve())
                os.makedirs(OUT_SUB, exist_ok=True)
                BEST_CKPT = os.path.join(OUT_SUB, "best_model.pth")
                BEST_FULL = os.path.join(OUT_SUB, "best_full.pt")
                LAST_CKPT = os.path.join(OUT_SUB, "last_model.pth")

                model = MFCC_LSTM(n_mfcc=N_MFCC, num_classes=len(SELECTED_CLASSES)).to(device)
                assert next(model.parameters()).is_cuda == USE_CUDA, "Model belum di CUDA!"
                n_params = int(sum(p.numel() for p in model.parameters()))

                optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
                # OneCycleLR is stepped once per batch, so it needs the total batch count.
                total_steps = max(1, EPOCHS * len(train_loader))
                scheduler = torch.optim.lr_scheduler.OneCycleLR(
                    optimizer, max_lr=1e-3, total_steps=total_steps
                )
                criterion = nn.CrossEntropyLoss().to(device)

                best_val_acc = -1.0
                best_true = best_pred = None
                current_best_ckpt_path = BEST_CKPT
                history = []  # {epoch, train_loss, val_loss, val_acc, val_f1, epoch_time_sec, throughput_samples_per_sec}

                for ep in range(1, EPOCHS+1):
                    model.train()
                    tr_loss_sum = 0.0
                    n_train = 0
                    ep_t0 = time.time()

                    for feats, y in train_loader:
                        feats = feats.to(device, non_blocking=NON_BLOCK).float()
                        y     = y.to(device, non_blocking=NON_BLOCK).long()

                        optimizer.zero_grad(set_to_none=True)

                        if AMP_ENABLE:
                            with torch.autocast(device_type='cuda', dtype=AMP_DTYPE):
                                logits = model(feats)
                                loss = criterion(logits, y)
                            # A gradient scaler is only present on the float16 path.
                            if SCALER is not None:
                                SCALER.scale(loss).backward()
                                SCALER.step(optimizer)
                                SCALER.update()
                            else:
                                loss.backward()
                                optimizer.step()
                        else:
                            logits = model(feats)
                            loss = criterion(logits, y)
                            loss.backward()
                            optimizer.step()

                        if scheduler is not None:
                            scheduler.step()

                        bs = y.size(0)
                        tr_loss_sum += loss.item() * bs
                        n_train += bs

                    tr_loss = tr_loss_sum / max(1, n_train)

                    val_acc, val_f1, val_loss, y_true, y_pred = evaluate(model, val_loader, criterion)

                    # Keep predictions and weights of the best epoch so far (by validation accuracy).
                    if val_acc > best_val_acc:
                        best_val_acc = float(val_acc)
                        best_true = y_true.copy()
                        best_pred = y_pred.copy()

                        torch.save(model.state_dict(), BEST_CKPT)
                        torch.save({
                            "epoch": ep,
                            "model_state": model.state_dict(),
                            "optimizer_state": optimizer.state_dict(),
                            "scheduler_state": scheduler.state_dict() if scheduler is not None else None,
                            "val_acc": best_val_acc,
                            "sr": SR,
                            "seed": seed,
                            "fold": fold_id,
                            "classes": list(SELECTED_CLASSES),
                            "class_to_idx": CLASS_TO_IDX,
                            "n_mfcc": N_MFCC,
                        }, BEST_FULL)
                        current_best_ckpt_path = BEST_CKPT

                    # Epoch time covers training, validation and checkpointing.
                    ep_time = time.time() - ep_t0
                    thr = float(n_train) / ep_time if ep_time > 0 else 0.0
                    history.append({
                        "epoch": ep,
                        "train_loss": float(tr_loss),
                        "val_loss": float(val_loss),
                        "val_acc": float(val_acc),
                        "val_f1": float(val_f1),
                        "epoch_time_sec": float(ep_time),
                        "throughput_samples_per_sec": float(thr),
                    })

                    if (ep % 5 == 0 or ep == 1 or ep == EPOCHS) and not COMMIT_MODE:
                        if USE_CUDA:
                            gpu_mb = torch.cuda.memory_allocated() / 1e6
                            print(f"[SR={SR} | GPU {gpu_mb:.1f} MB]", end=" ")
                        print(f"Seed {seed} | Fold {fold_id} | Epoch {ep:02d} "
                              f"| tr_loss={tr_loss:.4f} | va_loss={val_loss:.4f} "
                              f"| va_acc={val_acc:.4f} | va_f1={val_f1:.4f} | ep_time={ep_time:.2f}s | thr={thr:.1f}/s")

                torch.save(model.state_dict(), LAST_CKPT)

                # Confusion matrix of the best epoch's validation predictions
                cm = confusion_matrix(best_true, best_pred, labels=list(range(len(SELECTED_CLASSES))))
                cm_df = pd.DataFrame(cm, index=SELECTED_CLASSES, columns=SELECTED_CLASSES)
                cm_path = os.path.join(OUT_FOLD, f"cm_lstm_seed{seed}_fold{fold_id}_sr{SR}.csv")
                cm_df.to_csv(cm_path)

                # Save history CSV
                hist_df = pd.DataFrame(history)
                hist_csv = os.path.join(OUT_SUB, "history.csv")
                hist_df.to_csv(hist_csv, index=False)

                # Plots: loss and metrics (best-effort; a failure only produces a warning)
                try:
                    # Loss curves
                    plt.figure(figsize=(8,5))
                    plt.plot(hist_df["epoch"], hist_df["train_loss"], label="train_loss")
                    plt.plot(hist_df["epoch"], hist_df["val_loss"], label="val_loss")
                    plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.title("Loss Curve"); plt.legend()
                    plt.tight_layout(); plt.savefig(os.path.join(OUT_SUB, "loss_curve.png")); plt.close()

                    # Metric curves
                    plt.figure(figsize=(8,5))
                    plt.plot(hist_df["epoch"], hist_df["val_acc"], label="val_acc")
                    plt.plot(hist_df["epoch"], hist_df["val_f1"], label="val_f1")
                    plt.xlabel("Epoch"); plt.ylabel("Score"); plt.title("Validation Metrics"); plt.legend()
                    plt.tight_layout(); plt.savefig(os.path.join(OUT_SUB, "metrics_curve.png")); plt.close()

                    # Error per class bar (1 - recall); classes without samples become NaN
                    per_class_counts = cm_df.sum(axis=1).replace(0, np.nan)
                    correct = np.diag(cm)
                    recall = correct / per_class_counts.values
                    err_rate = 1.0 - recall
                    plt.figure(figsize=(10,5))
                    plt.bar(cm_df.index, err_rate)
                    plt.ylabel("Error rate (1 - recall)"); plt.title("Error per Class")
                    plt.xticks(rotation=45, ha='right')
                    plt.tight_layout(); plt.savefig(os.path.join(OUT_SUB, "error_per_class.png")); plt.close()

                    # Confusion matrix heatmap
                    plt.figure(figsize=(6,5))
                    plt.imshow(cm, interpolation='nearest', cmap='Blues')
                    plt.title('Confusion Matrix'); plt.colorbar()
                    tick_marks = np.arange(len(SELECTED_CLASSES))
                    plt.xticks(tick_marks, SELECTED_CLASSES, rotation=45, ha='right')
                    plt.yticks(tick_marks, SELECTED_CLASSES)
                    # Lay out after the axis labels exist so they are included in the fit.
                    plt.ylabel('True label'); plt.xlabel('Predicted label')
                    plt.tight_layout()
                    plt.savefig(os.path.join(OUT_SUB, "confusion_matrix.png")); plt.close()
                except Exception as e:
                    print(f"[WARN] Failed to plot curves/CM for seed={seed} fold={fold_id} SR={SR}: {e}")

                # ROC & PR curves using best checkpoint
                try:
                    from sklearn.preprocessing import label_binarize
                    from sklearn.metrics import roc_curve, auc, roc_auc_score, precision_recall_curve, average_precision_score

                    # Load best weights (accepts a raw state_dict or a dict with a 'model_state' entry)
                    try:
                        state = torch.load(current_best_ckpt_path, map_location=device)
                        if isinstance(state, dict) and not any(k.startswith('layer') for k in state.keys()) and 'model_state' in state:
                            model.load_state_dict(state['model_state'])
                        else:
                            model.load_state_dict(state)
                    except Exception as e:
                        # Still best-effort, but no longer silent: the curves below
                        # would otherwise be computed from last-epoch weights unnoticed.
                        print(f"[WARN] Could not reload best checkpoint for seed={seed} fold={fold_id} SR={SR}; "
                              f"ROC/PR curves use last-epoch weights: {e}")
                    model.eval()

                    y_true_list, y_score_chunks = [], []
                    with torch.no_grad():
                        for feats, y in val_loader:
                            feats = feats.to(device, non_blocking=NON_BLOCK).float()
                            logits = model(feats)
                            probs = torch.softmax(logits, dim=1).detach().cpu().numpy()
                            y_score_chunks.append(probs)
                            y_true_list.extend(y.numpy().tolist())

                    y_true_np = np.array(y_true_list, dtype=np.int64)
                    y_score_np = np.vstack(y_score_chunks) if y_score_chunks else np.zeros((0, len(SELECTED_CLASSES)))
                    n_classes = len(SELECTED_CLASSES)
                    if y_score_np.shape[0] > 0:
                        # One-vs-rest indicator matrix of the true labels.
                        y_bin = label_binarize(y_true_np, classes=list(range(n_classes)))
                        # ROC curves
                        fpr, tpr, roc_auc = {}, {}, {}
                        for i in range(n_classes):
                            fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_score_np[:, i])
                            roc_auc[i] = auc(fpr[i], tpr[i])
                        fpr['micro'], tpr['micro'], _ = roc_curve(y_bin.ravel(), y_score_np.ravel())
                        roc_auc['micro'] = auc(fpr['micro'], tpr['micro'])
                        roc_auc_macro = roc_auc_score(y_bin, y_score_np, average='macro', multi_class='ovr')

                        plt.figure(figsize=(8,6))
                        for i, name in enumerate(SELECTED_CLASSES):
                            plt.plot(fpr[i], tpr[i], label=f"{name} (AUC={roc_auc[i]:.3f})")
                        plt.plot([0,1],[0,1], 'k--', alpha=0.4)
                        plt.plot(fpr['micro'], tpr['micro'], linestyle='--', label=f"micro (AUC={roc_auc['micro']:.3f})")
                        plt.xlabel("False Positive Rate"); plt.ylabel("True Positive Rate"); plt.title("ROC Curves (OvR)")
                        plt.legend()
                        plt.tight_layout(); plt.savefig(os.path.join(OUT_SUB, "roc_curve.png")); plt.close()

                        auc_rows = ([{'class': SELECTED_CLASSES[i], 'roc_auc': float(roc_auc[i])} for i in range(n_classes)] +
                                    [{'class': 'micro', 'roc_auc': float(roc_auc['micro'])},
                                     {'class': 'macro', 'roc_auc': float(roc_auc_macro)}])
                        pd.DataFrame(auc_rows).to_csv(os.path.join(OUT_SUB, "roc_auc_summary.csv"), index=False)

                        # PR curves
                        precision, recall, ap = {}, {}, {}
                        for i in range(n_classes):
                            precision[i], recall[i], _ = precision_recall_curve(y_bin[:, i], y_score_np[:, i])
                            ap[i] = average_precision_score(y_bin[:, i], y_score_np[:, i])
                        precision['micro'], recall['micro'], _ = precision_recall_curve(y_bin.ravel(), y_score_np.ravel())
                        ap_micro = average_precision_score(y_bin, y_score_np, average='micro')
                        ap_macro = average_precision_score(y_bin, y_score_np, average='macro')

                        plt.figure(figsize=(8,6))
                        for i, name in enumerate(SELECTED_CLASSES):
                            plt.plot(recall[i], precision[i], label=f"{name} (AP={ap[i]:.3f})")
                        plt.plot(recall['micro'], precision['micro'], linestyle='--', label=f"micro (AP={ap_micro:.3f})")
                        plt.xlabel("Recall"); plt.ylabel("Precision"); plt.title("Precision-Recall Curves (OvR)")
                        plt.legend()
                        plt.tight_layout(); plt.savefig(os.path.join(OUT_SUB, "pr_curve.png")); plt.close()

                        ap_rows = ([{'class': SELECTED_CLASSES[i], 'average_precision': float(ap[i])} for i in range(n_classes)] +
                                   [{'class': 'micro', 'average_precision': float(ap_micro)},
                                    {'class': 'macro', 'average_precision': float(ap_macro)}])
                        pd.DataFrame(ap_rows).to_csv(os.path.join(OUT_SUB, "pr_ap_summary.csv"), index=False)
                except Exception as e:
                    print(f"[WARN] Failed to compute PR/ROC curves for seed={seed} fold={fold_id} SR={SR}: {e}")

                # Efficiency rows per-fold
                total_time = float(hist_df["epoch_time_sec"].sum()) if not hist_df.empty else 0.0
                avg_ep_time = float(hist_df["epoch_time_sec"].mean()) if not hist_df.empty else 0.0
                mean_thr = float(hist_df["throughput_samples_per_sec"].mean()) if not hist_df.empty else 0.0
                efficiency_rows_global.append({
                    "sr": SR,
                    "seed": seed,
                    "fold": fold_id,
                    "arch": "LSTM",
                    "n_params": n_params,
                    "epochs": EPOCHS,
                    "total_time_sec": total_time,
                    "avg_epoch_time_sec": avg_ep_time,
                    "mean_throughput_samples_per_sec": mean_thr,
                })

                results.append({
                    "seed": seed,
                    "fold": fold_id,
                    "sr": int(SR),
                    "val_acc": float(best_val_acc),
                    "cm_path": cm_path,
                    "ckpt_path": current_best_ckpt_path,
                })

        # Per-seed summary: mean / std of the best validation accuracy over folds, per SR.
        for sr0 in SR_LIST:
            accs = [r["val_acc"] for r in results if r["seed"] == seed and r["sr"] == int(sr0)]
            if accs:
                summary_rows.append({
                    "seed": seed,
                    "sr": int(sr0),
                    "acc_mean_over_folds": float(np.mean(accs)),
                    "acc_std_over_folds":  float(np.std(accs)),
                    "n_folds": N_SPLITS
                })

# Write the speed/efficiency statistics to CSV: for every sample rate found in
# the collected rows, one file with the raw per-fold rows and one file with a
# per-seed roll-up.
if efficiency_rows_global:
    eff_df = pd.DataFrame(efficiency_rows_global)
    for SR in sorted(set(eff_df["sr"].tolist())):
        ROOT_OUT, OUT_FOLD, OUT_SUMM = _outdirs_for(SR)
        eff_df_sr = eff_df.loc[eff_df["sr"] == int(SR)].copy()
        if eff_df_sr.empty:
            continue

        # Raw per-fold rows for this sample rate.
        per_fold_csv = OUT_SUMM / f"lstm_speed_efficiency_per_fold_sr{SR}.csv"
        eff_df_sr.to_csv(per_fold_csv, index=False)

        # Per-seed roll-up: summed wall time, mean epoch time, mean throughput,
        # and the number of fold rows that contributed (exported as "n_folds").
        agg_spec = {
            "total_time_sec": "sum",
            "avg_epoch_time_sec": "mean",
            "mean_throughput_samples_per_sec": "mean",
            "fold": "count",
        }
        per_seed = eff_df_sr.groupby(["seed"]).agg(agg_spec)
        agg = per_seed.rename(columns={"fold": "n_folds"}).reset_index()

        per_seed_eff_csv = OUT_SUMM / f"lstm_speed_efficiency_per_seed_sr{SR}.csv"
        agg.to_csv(per_seed_eff_csv, index=False)

# Progress notice (the message is Indonesian for "Training finished");
# suppressed when COMMIT_MODE is truthy.
if not COMMIT_MODE:
    print("\nTraining selesai.")

# Gather the per-fold results and the per-seed summaries into DataFrames, then
# export them per sample rate and promote the best checkpoints.
df_results_all = pd.DataFrame(results)
df_seed_all    = pd.DataFrame(summary_rows)

for SR in sorted(set(SR_LIST)):
    ROOT_OUT, OUT_FOLD, OUT_SUMM = _outdirs_for(SR)

    # Select this sample rate's rows exactly once. A DataFrame built from an
    # empty `results` list has no columns, so indexing "sr" on it would raise
    # KeyError; use an empty frame in that case instead.
    if df_results_all.empty:
        df_res_sr = df_results_all.copy()
    else:
        df_res_sr = df_results_all[df_results_all["sr"] == int(SR)].copy()

    # Per-fold results for this sample rate.
    if not df_res_sr.empty:
        df_res_sr.to_csv(OUT_FOLD / f"lstm_kfold_results_per_fold_sr{SR}.csv", index=False)

    # Per-seed summary plus a one-row multi-seed summary.
    if not df_seed_all.empty:
        df_seed_sr = df_seed_all[df_seed_all["sr"] == int(SR)].copy()
        if len(df_seed_sr):
            df_seed_sr.to_csv(OUT_SUMM / f"lstm_kfold_summary_per_seed_sr{SR}.csv", index=False)

            # Mean and sample standard deviation (ddof=1) of the per-seed fold
            # means; with a single seed the deviation is reported as 0.0.
            mu_acc = float(df_seed_sr["acc_mean_over_folds"].mean())
            sd_acc = float(df_seed_sr["acc_mean_over_folds"].std(ddof=1)) if len(df_seed_sr) > 1 else 0.0

            df_sr_summary = pd.DataFrame([{
                "model": "LSTM",
                "sr": int(SR),
                "acc_mean": mu_acc,
                "acc_sd": sd_acc,
                "n_seeds": int(df_seed_sr["seed"].nunique()),
                # Fall back to the configured fold count rather than a literal.
                "kfold": int(df_seed_sr["n_folds"].max()) if "n_folds" in df_seed_sr else int(N_SPLITS)
            }])
            df_sr_summary.to_csv(OUT_SUMM / f"lstm_kfold_multi_seed_summary_sr{SR}.csv", index=False)

    if df_res_sr.empty:
        # No fold results for this sample rate: nothing to rank or copy.
        continue

    # Manifest of every (seed, fold) checkpoint for this sample rate.
    manifest_path = OUT_SUMM / f"lstm_best_checkpoints_per_seed_fold_sr{SR}.csv"
    cols = ["seed", "fold", "sr", "val_acc", "ckpt_path", "cm_path"]
    df_res_sr[cols].sort_values(["seed", "fold"]).to_csv(manifest_path, index=False)

    # Best fold (highest val_acc) for each seed.
    idx_per_seed = df_res_sr.groupby("seed")["val_acc"].idxmax()
    df_best_per_seed = df_res_sr.loc[idx_per_seed].copy().sort_values(["seed"]).reset_index(drop=True)

    per_seed_csv = OUT_SUMM / f"lstm_best_per_seed_sr{SR}.csv"
    df_best_per_seed[["seed", "fold", "sr", "val_acc", "ckpt_path"]].to_csv(per_seed_csv, index=False)

    # Copy each seed's best checkpoint next to the summaries. Copying is
    # best-effort: any failure (including an unusable ckpt_path value) is
    # reported as a warning and does not abort the export.
    for _, row in df_best_per_seed.iterrows():
        dst = OUT_SUMM / f"bestmodel_seed{int(row['seed'])}_sr{int(SR)}.pth"
        try:
            src = Path(row["ckpt_path"])
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dst)
        except Exception as e:
            print(f"[WARN][SR={SR}] Gagal salin best per seed {int(row['seed'])}: {e}")

    # Copy the single best checkpoint for this sample rate (same best-effort policy).
    row_sr_best = df_res_sr.loc[df_res_sr["val_acc"].idxmax()]
    dst = ROOT_OUT / f"bestmodel_sr{int(SR)}.pth"
    try:
        src = Path(row_sr_best["ckpt_path"])
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
    except Exception as e:
        print(f"[WARN][SR={SR}] Gagal salin best per SR: {e}")

# Promote the checkpoint with the highest validation accuracy across all
# seeds, folds and sample rates to a fixed location. The copy is best-effort:
# a failure only prints a warning.
if not df_results_all.empty:
    best_idx = df_results_all["val_acc"].idxmax()
    row_global = df_results_all.loc[best_idx]
    src = Path(row_global["ckpt_path"])
    final_dst = Path("/kaggle/working", "bestmodel.pth")
    try:
        final_dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, final_dst)
    except Exception as e:
        print(f"[WARN] Gagal salin global best: {e}")

def _slim_best_record(rec):
    """Reduce one results row to the fields stored in the best-model files.

    Args:
        rec: A row of the results DataFrame (a mapping-like object offering
            ``[]`` access and ``.get``).

    Returns:
        dict with ``seed``, ``fold`` and ``sr`` as int, ``val_acc`` as float,
        and ``cm_path`` / ``ckpt_path`` passed through unchanged (``None``
        when the key is absent).
    """
    # NOTE(review): the two path values are written with json.dump below, so
    # they must be JSON-serialisable (e.g. str) -- confirm where they are set.
    return {
        "seed": int(rec["seed"]),
        "fold": int(rec["fold"]),
        "sr":   int(rec["sr"]),
        "val_acc": float(rec["val_acc"]),
        "cm_path": rec.get("cm_path", None),
        "ckpt_path": rec.get("ckpt_path", None),
    }


legacy_dir = "/kaggle/working/kfold_outputs"
os.makedirs(legacy_dir, exist_ok=True)

all_best_rows = []

# Per sample rate: write the best fold of every seed as individual JSON files
# plus a combined CSV/JSON pair.
for SR in SR_LIST:
    ROOT_OUT, OUT_FOLD, OUT_SUMM = _outdirs_for(SR)

    # A DataFrame built from an empty results list has no "sr" column;
    # filtering on it would raise KeyError, so skip instead.
    if df_results_all.empty:
        continue
    df_res_sr = df_results_all[df_results_all["sr"] == int(SR)].copy()
    if df_res_sr.empty:
        continue

    idx_per_seed = df_res_sr.groupby("seed")["val_acc"].idxmax()
    df_best_per_seed = df_res_sr.loc[idx_per_seed].copy().sort_values(["seed"]).reset_index(drop=True)

    best_rows_sr = []
    for _, rec in df_best_per_seed.iterrows():
        slim = _slim_best_record(rec)
        best_rows_sr.append(slim)
        with open(OUT_SUMM / f"best_seed{slim['seed']}_fold{slim['fold']}_sr{SR}.json", "w") as f:
            json.dump(slim, f, indent=2)

    pd.DataFrame(best_rows_sr).to_csv(OUT_SUMM / f"best_per_seed_sr{SR}.csv", index=False)
    with open(OUT_SUMM / f"best_per_seed_sr{SR}.json", "w") as f:
        json.dump(best_rows_sr, f, indent=2)

    all_best_rows.extend(best_rows_sr)

# Legacy layout: one combined CSV/JSON across all sample rates, plus one JSON
# per seed holding that seed's best row over every fold and sample rate.
if all_best_rows:
    df_best_all = pd.DataFrame(all_best_rows).sort_values(["sr", "seed"]).reset_index(drop=True)
    df_best_path_csv  = os.path.join(legacy_dir, "best_per_seed.csv")
    df_best_path_json = os.path.join(legacy_dir, "best_per_seed.json")
    df_best_all.to_csv(df_best_path_csv, index=False)
    with open(df_best_path_json, "w") as f:
        json.dump(all_best_rows, f, indent=2)

    idx_best_seed_global = df_results_all.groupby("seed")["val_acc"].idxmax()
    for _, rec in df_results_all.loc[idx_best_seed_global].iterrows():
        slim = _slim_best_record(rec)
        with open(os.path.join(legacy_dir, f"best_seed{slim['seed']}_fold{slim['fold']}.json"), "w") as f:
            json.dump(slim, f, indent=2)

# Human-readable listing of the artefact paths for each sample rate; skipped
# when COMMIT_MODE is truthy.
if not COMMIT_MODE:
    for SR in SR_LIST:
        ROOT_OUT, OUT_FOLD, OUT_SUMM = _outdirs_for(SR)
        saved_lines = [
            f"\n[SR={SR}] Saved to:",
            f" ├─ {OUT_FOLD}/lstm_kfold_results_per_fold_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_kfold_summary_per_seed_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_kfold_multi_seed_summary_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_best_checkpoints_per_seed_fold_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_best_per_seed_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_speed_efficiency_per_fold_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/lstm_speed_efficiency_per_seed_sr{SR}.csv",
            f" ├─ {OUT_SUMM}/best_per_seed_sr{SR}.json",
            f" ├─ {ROOT_OUT}/bestmodel_sr{SR}.pth",
            f" ├─ /kaggle/working/bestmodel.pth (global, opsional)",
            f" └─ /kaggle/working/kfold_outputs/best_per_seed.csv|json (legacy)",
        ]
        # One print call, one line per entry: same output as printing each in turn.
        print(*saved_lines, sep="\n")