In [1]:
# ===================== AASIST-Lite (Spectro-Temporal) FULL TRAIN+EVAL CODE =====================
# Expects the following dataset folder layout (one sub-folder per class inside each split):
# D:\RealVsMonster_Split\train\<class>\audio.*
# D:\RealVsMonster_Split\val\<class>\audio.*
# D:\RealVsMonster_Split\test\<class>\audio.*
#
# Output:
# - best_aasist_lite.pth
# - confusion_matrix.png
# - roc_multiclass.png
# - pr_multiclass.png
# - loss_curve.png
# - acc_curve.png
# - waveform.png / spectrogram.png / chromagram.png
# - pca_embeddings.png / tsne_embeddings.png
# - classification report printed in console

import os, random
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve
from sklearn.preprocessing import label_binarize
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tqdm import tqdm

# ---------------- CONFIG ----------------
# Root folder that holds the "train", "val" and "test" split directories.
DATASET_ROOT = r"D:\RealVsMonster_Split"
# Target sampling rate (Hz) passed to librosa.load; every file is resampled to it.
SAMPLE_RATE  = 16000

# Feature: 2-channel -> [LogMel, Linear-Fbank(log)]  (spectral)
# N_MELS and N_LINFB must be equal: extract_features stacks both maps with np.stack.
N_MELS       = 64      # number of mel bands in channel 0
N_LINFB      = 64      # number of linear-frequency triangular bands in channel 1
N_FFT        = 1024    # STFT window size in samples (64 ms at 16 kHz)
HOP_LENGTH   = 256     # STFT hop in samples (16 ms at 16 kHz)
MAX_FRAMES   = 256     # every feature map is zero-padded / truncated to this many frames

# Training
BATCH_SIZE   = 16      # used by the train and val loaders (the test loader uses 1)
EPOCHS       = 30
LR           = 2e-4    # initial AdamW learning rate
RANDOM_SEED  = 42      # seeds torch, numpy and random below; also reused for PCA / t-SNE

# SpecAugment
USE_SPECAUG      = True   # global switch read inside spec_augment
TIME_MASK_PARAM  = 24     # upper bound (inclusive) on the time-mask width, in frames
FREQ_MASK_PARAM  = 6      # upper bound (inclusive) on the frequency-mask width, in bins

# Audio safety
# Clips shorter than this many samples after silence trimming are treated as unusable.
MIN_AUDIO_SAMPLES = 2048
# Clips whose RMS amplitude after trimming is below this are treated as unusable.
MIN_RMS = 1e-4

# Seed all three random number generators used by this script.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# ---------------- CLASS NAMES ----------------
# Every sub-directory of the training split is one class; sorting the names
# makes the class -> index mapping independent of directory listing order.
train_base = os.path.join(DATASET_ROOT, "train")
CLASS_NAMES = sorted(entry.name for entry in os.scandir(train_base) if entry.is_dir())
NUM_CLASSES = len(CLASS_NAMES)
print("Classes:", NUM_CLASSES)
print(CLASS_NAMES)

# ---------------- HELPERS ----------------
def pad_trunc_2d(X, max_frames):
    """Force a 2-D (F, T) array to exactly ``max_frames`` columns.

    Arrays with at least ``max_frames`` columns are cut to the first
    ``max_frames``; shorter arrays are extended on the right with float32
    zeros. The number of rows is never changed.
    """
    n_frames = X.shape[1]
    if n_frames >= max_frames:
        return X[:, :max_frames]

    filler = np.zeros((X.shape[0], max_frames - n_frames), dtype=np.float32)
    return np.concatenate([X, filler], axis=1)

def zscore_norm(X, eps=1e-6):
    """Standardise ``X`` using its global mean and standard deviation.

    The statistics are taken over the whole array (not per row or column).
    ``eps`` is added to the standard deviation so a constant array maps to
    zeros instead of dividing by zero.
    """
    center = float(X.mean())
    spread = float(X.std())
    centered = X - center
    return centered / (spread + eps)

def safe_load_and_trim(path):
    """Load an audio file as mono at SAMPLE_RATE and strip edge silence.

    Returns ``(signal, sr)``. ``signal`` is ``None`` when the trimmed clip is
    unusable: shorter than MIN_AUDIO_SAMPLES samples, or with an RMS amplitude
    below MIN_RMS. ``sr`` is always the rate returned by ``librosa.load``.
    """
    signal, sr = librosa.load(path, sr=SAMPLE_RATE, mono=True)
    # Drop leading/trailing segments more than 30 dB below the peak.
    signal, _ = librosa.effects.trim(signal, top_db=30)

    too_short = signal is None or len(signal) < MIN_AUDIO_SAMPLES
    if too_short:
        return None, sr

    # Small constant under the square root keeps the value finite and positive.
    energy = float(np.sqrt(np.mean(signal**2) + 1e-12))
    return (None, sr) if energy < MIN_RMS else (signal, sr)

# ---------------- FEATURE EXTRACTION ----------------
def extract_logmel(y, sr):
    """Compute a normalised log-mel spectrogram of fixed size.

    The power mel spectrogram (N_MELS bands, N_FFT window, HOP_LENGTH hop) is
    converted to dB relative to its own maximum, z-score normalised over the
    whole map, cast to float32 and padded/truncated to MAX_FRAMES frames.

    Returns an array of shape (N_MELS, MAX_FRAMES).
    """
    power_mel = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS,
        power=2.0,
    )
    mel_db = librosa.power_to_db(power_mel, ref=np.max)
    normalised = zscore_norm(mel_db).astype(np.float32)
    return pad_trunc_2d(normalised, MAX_FRAMES)

def extract_log_linear_fbank(y, sr):
    """
    Log energies of a hand-built linear-frequency triangular filterbank
    (avoids depending on a librosa linear filterbank helper).

    N_LINFB triangular filters with edges evenly spaced between 0 and sr/2
    are applied to the STFT power spectrum. The log energies are z-score
    normalised over the whole map, cast to float32 and padded/truncated to
    MAX_FRAMES frames.

    Returns an array of shape (N_LINFB, MAX_FRAMES).
    """
    power_spec = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))**2  # (F, T)
    n_bins = power_spec.shape[0]

    # Centre frequency of every STFT bin and the N_LINFB + 2 triangle edges.
    bin_freqs = np.linspace(0, sr/2, n_bins, dtype=np.float32)
    band_edges = np.linspace(0, sr/2, N_LINFB + 2, dtype=np.float32)

    filterbank = np.zeros((N_LINFB, n_bins), dtype=np.float32)
    edge_triples = zip(band_edges[:-2], band_edges[1:-1], band_edges[2:])
    for band, (f_lo, f_mid, f_hi) in enumerate(edge_triples):
        # Rising and falling slopes of the triangle; 1e-9 guards the division.
        rising = (bin_freqs - f_lo) / (f_mid - f_lo + 1e-9)
        falling = (f_hi - bin_freqs) / (f_hi - f_mid + 1e-9)
        filterbank[band] = np.maximum(0.0, np.minimum(rising, falling))

    # The 1e-8 floor keeps the log finite for silent frames.
    log_energy = np.log(np.dot(filterbank, power_spec) + 1e-8)

    log_energy = zscore_norm(log_energy).astype(np.float32)
    return pad_trunc_2d(log_energy, MAX_FRAMES)

def extract_features(path):
    """Build the 2-channel model input for one audio file.

    Channel 0 is the log-mel map, channel 1 the log linear-filterbank map.
    When ``safe_load_and_trim`` rejects the clip (returns ``None``), both
    channels are all-zero maps of the expected size.

    Returns a float32 array of shape (2, 64, MAX_FRAMES) with the current
    N_MELS == N_LINFB == 64 configuration.
    """
    y, sr = safe_load_and_trim(path)

    if y is not None:
        channels = [extract_logmel(y, sr), extract_log_linear_fbank(y, sr)]
    else:
        channels = [
            np.zeros((N_MELS, MAX_FRAMES), dtype=np.float32),
            np.zeros((N_LINFB, MAX_FRAMES), dtype=np.float32),
        ]

    return np.stack(channels, axis=0).astype(np.float32)  # (2, 64, T)

# ---------------- SPECAUGMENT ----------------
def spec_augment(x, time_mask_param=24, freq_mask_param=6):
    """Apply one frequency mask and one time mask (SpecAugment style).

    Args:
        x: torch tensor of shape (C, F, T). Both masks span all channels.
        time_mask_param: inclusive upper bound on the time-mask width (frames).
        freq_mask_param: inclusive upper bound on the frequency-mask width (bins).

    Returns:
        A masked copy of ``x``. The input tensor itself is never modified, so
        passing a cached or shared tensor is safe. When the global
        USE_SPECAUG switch is off, ``x`` is returned unchanged (same object).

    Mask widths and positions are drawn from Python's ``random`` module, so
    they follow ``random.seed``. Masked regions are set to 0.
    """
    if not USE_SPECAUG:
        return x

    # Work on a copy: in-place masking would silently corrupt the caller's data.
    x = x.clone()
    C, F, T = x.shape

    # Frequency mask: width f in [0, min(param, F)], start f0 in [0, F - f].
    f = random.randint(0, min(freq_mask_param, F))
    f0 = random.randint(0, max(0, F - f))
    if f > 0:
        x[:, f0:f0+f, :] = 0

    # Time mask: width t in [0, min(param, T)], start t0 in [0, T - t].
    t = random.randint(0, min(time_mask_param, T))
    t0 = random.randint(0, max(0, T - t))
    if t > 0:
        x[:, :, t0:t0+t] = 0

    return x

# ---------------- DATASET ----------------
class SpecDataset(Dataset):
    """Audio-file dataset for one split ("train", "val" or "test").

    Files are discovered under ``<root>/<split>/<class>/`` for every class in
    CLASS_NAMES; class folders missing from a split are skipped. Features are
    extracted on access with ``extract_features``. For non-train splits the
    extracted features are kept in an in-memory cache keyed by file path, so
    later passes over the split do not reload the audio. Training samples are
    re-extracted on every access and then passed through ``spec_augment``.

    Each item is ``(features, label, path)`` where ``features`` is a float
    tensor of shape (2, 64, T) with the current configuration and ``label``
    is a scalar long tensor.
    """

    # File extensions (lower-case) that are treated as audio.
    AUDIO_EXTENSIONS = (".mp3", ".wav", ".ogg", ".flac", ".m4a")

    def __init__(self, root, split):
        base = os.path.join(root, split)
        self.split = split
        self.paths, self.labels = [], []
        self.cls_to_idx = {c: i for i, c in enumerate(CLASS_NAMES)}

        for cls in CLASS_NAMES:
            folder = os.path.join(base, cls)
            if not os.path.isdir(folder):
                continue
            # os.listdir order is arbitrary; sorting makes the sample order
            # (and therefore indexing such as paths[0]) reproducible.
            for fname in sorted(os.listdir(folder)):
                if fname.lower().endswith(self.AUDIO_EXTENSIONS):
                    self.paths.append(os.path.join(folder, fname))
                    self.labels.append(self.cls_to_idx[cls])

        print(f"{split} set: {len(self.paths)} files")
        self.cache = {}  # cache val/test

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        label = self.labels[idx]

        if self.split != "train" and path in self.cache:
            feat = self.cache[path]
        else:
            feat = extract_features(path)
            if self.split != "train":
                self.cache[path] = feat

        # torch.tensor copies the array, so the cached features stay untouched.
        x = torch.tensor(feat)  # (2,64,T)
        if self.split == "train":
            x = spec_augment(x, TIME_MASK_PARAM, FREQ_MASK_PARAM)

        return x, torch.tensor(label, dtype=torch.long), path

# One dataset per split, built in train -> val -> test order.
train_ds, val_ds, test_ds = (
    SpecDataset(DATASET_ROOT, split_name) for split_name in ("train", "val", "test")
)

# Settings shared by all loaders: feature extraction runs in the main process.
_loader_common = dict(num_workers=0, pin_memory=True)

# Only the training loader shuffles; the test loader yields one clip per batch.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, **_loader_common)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, **_loader_common)
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False, **_loader_common)

# ---------------- MODEL (AASIST-Lite): CNN(spectral) + Transformer(temporal) + AttnPool ----------------
class AttentivePool(nn.Module):
    """Attention-weighted average over the time axis.

    A small MLP (Linear -> Tanh -> Linear) scores every time step; the scores
    are softmax-normalised across time and used to average the sequence.
    """

    def __init__(self, d):
        super().__init__()
        hidden = d // 2
        scorer_layers = [nn.Linear(d, hidden), nn.Tanh(), nn.Linear(hidden, 1)]
        self.attn = nn.Sequential(*scorer_layers)

    def forward(self, x):
        """Pool ``x`` of shape (B, T, D) into a (B, D) tensor."""
        scores = self.attn(x)               # (B,T,1)
        weights = scores.softmax(dim=1)     # normalised over the T axis
        return torch.sum(weights * x, dim=1)

class AASISTLite(nn.Module):
    """
    CNN + Transformer classifier over 2-channel spectral features.

    Input: (B, 2, 64, T)
    CNN         -> local spectral patterns; each of the 3 stages halves F and T
    Transformer -> temporal modelling over the remaining T/8 frames
    AttentivePool + MLP head -> one embedding and one logit vector per clip
    """

    def __init__(self, num_classes, d_model=256, nhead=4, num_layers=2, dropout=0.2):
        super().__init__()

        # Three Conv-BN-ReLU-MaxPool stages: 2 -> 32 -> 64 -> 128 channels.
        stages = []
        in_channels = 2
        for out_channels in (32, 64, 128):
            stages.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d((2, 2)),
            ])
            in_channels = out_channels
        self.cnn = nn.Sequential(*stages)

        # Frequency bins left after three 2x poolings (64 -> 8 with N_MELS = 64).
        f_after = N_MELS // 8
        self.proj = nn.Linear(128 * f_after, d_model)

        self.temporal = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=4 * d_model,
                dropout=dropout,
                activation="gelu",
                batch_first=True,
                norm_first=True,
            ),
            num_layers=num_layers,
        )

        self.pool = AttentivePool(d_model)
        self.head = nn.Sequential(
            nn.Linear(d_model, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return ``(logits, emb)``: (B, num_classes) scores and (B, d_model) embeddings."""
        feat = self.cnn(x)                                   # (B,128,F',T')
        batch, channels, freq, steps = feat.shape

        # Make time the sequence axis and flatten (channel, freq) per frame.
        seq = feat.permute(0, 3, 1, 2).reshape(batch, steps, channels * freq)
        seq = self.proj(seq)                                 # (B,T',d_model)
        seq = self.temporal(seq)                             # (B,T',d_model)

        emb = self.pool(seq)                                 # (B,d_model)
        return self.head(emb), emb

model = AASISTLite(num_classes=NUM_CLASSES).to(device)
print(model)

# ---------------- LOSS/OPT ----------------
# Inverse-frequency class weights, normalised to mean 1 over the classes that
# actually occur in the training split. A class with zero training files gets
# a neutral weight of 1.0: dividing by (0 + 1e-6) would give it an enormous
# weight and, after normalisation, push every other class weight towards 0.
counts = np.bincount(
    np.asarray(train_ds.labels, dtype=np.int64), minlength=NUM_CLASSES
).astype(np.float32)
present = counts > 0
w = np.ones_like(counts)
w[present] = counts.sum() / (counts[present] + 1e-6)
if present.any():
    w[present] = w[present] / w[present].mean()
class_weights = torch.tensor(w, dtype=torch.float32).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
# mode="max": the scheduler is stepped with validation accuracy and halves the
# learning rate after 2 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=2)

# Mixed precision is only enabled on CUDA; on CPU the scaler is a no-op.
use_amp = (device.type == "cuda")
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

def acc_from_logits(logits, y):
    """Return the fraction of rows whose arg-max class equals the label.

    ``logits`` has one row of class scores per sample and ``y`` holds the
    integer labels; the result is a Python float in [0, 1].
    """
    predicted = logits.argmax(dim=1)
    hits = predicted.eq(y).float()
    return hits.mean().item()

# ---------------- TRAIN ----------------
# Per-epoch history, used later for the loss / accuracy curve figures.
train_losses, val_losses = [], []
train_accs, val_accs = [], []
# Best validation accuracy so far; -1.0 guarantees the first epoch is saved.
best_val = -1.0

for epoch in range(EPOCHS):
    # ---- training pass ----
    model.train()
    # Running sums weighted by batch size, so the epoch averages are per-sample.
    tr_loss_sum, tr_acc_sum, tr_n = 0.0, 0.0, 0

    for x, y, _ in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [train]"):
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        # Forward pass and loss under autocast (active only when use_amp is True).
        with torch.amp.autocast(device_type='cuda', enabled=use_amp):
            logits, _ = model(x)
            loss = criterion(logits, y)

        # AMP step order matters: scale -> backward -> unscale -> clip -> step -> update.
        # Gradients are unscaled first so the clipping threshold (5.0) applies to
        # their true magnitudes.
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        scaler.step(optimizer)
        scaler.update()

        bs = y.size(0)
        tr_loss_sum += loss.item() * bs
        tr_acc_sum  += acc_from_logits(logits.detach(), y) * bs
        tr_n += bs

    train_loss = tr_loss_sum / tr_n
    train_acc  = tr_acc_sum / tr_n
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # ---- validation pass (no gradients, no autocast) ----
    model.eval()
    va_loss_sum, va_acc_sum, va_n = 0.0, 0.0, 0
    with torch.no_grad():
        for x, y, _ in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [val]"):
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            logits, _ = model(x)
            loss = criterion(logits, y)

            bs = y.size(0)
            va_loss_sum += loss.item() * bs
            va_acc_sum  += acc_from_logits(logits, y) * bs
            va_n += bs

    val_loss = va_loss_sum / va_n
    val_acc  = va_acc_sum / va_n
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # The scheduler was created with mode="max", so it tracks validation accuracy.
    scheduler.step(val_acc)

    print(f"Epoch {epoch+1:02d} | Train Loss {train_loss:.4f} Acc {train_acc:.4f} | Val Loss {val_loss:.4f} Acc {val_acc:.4f}")

    # Checkpoint only on a strict improvement (tolerance 1e-6) of validation accuracy.
    if val_acc > best_val + 1e-6:
        best_val = val_acc
        torch.save(model.state_dict(), "best_aasist_lite.pth")

print("Training done. Best Val Acc:", best_val)
print("Saved best weights to best_aasist_lite.pth")

# ---------------- TEST + REPORTS + CURVES ----------------
# Restore the best checkpoint. weights_only=True restricts unpickling to
# tensors and plain containers, which is all a state_dict holds; it avoids
# arbitrary-code execution from a tampered file and silences the torch.load
# FutureWarning.
model.load_state_dict(
    torch.load("best_aasist_lite.pth", map_location=device, weights_only=True)
)
model.eval()

# Per-sample results accumulated over the whole test split.
y_true, y_pred = [], []
probs_all = []
embs, emb_labels = [], []

with torch.no_grad():
    for x, y, _ in tqdm(test_loader, desc="Testing"):
        x = x.to(device)
        logits, emb = model(x)
        prob = torch.softmax(logits, dim=1).cpu().numpy()   # (B, num_classes)
        pred = prob.argmax(axis=1)                          # (B,)

        # extend() handles any batch size, not only the batch_size=1 used by
        # the current test loader.
        labels = [int(v) for v in y.tolist()]
        y_true.extend(labels)
        y_pred.extend(int(p) for p in pred)
        probs_all.extend(prob)
        embs.extend(emb.cpu().numpy())
        emb_labels.extend(labels)

y_true = np.array(y_true)
y_pred = np.array(y_pred)
probs_all = np.array(probs_all)      # (N, num_classes)
embs = np.array(embs)                # (N, d_model)
emb_labels = np.array(emb_labels)

print("\n================ CLASSIFICATION REPORT ================\n")
# Passing the full label list keeps the report and the confusion matrix
# aligned with CLASS_NAMES even when a class never appears in the test
# labels or predictions.
all_labels = list(range(NUM_CLASSES))
print(classification_report(y_true, y_pred, labels=all_labels, target_names=CLASS_NAMES, digits=4))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=all_labels)
plt.figure(figsize=(12, 10))
plt.imshow(cm, interpolation="nearest")
plt.title("Confusion Matrix (AASIST-Lite)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.colorbar()
plt.tight_layout()
plt.savefig("confusion_matrix.png", dpi=200)
plt.close()

# ROC + PR (multi-class OVR)
Y_bin = label_binarize(y_true, classes=all_labels)
if NUM_CLASSES == 2 and Y_bin.shape[1] == 1:
    # For two classes label_binarize returns one column (indicator of class 1);
    # expand it so that column i is the indicator of class i.
    Y_bin = np.hstack([1 - Y_bin, Y_bin])

# ROC
plt.figure(figsize=(10, 7))
for i in range(NUM_CLASSES):
    n_pos = int(Y_bin[:, i].sum())
    # A ROC curve needs both positives and negatives for the class.
    if n_pos == 0 or n_pos == len(y_true):
        continue
    fpr, tpr, _ = roc_curve(Y_bin[:, i], probs_all[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"{CLASS_NAMES[i]} (AUC={roc_auc:.2f})")
plt.plot([0, 1], [0, 1], "k--")
plt.title("Multi-class ROC Curve (AASIST-Lite)")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(fontsize=7, loc="lower right")
plt.tight_layout()
plt.savefig("roc_multiclass.png", dpi=200)
plt.close()

# PR
plt.figure(figsize=(10, 7))
for i in range(NUM_CLASSES):
    # Recall is undefined for a class without positive test samples.
    if int(Y_bin[:, i].sum()) == 0:
        continue
    prec, rec, _ = precision_recall_curve(Y_bin[:, i], probs_all[:, i])
    pr_auc = auc(rec, prec)
    plt.plot(rec, prec, label=f"{CLASS_NAMES[i]} (AUC={pr_auc:.2f})")
plt.title("Multi-class Precision-Recall Curve (AASIST-Lite)")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(fontsize=7, loc="lower left")
plt.tight_layout()
plt.savefig("pr_multiclass.png", dpi=200)
plt.close()

# Train/Val curves
def _save_train_val_curve(train_series, val_series, short_name, long_name, filename):
    """Plot one train-vs-validation history pair and write it to ``filename``."""
    plt.figure()
    for series, split in ((train_series, "Train"), (val_series, "Val")):
        plt.plot(series, label=f"{split} {short_name}")
    plt.title(f"Training vs Validation {long_name} (AASIST-Lite)")
    plt.xlabel("Epoch")
    plt.ylabel(long_name)
    plt.legend()
    plt.tight_layout()
    plt.savefig(filename, dpi=200)
    plt.close()


_save_train_val_curve(train_losses, val_losses, "Loss", "Loss", "loss_curve.png")
_save_train_val_curve(train_accs, val_accs, "Acc", "Accuracy", "acc_curve.png")

# Waveform + Spectrogram + Chromagram (one test file)
def _title_and_save(title, filename):
    """Title the current figure, write it to ``filename`` at 200 dpi and close it."""
    plt.title(title)
    plt.tight_layout()
    plt.savefig(filename, dpi=200)
    plt.close()


# The first test file (if any) is used for the three illustrative figures.
sample_path = test_ds.paths[0] if test_ds.paths else None
if sample_path:
    y, sr = librosa.load(sample_path, sr=SAMPLE_RATE, mono=True)

    # Waveform: amplitude against time in seconds.
    plt.figure(figsize=(12, 3))
    time_axis = np.linspace(0, len(y)/sr, len(y))
    plt.plot(time_axis, y)
    plt.xlabel("Time (s)")
    _title_and_save("Waveform", "waveform.png")

    # Magnitude spectrogram in dB relative to its own peak.
    magnitude = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
    D = librosa.amplitude_to_db(magnitude, ref=np.max)
    plt.figure(figsize=(12, 4))
    librosa.display.specshow(D, sr=sr, hop_length=HOP_LENGTH, x_axis="time", y_axis="hz")
    plt.colorbar(format="%+0.0f dB")
    _title_and_save("Spectrogram (dB)", "spectrogram.png")

    # Chromagram with a fixed tuning offset of 0.
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, tuning=0.0)
    plt.figure(figsize=(12, 3))
    librosa.display.specshow(chroma, sr=sr, hop_length=HOP_LENGTH, x_axis="time", y_axis="chroma")
    plt.colorbar()
    _title_and_save("Chromagram", "chromagram.png")

# PCA / t-SNE embeddings
def _save_embedding_scatter(points, title, filename):
    """Scatter 2-D ``points`` coloured by ``emb_labels`` and write ``filename``."""
    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(points[:, 0], points[:, 1], c=emb_labels, s=10)
    plt.title(title)
    plt.colorbar(scatter)
    plt.tight_layout()
    plt.savefig(filename, dpi=200)
    plt.close()
    return scatter


# Linear 2-D projection of the test embeddings.
pca = PCA(n_components=2, random_state=RANDOM_SEED)
Zp = pca.fit_transform(embs)
sc = _save_embedding_scatter(Zp, "PCA of AASIST-Lite Embeddings", "pca_embeddings.png")

# Non-linear 2-D projection, initialised from PCA.
tsne = TSNE(n_components=2, random_state=RANDOM_SEED, init="pca", learning_rate="auto")
Zt = tsne.fit_transform(embs)
sc = _save_embedding_scatter(Zt, "t-SNE of AASIST-Lite Embeddings", "tsne_embeddings.png")

for summary_line in (
    "\nSaved figures:",
    "confusion_matrix.png, roc_multiclass.png, pr_multiclass.png, loss_curve.png, acc_curve.png",
    "waveform.png, spectrogram.png, chromagram.png, pca_embeddings.png, tsne_embeddings.png",
    "Best model: best_aasist_lite.pth",
):
    print(summary_line)
# ==============================================================================================


Device: cuda
Classes: 30
['Barishal', 'Barishal_monster', 'Chapai', 'Chapai_monster', 'Chittagong', 'Chittagong_monster', 'Habiganj', 'Habiganj_monster', 'Kustia', 'Kustia_monster', 'Naoga', 'Naoga_monster', 'Narail', 'Narail_monster', 'Narsingdi', 'Narsingdi_monster', 'Rajshahi', 'Rajshahi_monster', 'Rangpur', 'Rangpur_monster', 'Sandwip', 'Sandwip_monster', 'Sylhet', 'Sylhet_monster', 'Tangail', 'Tangail_monster', 'kishoreganj', 'kishoreganj_monster', 'pabna', 'pabna_monster']
train set: 27944 files
val set: 6016 files
test set: 5980 files




AASISTLite(
  (cnn): Sequential(
    (0): Conv2d(2, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (proj): Linear(in_features=1024, out_features=256, bias=True)
  (temporal): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2

Epoch 1/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [28:28<00:00,  1.02it/s]
Epoch 1/30 [val]: 100%|██████████████████████████████████████████████████████████████| 376/376 [04:45<00:00,  1.32it/s]


Epoch 01 | Train Loss 1.3559 Acc 0.5096 | Val Loss 0.8586 Acc 0.7083


Epoch 2/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [26:50<00:00,  1.08it/s]
Epoch 2/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 150.01it/s]


Epoch 02 | Train Loss 0.7983 Acc 0.6857 | Val Loss 0.6740 Acc 0.7591


Epoch 3/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [21:49<00:00,  1.33it/s]
Epoch 3/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 152.11it/s]


Epoch 03 | Train Loss 0.6130 Acc 0.7530 | Val Loss 0.5375 Acc 0.8085


Epoch 4/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [20:35<00:00,  1.41it/s]
Epoch 4/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 155.31it/s]


Epoch 04 | Train Loss 0.4996 Acc 0.7920 | Val Loss 0.4766 Acc 0.8339


Epoch 5/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [20:00<00:00,  1.46it/s]
Epoch 5/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 154.44it/s]


Epoch 05 | Train Loss 0.4234 Acc 0.8228 | Val Loss 0.4186 Acc 0.8441


Epoch 6/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [20:00<00:00,  1.46it/s]
Epoch 6/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 152.16it/s]


Epoch 06 | Train Loss 0.3686 Acc 0.8439 | Val Loss 0.3630 Acc 0.8738


Epoch 7/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [23:30<00:00,  1.24it/s]
Epoch 7/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 113.57it/s]


Epoch 07 | Train Loss 0.3249 Acc 0.8615 | Val Loss 0.3509 Acc 0.8777


Epoch 8/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [49:20<00:00,  1.69s/it]
Epoch 8/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 124.48it/s]


Epoch 08 | Train Loss 0.2896 Acc 0.8771 | Val Loss 0.3005 Acc 0.8954


Epoch 9/30 [train]: 100%|██████████████████████████████████████████████████████████| 1747/1747 [30:06<00:00,  1.03s/it]
Epoch 9/30 [val]: 100%|█████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 153.14it/s]


Epoch 09 | Train Loss 0.2607 Acc 0.8891 | Val Loss 0.3617 Acc 0.8778


Epoch 10/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [26:04<00:00,  1.12it/s]
Epoch 10/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 149.80it/s]


Epoch 10 | Train Loss 0.2364 Acc 0.9002 | Val Loss 0.2679 Acc 0.9119


Epoch 11/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [22:52<00:00,  1.27it/s]
Epoch 11/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 150.82it/s]


Epoch 11 | Train Loss 0.2147 Acc 0.9082 | Val Loss 0.2725 Acc 0.9132


Epoch 12/30 [train]:  62%|███████████████████████████████████▎                     | 1084/1747 [13:37<09:00,  1.23it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 13/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [23:53<00:00,  1.22it/s]
Epoch 13/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 147.81it/s]


Epoch 13 | Train Loss 0.1825 Acc 0.9237 | Val Loss 0.2559 Acc 0.9182


Epoch 14/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [28:17<00:00,  1.03it/s]
Epoch 14/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 154.46it/s]


Epoch 14 | Train Loss 0.1695 Acc 0.9291 | Val Loss 0.2299 Acc 0.9297


Epoch 15/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [21:39<00:00,  1.34it/s]
Epoch 15/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 156.42it/s]


Epoch 15 | Train Loss 0.1596 Acc 0.9350 | Val Loss 0.2604 Acc 0.9232


Epoch 16/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [23:07<00:00,  1.26it/s]
Epoch 16/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 142.75it/s]


Epoch 16 | Train Loss 0.1480 Acc 0.9389 | Val Loss 0.2754 Acc 0.9254


Epoch 17/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [32:47<00:00,  1.13s/it]
Epoch 17/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 137.28it/s]


Epoch 17 | Train Loss 0.1438 Acc 0.9418 | Val Loss 0.2290 Acc 0.9343


Epoch 18/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [29:29<00:00,  1.01s/it]
Epoch 18/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 147.07it/s]


Epoch 18 | Train Loss 0.1337 Acc 0.9457 | Val Loss 0.2259 Acc 0.9382


Epoch 19/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [57:32<00:00,  1.98s/it]
Epoch 19/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 107.86it/s]


Epoch 19 | Train Loss 0.1208 Acc 0.9517 | Val Loss 0.2183 Acc 0.9437


Epoch 20/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [41:22<00:00,  1.42s/it]
Epoch 20/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 109.29it/s]


Epoch 20 | Train Loss 0.1166 Acc 0.9528 | Val Loss 0.2263 Acc 0.9378


Epoch 21/30 [train]:  53%|██████████████████████████████▋                           | 924/1747 [20:57<19:05,  1.39s/it]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 21/30 [train]: 100%|██████████████████████████████████████████████████████| 1747/1747 [13:03:21<00:00, 26.90s/it]
Epoch 21/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 154.93it/s]


Epoch 21 | Train Loss 0.1108 Acc 0.9558 | Val Loss 0.2535 Acc 0.9383


Epoch 22/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [31:36<00:00,  1.09s/it]
Epoch 22/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 113.42it/s]


Epoch 22 | Train Loss 0.1105 Acc 0.9568 | Val Loss 0.2106 Acc 0.9451


Epoch 23/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [28:19<00:00,  1.03it/s]
Epoch 23/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 151.96it/s]


Epoch 23 | Train Loss 0.1010 Acc 0.9607 | Val Loss 0.2535 Acc 0.9377


Epoch 24/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [30:49<00:00,  1.06s/it]
Epoch 24/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 143.05it/s]


Epoch 24 | Train Loss 0.1021 Acc 0.9620 | Val Loss 0.2139 Acc 0.9481


Epoch 25/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [25:10<00:00,  1.16it/s]
Epoch 25/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 150.46it/s]


Epoch 25 | Train Loss 0.0888 Acc 0.9655 | Val Loss 0.2130 Acc 0.9456


Epoch 26/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [30:37<00:00,  1.05s/it]
Epoch 26/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 149.56it/s]


Epoch 26 | Train Loss 0.0925 Acc 0.9645 | Val Loss 0.2237 Acc 0.9461


Epoch 27/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [31:53<00:00,  1.10s/it]
Epoch 27/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 142.53it/s]


Epoch 27 | Train Loss 0.0859 Acc 0.9671 | Val Loss 0.2372 Acc 0.9461


Epoch 28/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [38:16<00:00,  1.31s/it]
Epoch 28/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 107.86it/s]


Epoch 28 | Train Loss 0.0429 Acc 0.9828 | Val Loss 0.1851 Acc 0.9588


Epoch 29/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [47:03<00:00,  1.62s/it]
Epoch 29/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:03<00:00, 106.56it/s]


Epoch 29 | Train Loss 0.0343 Acc 0.9862 | Val Loss 0.1838 Acc 0.9573


Epoch 30/30 [train]:  23%|█████████████▍                                            | 406/1747 [10:34<22:02,  1.01it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 30/30 [train]: 100%|█████████████████████████████████████████████████████████| 1747/1747 [34:59<00:00,  1.20s/it]
Epoch 30/30 [val]: 100%|████████████████████████████████████████████████████████████| 376/376 [00:02<00:00, 144.01it/s]


Epoch 30 | Train Loss 0.0357 Acc 0.9863 | Val Loss 0.1873 Acc 0.9598
Training done. Best Val Acc: 0.9597739361702128
Saved best weights to best_aasist_lite.pth


  model.load_state_dict(torch.load("best_aasist_lite.pth", map_location=device))
Testing: 100%|█████████████████████████████████████████████████████████████████████| 5980/5980 [06:10<00:00, 16.12it/s]




                     precision    recall  f1-score   support

           Barishal     0.9362    0.9778    0.9565       135
   Barishal_monster     0.8841    0.9037    0.8938       135
             Chapai     0.9804    1.0000    0.9901       150
     Chapai_monster     1.0000    0.9867    0.9933       150
         Chittagong     0.9750    0.9873    0.9811       237
 Chittagong_monster     0.9118    0.9156    0.9137       237
           Habiganj     0.8935    0.9557    0.9235       158
   Habiganj_monster     0.9045    0.8987    0.9016       158
             Kustia     1.0000    0.9933    0.9967       150
     Kustia_monster     0.9868    1.0000    0.9934       150
              Naoga     1.0000    1.0000    1.0000       150
      Naoga_monster     1.0000    0.9933    0.9967       150
             Narail     0.9879    0.9800    0.9839       250
     Narail_monster     0.9869    0.9040    0.9436       250
          Narsingdi     0.9725    0.9568    0.9646       185
  Narsingdi_monster  