In [22]:
import os, glob, numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


In [25]:
EMB_DIR = '/home/jovyan/Features/embeddings'

# 1) Gather embedding files
all_paths  = sorted(glob.glob(os.path.join(EMB_DIR, '**', '*_emb.npz'), recursive=True))
all_labels = [int(np.load(p)['label']) for p in all_paths]

# 2) Split into train/test, try stratify then fallback
try:
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        all_paths, all_labels,
        test_size=0.2,
        stratify=all_labels,
        random_state=42
    )
except ValueError:
    print("Warning: stratify failed (too few samples in some classes), splitting without stratify.")
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        all_paths, all_labels,
        test_size=0.2,
        shuffle=True,
        random_state=42
    )

# 3) Dataset definition
class EmbeddingDataset(Dataset):
    def __init__(self, paths):
        self.paths = paths
    def __len__(self):
        return len(self.paths)
    def __getitem__(self, idx):
        data = np.load(self.paths[idx])
        emb  = data['embedding'].astype(np.float32)
        lbl  = int(data['label'])
        return torch.from_numpy(emb), torch.tensor(lbl)

# 4) Instantiate & wrap in DataLoaders
train_ds = EmbeddingDataset(train_paths)
test_ds  = EmbeddingDataset(test_paths)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=0, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

print(f"Train samples: {len(train_ds)}, Test samples: {len(test_ds)}")

Train samples: 8988, Test samples: 2248


In [27]:
# Cell 3 — Define MLP, criterion, optimizer & count params
class MLPClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dims, num_classes, dropout=0.5):
        super().__init__()
        layers = []
        dims = [input_dim] + hidden_dims
        for i in range(len(hidden_dims)):
            layers += [
                nn.Linear(dims[i], dims[i+1]),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout)
            ]
        layers.append(nn.Linear(dims[-1], num_classes))
        self.net = nn.Sequential(*layers)
    def forward(self, x):
        return self.net(x)

sample_emb, sample_lbl = next(iter(train_loader))
input_dim   = sample_emb.shape[1]
num_classes = len(set(all_labels))

model     = MLPClassifier(input_dim, [1024, 512], num_classes, dropout=0.5).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters: {total_params:,}")


Total trainable parameters: 2,714,290


In [31]:
# Cell 4 — Training Loop with Best‐Checkpoint Saving
num_epochs = 20
best_acc   = 0.0
checkpoint_path = 'best_panns_mlp_checkpoint.pt'

for epoch in range(1, num_epochs+1):
    # — Train —
    model.train()
    train_loss = 0.0
    train_correct = 0
    total = 0
    for emb, lbl in tqdm(train_loader, desc=f"Epoch {epoch} ▶ Train"):
        emb, lbl = emb.to(device), lbl.to(device)
        optimizer.zero_grad()
        logits = model(emb)
        loss   = criterion(logits, lbl)
        loss.backward()
        optimizer.step()

        train_loss    += loss.item() * emb.size(0)
        train_correct += (logits.argmax(dim=1) == lbl).sum().item()
        total         += emb.size(0)

    train_loss /= total
    train_acc   = train_correct / total

    # — Validate —
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for emb, lbl in tqdm(test_loader, desc=f"Epoch {epoch} ✅ Val"):
            emb, lbl = emb.to(device), lbl.to(device)
            logits = model(emb)
            loss   = criterion(logits, lbl)

            val_loss    += loss.item() * emb.size(0)
            val_correct += (logits.argmax(dim=1) == lbl).sum().item()
            val_total   += emb.size(0)

    val_loss /= val_total
    val_acc   = val_correct / val_total

    print(f"\nEpoch {epoch:02d} | "
          f"Train: loss={train_loss:.4f}, acc={train_acc:.4f} | "
          f"Val:   loss={val_loss:.4f}, acc={val_acc:.4f}")

    # — Save best checkpoint —
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict':       model.state_dict(),
            'optimizer_state_dict':   optimizer.state_dict(),
            'best_validation_acc':    best_acc,
            'input_dim':              input_dim,
            'hidden_dims':            [1024, 512],
            'num_classes':            num_classes
        }, checkpoint_path)
        print(f"✔️  New best model saved (epoch {epoch}, val_acc={val_acc:.4f})")

print(f"\n🎉 Best validation accuracy: {best_acc:.4f}")


Epoch 1 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 1 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 01 | Train: loss=1.6034, acc=0.5744 | Val:   loss=2.0564, acc=0.5311
✔️  New best model saved (epoch 1, val_acc=0.5311)


Epoch 2 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 2 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 02 | Train: loss=1.5961, acc=0.5702 | Val:   loss=2.0526, acc=0.5418
✔️  New best model saved (epoch 2, val_acc=0.5418)


Epoch 3 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 3 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 03 | Train: loss=1.5669, acc=0.5843 | Val:   loss=2.0463, acc=0.5378


Epoch 4 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 4 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 04 | Train: loss=1.5898, acc=0.5813 | Val:   loss=2.0355, acc=0.5351


Epoch 5 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 5 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 05 | Train: loss=1.5597, acc=0.5837 | Val:   loss=2.0100, acc=0.5440
✔️  New best model saved (epoch 5, val_acc=0.5440)


Epoch 6 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 6 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 06 | Train: loss=1.5218, acc=0.5912 | Val:   loss=2.0014, acc=0.5383


Epoch 7 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 7 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 07 | Train: loss=1.5031, acc=0.5918 | Val:   loss=2.0488, acc=0.5409


Epoch 8 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 8 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 08 | Train: loss=1.5267, acc=0.5860 | Val:   loss=2.0154, acc=0.5467
✔️  New best model saved (epoch 8, val_acc=0.5467)


Epoch 9 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 9 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 09 | Train: loss=1.4792, acc=0.6016 | Val:   loss=2.0503, acc=0.5485
✔️  New best model saved (epoch 9, val_acc=0.5485)


Epoch 10 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 10 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 10 | Train: loss=1.5203, acc=0.5956 | Val:   loss=2.0403, acc=0.5445


Epoch 11 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 11 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 11 | Train: loss=1.5004, acc=0.6010 | Val:   loss=2.0097, acc=0.5472


Epoch 12 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 12 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 12 | Train: loss=1.4853, acc=0.6060 | Val:   loss=2.0389, acc=0.5427


Epoch 13 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 13 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 13 | Train: loss=1.4846, acc=0.6030 | Val:   loss=2.0260, acc=0.5565
✔️  New best model saved (epoch 13, val_acc=0.5565)


Epoch 14 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 14 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 14 | Train: loss=1.4539, acc=0.6100 | Val:   loss=2.0139, acc=0.5498


Epoch 15 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 15 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 15 | Train: loss=1.4668, acc=0.6074 | Val:   loss=2.0021, acc=0.5449


Epoch 16 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 16 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 16 | Train: loss=1.4707, acc=0.6086 | Val:   loss=2.0237, acc=0.5503


Epoch 17 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 17 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 17 | Train: loss=1.4511, acc=0.6086 | Val:   loss=2.0078, acc=0.5525


Epoch 18 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 18 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 18 | Train: loss=1.4193, acc=0.6115 | Val:   loss=2.0626, acc=0.5387


Epoch 19 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 19 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 19 | Train: loss=1.3901, acc=0.6277 | Val:   loss=2.0004, acc=0.5520


Epoch 20 ▶ Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 20 ✅ Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 20 | Train: loss=1.4063, acc=0.6189 | Val:   loss=2.0242, acc=0.5476

🎉 Best validation accuracy: 0.5565


In [32]:
# Cell 5 — (Optional) Load best checkpoint for inference or continued training
ckpt = torch.load('best_panns_mlp_checkpoint.pt', map_location=device)
model.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
print(f"Loaded checkpoint from epoch {ckpt['epoch']} with val_acc={ckpt['best_validation_acc']:.4f}")


Loaded checkpoint from epoch 13 with val_acc=0.5565


  ckpt = torch.load('best_panns_mlp_checkpoint.pt', map_location=device)
