# Model Training

## Imports

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from src.__00__paths import spectrogram_train_dir, spectrogram_validation_dir, model_dir

from PIL import Image
from pathlib import Path

## Device Setup

In [2]:
# Pick the best available backend: CUDA GPU first, then Apple-silicon MPS,
# and finally plain CPU so the notebook still runs everywhere.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: mps


## Dataset Definition

In [3]:
class GenreSpectrogramDataset(Dataset):
    """Spectrogram images stored as ``root_dir/<genre>/<name>.png``.

    Every visible sub-directory of ``root_dir`` is treated as one class.
    Class indices follow the sorted directory names, and the samples of each
    class are sorted by path, so the dataset content and order are stable
    across runs and file systems.

    Args:
        root_dir: Directory that contains one sub-directory per genre.
        transform: Optional callable applied to each loaded PIL image.

    Attributes:
        samples: List of ``(png_path, class_index)`` tuples.
        class_to_idx: Mapping from genre directory name to class index.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.samples = []

        # Only real, non-hidden directories are classes. Stray entries such as
        # macOS ".DS_Store" files would otherwise be counted as a genre and
        # shift every label index (possibly differently per data split).
        genre_dirs = sorted(
            entry for entry in self.root_dir.iterdir()
            if entry.is_dir() and not entry.name.startswith(".")
        )
        self.class_to_idx = {genre.name: idx for idx, genre in enumerate(genre_dirs)}

        for genre_dir in genre_dirs:
            label = self.class_to_idx[genre_dir.name]
            # glob() yields files in arbitrary order; sort for reproducibility.
            for file in sorted(genre_dir.glob("*.png")):
                self.samples.append((file, label))

    def __len__(self):
        """Return the number of (image, label) samples found on disk."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` as a grayscale image and return ``(image, label)``."""
        image_path, label = self.samples[idx]

        # convert() returns a fully loaded copy, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('L')

        if self.transform:
            image = self.transform(image)

        return image, label

## Transforms & DataLoaders

In [4]:
# Number of spectrograms per mini-batch, shared by both loaders.
BATCH_SIZE = 32

# Preprocessing applied to every spectrogram as it is read from disk.
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert PIL.Image -> PyTorch tensor
    transforms.Normalize(mean=[0.5], std=[0.5]),  # X_norm = (x - 0.5) / (0.5) = 2x - 1
])

train_data = GenreSpectrogramDataset(spectrogram_train_dir, transform=transform)
validation_data = GenreSpectrogramDataset(spectrogram_validation_dir, transform=transform)

# Labels are derived independently from each directory tree; if the two
# mappings disagree, validation metrics are meaningless.
assert train_data.class_to_idx == validation_data.class_to_idx, (
    "train and validation splits map genres to different class indices"
)

# Shuffle only the training data; validation metrics do not depend on sample
# order, so a fixed order keeps evaluation deterministic.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=False)

## Channel Attention

In [5]:
class ChannelAttention(nn.Module):
    """Re-weight feature-map channels with a learned per-channel gate.

    Each channel is summarised by its spatial mean, the per-channel means are
    mixed with a 1-D convolution that slides across the channel axis, and the
    sigmoid of the result scales the input channel-wise.

    Args:
        channels: Accepted for call-site symmetry; not used by the module.
        k_size: Kernel size of the 1-D convolution across channels.
    """

    def __init__(self, channels, k_size=3):
        super().__init__()
        # For odd k_size this padding keeps the channel-axis length unchanged.
        pad = (k_size - 1) // 2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=pad, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the attention gate."""
        # (B, C, H, W) -> (B, C, 1, 1): one mean value per channel.
        descriptor = self.avg_pool(x)
        # (B, C, 1, 1) -> (B, C, 1) -> (B, 1, C): channels become the
        # sequence axis that the 1-D convolution runs over.
        descriptor = descriptor.squeeze(-1).transpose(-1, -2)

        gate = self.sigmoid(self.conv(descriptor))
        # (B, 1, C) -> (B, C, 1) -> (B, C, 1, 1), ready to broadcast over H, W.
        gate = gate.transpose(-1, -2).unsqueeze(-1)

        return x * gate.expand_as(x)


In [6]:
class Genre_CNN(nn.Module):
    """CNN genre classifier for single-channel spectrogram images.

    The encoder stacks five convolutional stages; each stage halves the
    spatial resolution with a 2x2 max-pool. The classifier averages the final
    512-channel feature map over space and maps it to ``num_classes`` logits
    through one hidden layer of 256 units.

    Args:
        num_classes: Number of output logits.
    """

    # (in_channels, out_channels, dropout probability) for each encoder stage.
    _ENCODER_STAGES = (
        (1, 32, 0.25),
        (32, 64, 0.25),
        (64, 128, 0.3),
        (128, 256, 0.3),
        (256, 512, 0.4),
    )

    def __init__(self, num_classes=10):
        super().__init__()

        stages = [
            self._make_stage(in_c, out_c, drop)
            for in_c, out_c, drop in self._ENCODER_STAGES
        ]
        self.encoder = nn.Sequential(*stages)

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    @staticmethod
    def _make_stage(in_c, out_c, drop):
        """Build one stage: conv -> batch norm -> ReLU -> attention -> pool -> dropout."""
        layers = [
            nn.Conv2d(in_c, out_c, 3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            ChannelAttention(out_c),
            nn.MaxPool2d(2),
            nn.Dropout(drop),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a batch of spectrograms."""
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

## Init Model, Optimizer, Loss

In [7]:
# Seed torch's global RNG before the network is built so that weight
# initialisation is repeatable from one "Restart & Run All" to the next.
SEED = 42
torch.manual_seed(SEED)

model = Genre_CNN(num_classes=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

## Training Function

In [8]:
def train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return epoch metrics.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding ``(inputs, integer_labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples that were
        actually processed during the epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()

    # Move batches to wherever the model lives rather than relying on a
    # notebook-level global, so the function works for any model/device pair.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_loss, correct, seen = 0.0, 0, 0

    for X, y in loader:
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        optimizer.zero_grad()
        out = model(X)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        batch_size = X.size(0)
        # Assumes the criterion returns the batch mean (the default reduction
        # of nn.CrossEntropyLoss); scaling by batch size gives a per-sample sum.
        total_loss += loss.item() * batch_size
        correct += (out.argmax(1) == y).sum().item()
        seen += batch_size

    if seen == 0:
        raise ValueError("train_one_epoch: loader produced no samples")

    # Divide by the samples seen, not len(loader.dataset): the two differ when
    # the loader uses drop_last or a sampler that covers part of the dataset.
    return total_loss / seen, correct / seen

## Validation Function

In [9]:
@torch.no_grad()
def evaluate(model, loader, criterion):
    """Compute loss and accuracy of ``model`` over ``loader`` without training.

    Gradient tracking is disabled and the model is switched to eval mode.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(inputs, integer_labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples that were
        actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()

    # Move batches to wherever the model lives rather than relying on a
    # notebook-level global, so the function works for any model/device pair.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_loss, correct, seen = 0.0, 0, 0

    for X, y in loader:
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)
        out = model(X)
        loss = criterion(out, y)

        batch_size = X.size(0)
        # Assumes the criterion returns the batch mean (the default reduction
        # of nn.CrossEntropyLoss); scaling by batch size gives a per-sample sum.
        total_loss += loss.item() * batch_size
        correct += (out.argmax(1) == y).sum().item()
        seen += batch_size

    if seen == 0:
        raise ValueError("evaluate: loader produced no samples")

    # Divide by the samples seen, not len(loader.dataset): the two differ when
    # the loader uses drop_last or a sampler that covers part of the dataset.
    return total_loss / seen, correct / seen

## Train Loop

In [10]:
epochs = 20

# Each epoch: one optimisation pass over the training split, then a
# gradient-free pass over the validation split, then a one-line report.
for epoch in range(epochs):
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer
    )
    val_loss, val_acc = evaluate(
        model, validation_loader, criterion
    )

    # Epoch numbers are shown 1-based and zero-padded to two digits.
    report = (
        f"Epoch {epoch + 1:02d}: "
        f"Train Loss={train_loss:.4f} Acc={train_acc:.4f} | "
        f"Val Loss={val_loss:.4f} Acc={val_acc:.4f}"
    )
    print(report)

Epoch 01: Train Loss=2.2436 Acc=0.1845 | Val Loss=2.3047 Acc=0.1000
Epoch 02: Train Loss=2.1019 Acc=0.2604 | Val Loss=2.2946 Acc=0.1000
Epoch 03: Train Loss=1.9945 Acc=0.3004 | Val Loss=2.2769 Acc=0.1267
Epoch 04: Train Loss=1.9481 Acc=0.3047 | Val Loss=2.2912 Acc=0.1733
Epoch 05: Train Loss=1.8544 Acc=0.3534 | Val Loss=2.3370 Acc=0.1867
Epoch 06: Train Loss=1.7931 Acc=0.3820 | Val Loss=2.3758 Acc=0.1867
Epoch 07: Train Loss=1.7302 Acc=0.3734 | Val Loss=2.4437 Acc=0.1867
Epoch 08: Train Loss=1.6914 Acc=0.4120 | Val Loss=2.4550 Acc=0.1933
Epoch 09: Train Loss=1.6581 Acc=0.4034 | Val Loss=2.5672 Acc=0.1933
Epoch 10: Train Loss=1.5922 Acc=0.4263 | Val Loss=2.6645 Acc=0.1867
Epoch 11: Train Loss=1.5662 Acc=0.4363 | Val Loss=2.6469 Acc=0.1933
Epoch 12: Train Loss=1.5342 Acc=0.4478 | Val Loss=2.5860 Acc=0.2000
Epoch 13: Train Loss=1.4797 Acc=0.4692 | Val Loss=2.7262 Acc=0.2000
Epoch 14: Train Loss=1.4394 Acc=0.4850 | Val Loss=2.5356 Acc=0.2333
Epoch 15: Train Loss=1.4600 Acc=0.4721 | Val Los

## Save Model

In [11]:
model_path = model_dir / "genre_cnn_model.pth"
# torch.save does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
model_path.parent.mkdir(parents=True, exist_ok=True)
# Only the parameters/buffers are stored; loading requires rebuilding
# Genre_CNN and calling load_state_dict.
torch.save(model.state_dict(), model_path)
print(f"✔️ Model saved at {'/'.join(model_path.parts[-3:])}.")

✔️ Model saved at outputs/models/genre_cnn_model.pth.
