# Deep Learning assignment

## Imports

In [1]:
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim


# Pick the best available accelerator: CUDA first, then Apple-silicon MPS,
# and fall back to the CPU. `getattr` guards PyTorch builds that do not ship
# a `torch.backends.mps` module at all.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: mps


----------------------

## Dataset import/definitions

In [2]:
class AccentDataset(Dataset):
    """Fixed-length, standardized mono waveforms read from ``.wav`` files.

    Every file under ``data_dir`` (searched recursively) whose name ends in
    ``.wav`` (case-insensitive) becomes one sample. The class label is taken
    from the first character of the file name, which must be a digit 1-9,
    and is shifted to a zero-based index ('1...' → 0).

    Args:
        data_dir: Root directory to scan for ``.wav`` files.
        sample_rate: Rate every clip is resampled to when its own rate differs.
        max_length: Number of samples each clip is zero-padded or truncated to.
    """

    def __init__(self, data_dir, sample_rate=16000, max_length=16000):
        self.sample_rate = sample_rate
        self.max_length  = max_length
        self.files = []
        # One Resample transform per source rate, built lazily on first use
        # instead of once per item.
        self._resamplers = {}
        for root, dirs, fns in os.walk(data_dir):
            # Sorting `dirs` in place fixes the order in which os.walk
            # descends into sub-directories, so `self.files` is reproducible.
            dirs.sort()
            for fn in sorted(fns):
                if fn.lower().endswith('.wav'):
                    self.files.append(os.path.join(root, fn))

    def __len__(self):
        """Return the number of ``.wav`` files that were discovered."""
        return len(self.files)

    @staticmethod
    def _label_from_path(path):
        """Return the zero-based label encoded by the file name's first character.

        Raises:
            ValueError: If the file name does not start with a digit 1-9.
        """
        first = os.path.basename(path)[:1]
        if first not in set("123456789"):
            raise ValueError(
                f"Cannot derive a label from {path!r}: "
                "the file name must start with a digit between 1 and 9"
            )
        return int(first) - 1

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(waveform, label)`` on ``device``.

        The waveform has shape ``(1, max_length)``; the label is a scalar
        ``torch.long`` tensor.
        """
        path = self.files[idx]
        waveform, sr = torchaudio.load(path)

        # Resample → mono → pad/truncate → standardize
        if sr != self.sample_rate:
            resampler = self._resamplers.get(sr)
            if resampler is None:
                resampler = torchaudio.transforms.Resample(sr, self.sample_rate)
                self._resamplers[sr] = resampler
            waveform = resampler(waveform)
        # Average the channels but keep the channel axis.
        waveform = waveform.mean(dim=0, keepdim=True)
        if waveform.size(1) < self.max_length:
            pad = self.max_length - waveform.size(1)
            waveform = nn.functional.pad(waveform, (0, pad))
        else:
            waveform = waveform[:, :self.max_length]
        # The small epsilon avoids dividing by zero on constant clips.
        waveform = (waveform - waveform.mean()) / (waveform.std() + 1e-5)

        # Label from filename: '1...' → 0
        label = self._label_from_path(path)
        # NOTE(review): tensors are placed on the notebook-level `device` here,
        # which ties the dataset to that global; presumably fine with the
        # default single-process DataLoader — confirm before adding workers.
        return waveform.to(device), torch.tensor(label, dtype=torch.long, device=device)


------

## Dataloaders

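The dataloaders wrap the training and validation datasets and serve them in mini-batches of 32 samples; only the training loader shuffles its samples.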

In [3]:
# Directories holding the training and validation audio, relative to the
# notebook's working directory.
train_dir = 'Train'
val_dir   = 'Test set'

# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

train_ds = AccentDataset(train_dir)
val_ds   = AccentDataset(val_dir)

# Fail fast when a directory is missing or holds no audio: os.walk yields
# nothing for a non-existent path, so an empty dataset would otherwise only
# surface as an obscure error further down.
for _split, _dir, _ds in (("training", train_dir, train_ds),
                          ("validation", val_dir, val_ds)):
    if len(_ds) == 0:
        raise FileNotFoundError(
            f"No .wav files found for the {_split} set under {os.path.abspath(_dir)!r}"
        )

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE)


---------

## Model - CNN (task 1.2a)

In [5]:
class RawCNN(nn.Module):
    """1-D convolutional classifier for single-channel raw waveforms.

    Two Conv1d → ReLU → MaxPool1d(2) stages are followed by a flattened
    two-layer classifier head with dropout.

    Args:
        num_classes: Number of output logits.
        max_length: Length of the input signal in samples; it determines the
            input width of the first linear layer.
    """

    def __init__(self, num_classes=5, max_length=16000):
        super().__init__()

        # kernel_size=5 with padding=2 preserves the length, so only the two
        # pooling layers shrink the signal (by a factor of 2 each).
        feature_layers = []
        for in_ch, out_ch in ((1, 16), (16, 32)):
            feature_layers += [
                nn.Conv1d(in_ch, out_ch, kernel_size=5, padding=2),
                nn.ReLU(),
                nn.MaxPool1d(2),
            ]
        self.conv = nn.Sequential(*feature_layers)

        pooled_len = max_length // 4  # length left after both poolings
        flat_features = 32 * pooled_len
        head_layers = [
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a ``(batch, 1, max_length)`` input to ``(batch, num_classes)`` logits."""
        features = self.conv(x)
        logits = self.fc(features)
        return logits

# Seed PyTorch's global generator before any weights are created so that
# parameter initialisation starts from the same state on every
# Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Hyper-parameters a reader may want to tune.
NUM_CLASSES = 5
LEARNING_RATE = 1e-3

# The network's input length must match the length the dataset pads/truncates to.
model = RawCNN(num_classes=NUM_CLASSES, max_length=train_ds.max_length).to(device)

# Model loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)



In [6]:
# Training and validation functions
def run_epoch(loader, train=True):
    """Run one full pass over ``loader`` and return ``(loss, accuracy)``.

    Operates on the notebook-level ``model``, ``criterion`` and ``optimizer``.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        train: If True, the model is put in training mode and the optimizer
            steps after every batch. If False, the model is put in eval mode
            and gradient tracking is disabled.

    Returns:
        A tuple ``(loss, accuracy)``: the batch losses averaged with each
        batch weighted by its size, and the fraction of samples whose
        arg-max prediction equals the target.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()

    # Batches are moved to wherever the model's parameters live, so the loop
    # does not depend on the dataset having placed them there already
    # (`.to` is a no-op when the tensor is already on that device).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_loss = correct = total = 0
    with torch.set_grad_enabled(train):
        for X, y in loader:
            if target_device is not None:
                X, y = X.to(target_device), y.to(target_device)

            logits = model(X)
            loss   = criterion(logits, y)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight the batch loss by batch size so a smaller final batch
            # does not skew the epoch average.
            batch_size = X.size(0)
            total_loss += loss.item() * batch_size
            preds = logits.argmax(dim=1)
            correct += (preds == y).sum().item()
            total   += batch_size

    if total == 0:
        raise ValueError("run_epoch received a loader that yielded no samples")

    return total_loss/total, correct/total


### Training loop

In [7]:
# Train for a fixed number of epochs, evaluating on the validation loader
# after every training pass and printing one summary line per epoch.
# NOTE(review): in the recorded run below, validation accuracy stays around
# 0.09-0.13 — under the 0.20 chance level for 5 classes — while training
# accuracy rises; check that the 'Test set' file names really encode labels.
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    tl, ta = run_epoch(train_loader, train=True)
    vl, va = run_epoch(val_loader,   train=False)

    epoch_tag  = f"Epoch {epoch:02d} | "
    train_part = f"Train Loss: {tl:.4f}, Acc: {ta:.4f} | "
    val_part   = f"Val   Loss: {vl:.4f}, Acc: {va:.4f}"
    print(epoch_tag + train_part + val_part)


Epoch 01 | Train Loss: 1.8444, Acc: 0.1936 | Val   Loss: 0.8418, Acc: 0.1084
Epoch 02 | Train Loss: 1.5949, Acc: 0.2404 | Val   Loss: 0.8530, Acc: 0.0857
Epoch 03 | Train Loss: 1.5665, Acc: 0.2669 | Val   Loss: 0.8704, Acc: 0.1058
Epoch 04 | Train Loss: 1.5235, Acc: 0.2966 | Val   Loss: 0.8702, Acc: 0.0970
Epoch 05 | Train Loss: 1.4037, Acc: 0.3664 | Val   Loss: 0.8782, Acc: 0.1075
Epoch 06 | Train Loss: 1.2951, Acc: 0.4182 | Val   Loss: 0.8854, Acc: 0.1093
Epoch 07 | Train Loss: 1.1433, Acc: 0.4978 | Val   Loss: 0.9450, Acc: 0.1233
Epoch 08 | Train Loss: 0.9775, Acc: 0.5670 | Val   Loss: 0.9748, Acc: 0.1163
Epoch 09 | Train Loss: 0.8912, Acc: 0.6124 | Val   Loss: 1.0344, Acc: 0.1329
Epoch 10 | Train Loss: 0.7813, Acc: 0.6576 | Val   Loss: 1.0741, Acc: 0.1241
