In [None]:
import random

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms

from topography.models.audio import speech_vgg
from topography.training import Writer, evaluate, train
from topography.utils import LinearWarmupCosineAnnealingLR
from topography.utils.data.speechcommands import SpeechCommands

In [None]:
# Experiment configuration: every tunable value read by the cells below.
seed = 0 # Random seed, applied to `random`, NumPy and PyTorch (CPU and CUDA)
root = './speech_commands' # Output directory, handed to the Writer

# Optimisation settings.
epochs = 12 # Number of training epochs (also the scheduler's `max_epochs`)
batch_size = 256 # Batch size used by the train, validation and test loaders
lr = 0.01 # Base learning rate given to SGD
weight_decay = 0.01 # Weight decay given to SGD
momentum = 0.9 # SGD momentum

In [None]:
# Seed every random number generator in play with the same value, in this
# order: Python's `random`, NumPy, PyTorch (CPU), then all CUDA devices.
for seed_fn in (random.seed, np.random.seed,
                torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed)

# cuDNN reproducibility flags: request deterministic algorithms and switch
# off the benchmark-based algorithm selection.
cudnn_backend = torch.backends.cudnn
cudnn_backend.deterministic = True
cudnn_backend.benchmark = False

In [None]:
# Dataset location, spectrogram settings and loader workers shared by all
# three splits (previously repeated inline for every split).
DATA_DIR = "../../data"
N_FFT = 256
HOP_LENGTH = 128
NUM_WORKERS = 2


def make_split(subset, shuffle):
    """Build the SpeechCommands dataset for one split and a DataLoader over it.

    Args:
        subset: Split name forwarded to ``SpeechCommands`` (this notebook
            uses "training", "validation" and "testing").
        shuffle: Forwarded to ``DataLoader``; True reshuffles the samples
            at every epoch.

    Returns:
        A ``(dataset, loader)`` tuple.
    """
    # NOTE(review): build=False is forwarded unchanged; presumably the dataset
    # has already been prepared under DATA_DIR -- confirm before a fresh run.
    dataset = SpeechCommands(DATA_DIR, subset=subset, build=False,
                             n_fft=N_FFT, hop_length=HOP_LENGTH)
    # `batch_size` comes from the configuration cell.
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=NUM_WORKERS, pin_memory=True)
    return dataset, loader


# Only the training split is shuffled; validation and test keep a fixed order.
train_set, train_loader = make_split("training", shuffle=True)
val_set, val_loader = make_split("validation", shuffle=False)
test_set, test_loader = make_split("testing", shuffle=False)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Network and loss.
model = speech_vgg().to(device)
criterion = nn.CrossEntropyLoss()

# Plain SGD driven by the values from the configuration cell.
optimizer = optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Warm-up covers the first 30% of the training epochs.
# NOTE(review): this is a float and not a whole number for epochs = 12
# (about 3.6); confirm LinearWarmupCosineAnnealingLR accepts a fractional
# warm-up length, otherwise round it explicitly.
warmup_epochs = epochs * 0.3
scheduler = LinearWarmupCosineAnnealingLR(
    optimizer, warmup_epochs=warmup_epochs, max_epochs=epochs)

# Writer rooted at the output directory; record the run's hyper-parameters.
writer = Writer(root)
hparams = {
    'epochs': epochs,
    'batch_size': batch_size,
    'lr': lr,
    'weight_decay': weight_decay,
    'momentum': momentum,
    'model': "speech_vgg",
    'optimizer': 'sgd',
    'scheduler': 'LinearWarmupCosineAnnealingLR',
}
writer.log_hparams(**hparams)

In [None]:
# Main loop: one training pass and one validation pass per epoch.
# The loop variable is named `epoch` (not `_`) so IPython's last-output
# variable `_` is left alone.
for epoch in range(epochs):
    train(model, train_loader, optimizer, criterion, device, writer,
          is_pytorch_loss=True)
    evaluate(model, val_loader, criterion, device, writer,
             mode='val', is_pytorch_loss=True)

    # Advance the learning-rate schedule once per epoch, after training
    # and validation.
    scheduler.step()

    # Hand model, optimizer and scheduler to the writer for saving.
    # NOTE(review): ('val', 'acc') presumably selects validation accuracy as
    # the checkpointing criterion -- confirm against Writer.save.
    writer.save(
        'val', 'acc',
        model=model, optimizer=optimizer, scheduler=scheduler,
    )

In [None]:
# Final evaluation of the trained model on the test loader, run in 'test' mode.
# The writer is passed along, presumably so the test metrics get recorded.
evaluate(model, test_loader, criterion, device, writer, mode='test', is_pytorch_loss=True)
# Close the writer; no further evaluation or saving happens after this point.
writer.close()