# Birdsong detection with VGG16

The following notebook trains a VGG16 from scratch on Bird-DCASE
to detect birdsong. With the given hyperparameters, training sees
around 1M samples; the accuracy reached is not stated here.

## Imports

In [None]:
import random

import numpy as np
import torch
import torch.optim as optim
from torch import nn

from topography.models import speech_vgg
from topography.training import Writer, evaluate, train
from topography.utils import LinearWarmupCosineAnnealingLR
from topography.utils.data import BirdDCASE, RandomAudioFeaturesCrop

## Hyperparameters and random seed

In [None]:
# Reproducibility and output location.
seed = 0  # value fed to every random number generator
root = "./bird_dcase"  # directory handed to the Writer for outputs

# Length of the run and size of each mini-batch.
epochs, batch_size = 12, 256

# SGD settings: base learning rate, weight decay and momentum.
lr, weight_decay, momentum = 0.01, 0.01, 0.9

In [None]:
# Feed the same seed to every random number generator used by the
# notebook: Python's `random`, NumPy, PyTorch (CPU) and all CUDA devices.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed)

# Make cuDNN pick deterministic algorithms and turn off its benchmark
# mode, so that repeated runs select the same kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Data loading

In [None]:
# NOTE(review): `validation_set` and `transform` are assigned here but are
# not passed to any call visible in this notebook -- confirm whether they
# were meant to be handed to BirdDCASE or to the training routine.
validation_set = "freefield1010"
transform = RandomAudioFeaturesCrop(sample_rate=BirdDCASE.SAMPLE_RATE)

# Build the training split first (it is the one that requests a download),
# then the validation split from the same data directory.
train_set = BirdDCASE(
    "../../data",
    subset="training",
    download=True,
    process=False,
    crop=True,
)
val_set = BirdDCASE("../../data", subset="validation", crop=True)

# Options shared by both loaders; only the shuffling differs.
_loader_options = dict(batch_size=batch_size, num_workers=2, pin_memory=True)

# Training batches are reshuffled, validation batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    train_set, shuffle=True, **_loader_options
)
val_loader = torch.utils.data.DataLoader(
    val_set, shuffle=False, **_loader_options
)

## Defining the main components

In [None]:
# Use the GPU when PyTorch reports one as available, else fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Two-class network placed on the selected device, with a cross-entropy
# objective.
model = speech_vgg(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()

# Plain SGD driven by the hyperparameters defined earlier in the notebook.
optimizer = optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

# The first 30% of the epochs are declared as warm-up.
# NOTE(review): `epochs * 0.3` is a float and is not a whole number for
# every value of `epochs` -- confirm the scheduler accepts that.
scheduler = LinearWarmupCosineAnnealingLR(
    optimizer,
    warmup_epochs=epochs * 0.3,
    max_epochs=epochs,
)

# The writer stores its outputs under `root`; record the run configuration.
writer = Writer(root)
writer.log_hparams(
    epochs=epochs,
    batch_size=batch_size,
    lr=lr,
    weight_decay=weight_decay,
    momentum=momentum,
    model="speech_vgg",
    optimizer="sgd",
    scheduler="LinearWarmupCosineAnnealingLR",
)

## Training the model

In [None]:
# Each epoch: train, evaluate on the validation loader, advance the
# learning-rate schedule, then ask the writer to save the run state.
for _ in range(epochs):
    train(
        model, train_loader, optimizer, criterion, device, writer,
        is_pytorch_loss=True,
    )
    evaluate(
        model, val_loader, criterion, device, writer,
        mode="val", is_pytorch_loss=True,
    )
    # The schedule is stepped once per epoch, after the validation pass.
    scheduler.step()
    # "val" / "acc" are forwarded unchanged to the writer together with
    # the model, optimizer and scheduler objects.
    writer.save(
        "val",
        "acc",
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
    )

## Final evaluation

In [None]:
# Final evaluation pass, logged under the "test" mode.
# NOTE(review): this reuses `val_loader`, so the "test" figures come from
# the validation split -- confirm that no separate test split is intended.
evaluate(
    model, val_loader, criterion, device, writer,
    mode="test", is_pytorch_loss=True,
)

# Close the writer now that the last metrics have been logged.
writer.close()