In [2]:
# Mount Google Drive at /content/drive so that the project code and the
# DCASE data referenced further down (paths under 'My Drive') are reachable.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
import sys
# Put the project folder first on the import path so modules stored there can
# be imported directly. Presumably this is where `dcasedataset` and
# `cnnbinary` (imported in the training cell) live — TODO confirm.
sys.path.insert(0,'/content/drive/My Drive/MSc_Project_Colab/BAD_PyTorch/')

In [4]:
# Install torchaudio into the runtime; the training cell imports it for the
# MelSpectrogram transform.
# NOTE(review): the version is not pinned — the recorded run resolved
# torchaudio 0.9.0. Consider pinning it for reproducibility.
!pip install torchaudio

Collecting torchaudio
  Downloading torchaudio-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 429 kB/s 
Installing collected packages: torchaudio
Successfully installed torchaudio-0.9.0


Train the model

In [21]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torchaudio
import numpy as np

from dcasedataset import DCASE_Dataset
from cnnbinary import CNNNetwork

# --- Data locations (on the mounted Google Drive) ---
ANNOTATIONS_FILE = '/content/drive/My Drive/DCASE_Datasets/labels/mini_metadata.csv'
AUDIO_DIR = '/content/drive/My Drive/DCASE_Datasets/audio/'

# --- Audio settings ---
# SAMPLE_RATE is handed to both the dataset and the MelSpectrogram transform.
SAMPLE_RATE = 22050
# Clip length; presumably in seconds, since NUM_SAMPLES = SAMPLE_RATE * DURATION.
DURATION = 10
# Derived from SAMPLE_RATE so the two values cannot drift apart.
NUM_SAMPLES = SAMPLE_RATE * DURATION

# --- Train/validation split ---
# Fraction of the dataset indices held out for validation.
VALIDATION_SPLIT = 0.2
# When True, the indices are shuffled (with a fixed seed) before splitting.
SHUFFLE_DATASET = True

# --- Optimisation hyper-parameters ---
BATCH_SIZE = 25
EPOCHS = 20
LEARNING_RATE = 0.001

# def create_train_loader(train_data, batch_size, train_sampler):
#     train_dataloader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
#     return train_dataloader

# def create_validation_loader(validation_data, batch_size, valid_sampler):
#     validation_dataloader = DataLoader(validation_data, batch_size=batch_size, sampler=valid_sampler)
#     return validation_dataloader

def train_single_epoch(model, train_dataloader, validation_dataloader, loss_fn, optimiser, device):
    """Run one training pass over `train_dataloader`, then one validation pass.

    Args:
        model: network called as `model(inputs)`. Its raw outputs are passed
            through a sigmoid before being handed to `loss_fn`.
        train_dataloader: iterable of `(input, target)` batches used for
            weight updates.
        validation_dataloader: iterable of `(input, target)` batches used for
            evaluation only (no gradients, no weight updates).
        loss_fn: callable `loss_fn(probabilities, target)` returning a scalar
            tensor.
        optimiser: optimiser bound to the model's parameters.
        device: device every batch is moved to before the forward pass.

    Prints the mean of the per-batch training losses and the mean of the
    per-batch validation losses for this epoch. The model is left in eval
    mode on return; the next call switches it back to train mode.
    """
    # ---- training pass ----
    model.train()
    train_losses = []
    for inputs, target in train_dataloader:
        inputs, target = inputs.to(device), target.to(device)

        # Match the prediction's trailing dimension and use a float target.
        # Out-of-place ops leave the caller's tensors untouched, and .float()
        # keeps the tensor on whatever device it already lives on (CPU or GPU).
        target = target.unsqueeze(1).float()

        # calculate loss
        prediction = model(inputs)
        loss = loss_fn(torch.sigmoid(prediction), target)

        # backpropagate error and update weights
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        train_losses.append(loss.item())

    # ---- validation pass (once per epoch, after all weight updates) ----
    model.eval()
    val_losses = []
    with torch.no_grad():  # evaluation only: skip building the autograd graph
        for val_input, val_target in validation_dataloader:
            val_input, val_target = val_input.to(device), val_target.to(device)
            val_target = val_target.unsqueeze(1).float()

            val_prediction = model(val_input)
            val_loss = loss_fn(torch.sigmoid(val_prediction), val_target)
            val_losses.append(val_loss.item())

    # An empty loader yields no losses; report that instead of crashing.
    if train_losses:
        print(f"train loss: {sum(train_losses) / len(train_losses)}")
    else:
        print("train loss: n/a (no training batches)")
    if val_losses:
        print(f"val loss: {sum(val_losses) / len(val_losses)}")
    else:
        print("val loss: n/a (no validation batches)")


def train(model, train_dataloader, validation_dataloader, loss_fn, optimiser, device, epochs):
    """Train `model` for `epochs` epochs, printing a header and separator per epoch.

    Each epoch is delegated to `train_single_epoch`, which receives every
    argument except `epochs` unchanged.
    """
    separator = "---------------------------"
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_single_epoch(
            model,
            train_dataloader,
            validation_dataloader,
            loss_fn,
            optimiser,
            device,
        )
        print(separator)
    print("Finished training")


if __name__ == "__main__":
    # Training script: build the dataset, split it into training and
    # validation subsets, train the CNN and save its weights to Drive.

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device}")

    # instantiate dataset object and create data loader
    # The mel-spectrogram transform is handed to the dataset.
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=1024,
        hop_length=512,
        n_mels=64
    )

    dcase = DCASE_Dataset(ANNOTATIONS_FILE,
                          AUDIO_DIR,
                          mel_spectrogram,
                          SAMPLE_RATE,
                          NUM_SAMPLES,
                          device)

    # creating data indices for training and validation splits
    dcase_size = len(dcase)
    indices = list(range(dcase_size))
    split = int(np.floor(VALIDATION_SPLIT * dcase_size))
    if SHUFFLE_DATASET:
        # Fixed seed so the same items land in each split on every run.
        np.random.seed(0)
        np.random.shuffle(indices)
    # The first `split` (shuffled) indices are held out for validation.
    train_indices, val_indices = indices[split:], indices[:split]

    # Creating PT data samplers and loaders:
    # both loaders wrap the same dataset; the samplers restrict each one to
    # its own subset of indices.
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_dataloader = DataLoader(dcase, BATCH_SIZE, sampler=train_sampler)
    validation_dataloader = DataLoader(dcase, BATCH_SIZE, sampler=valid_sampler)
    # Number of batches per epoch in each loader.
    print(len(train_dataloader))
    print(len(validation_dataloader))

    cnn = CNNNetwork().to(device)
    print(cnn)

    # initialise loss function + optimiser
    # `sigmoid` is bound at notebook level (an `if` block opens no new scope),
    # so it is visible as a global to the functions defined in this cell.
    sigmoid = nn.Sigmoid()
    loss_fn = nn.BCELoss()
    optimiser = torch.optim.Adam(cnn.parameters(),
                                 lr=LEARNING_RATE)

    # train model
    train(cnn, train_dataloader, validation_dataloader, loss_fn, optimiser, device, EPOCHS)

    # save model
    # The path is reused in the message so the log always names the file that
    # was actually written.
    model_path = "/content/drive/My Drive/MSc_Project_Colab/BAD_PyTorch/cnn.pth"
    torch.save(cnn.state_dict(), model_path)
    print(f"Trained cnn saved at {model_path}")

Using cuda
4
1
CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=17920, out_features=1, bias=True)
)
Epoch 1
/content/drive/My Drive/DCASE_Datasets/audio