In [1]:
import torch
# Prefer the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
import os
from UrbanSoundDataset import UrbanSoundDataset
from FeedForward import CNNNetwork
import torchaudio
from torch.utils.data import DataLoader
from torch import nn
from datetime import datetime

# Select the "soundfile" audio I/O backend when torchaudio still exposes the
# global backend switch.
# NOTE(review): newer torchaudio releases deprecated and later dropped
# set_audio_backend; the guard keeps this cell runnable either way. Confirm
# against the installed torchaudio version.
if hasattr(torchaudio, "set_audio_backend"):
    torchaudio.set_audio_backend("soundfile")


In [3]:
# --- Dataset locations -------------------------------------------------------
ANNOTATION_FILE = "./data/UrbanSound8K/metadata/UrbanSound8K.csv"
AUDIO_DIR = os.path.join("data", "UrbanSound8K", "audio")

# --- Audio preprocessing -----------------------------------------------------
SAMPLE_RATE = 22_050
# Same number as SAMPLE_RATE; both are handed to UrbanSoundDataset below.
# Presumably this is the per-clip sample count (one second of audio) - TODO confirm.
NUM_SAMPLES = 22_050

# --- Training hyperparameters ------------------------------------------------
BATCH_SIZE = 128
EPOCHS = 10
LEARNING_RATE = 1e-3

In [4]:
# Mel-spectrogram transform that is handed to UrbanSoundDataset further down.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    n_mels=64,                # 64 mel bands: the same 64 as in the (1, 64, 44) shape given to summary()
    n_fft=1024,               # FFT size
    hop_length=512,           # hop between frames; half of n_fft
    sample_rate=SAMPLE_RATE,  # shared with the dataset configuration
)

In [5]:
# Dataset: receives, in this order, the annotation CSV path, the audio directory,
# the mel-spectrogram transform, the sample rate, the sample count and the device.
usd = UrbanSoundDataset(
    ANNOTATION_FILE,
    AUDIO_DIR,
    mel_spectrogram,
    SAMPLE_RATE,
    NUM_SAMPLES,
    device,
)

# Model, moved onto the selected device.
cnn = CNNNetwork()
cnn = cnn.to(device)

# Cross-entropy loss, with Adam updating every parameter of the network.
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=cnn.parameters(), lr=LEARNING_RATE)

In [6]:
from torchsummary import summary

# Print the layer-by-layer summary of the network for a single (1, 64, 44) input.
summary(
    cnn,
    input_size=(1, 64, 44),
)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 66, 46]             160
              ReLU-2           [-1, 16, 66, 46]               0
         MaxPool2d-3           [-1, 16, 33, 23]               0
            Conv2d-4           [-1, 32, 35, 25]           4,640
              ReLU-5           [-1, 32, 35, 25]               0
         MaxPool2d-6           [-1, 32, 17, 12]               0
            Conv2d-7           [-1, 64, 19, 14]          18,496
              ReLU-8           [-1, 64, 19, 14]               0
         MaxPool2d-9             [-1, 64, 9, 7]               0
           Conv2d-10           [-1, 128, 11, 9]          73,856
             ReLU-11           [-1, 128, 11, 9]               0
        MaxPool2d-12            [-1, 128, 5, 4]               0
          Flatten-13                 [-1, 2560]               0
           Linear-14                   

In [7]:
def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap a training dataset in a ``DataLoader``.

    Args:
        train_data: Dataset (or any object ``DataLoader`` accepts) to draw
            samples from.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples at every epoch. Defaults to
            ``True`` so that training batches are not tied to the order in
            which the dataset stores its samples. Pass ``False`` to iterate
            in storage order.

    Returns:
        A ``DataLoader`` over ``train_data``.
    """
    return DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

In [8]:
# Batch the full dataset for training, BATCH_SIZE samples at a time.
train_dataloader = create_data_loader(
    train_data=usd,
    batch_size=BATCH_SIZE,
)

In [9]:
def train_one_epoch(model, data_loader, loss_function, optimizer, device):
    """Run one optimisation pass over every batch in ``data_loader``.

    Args:
        model: Network to train; it is switched to training mode first.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_function: Callable taking ``(predictions, targets)`` and returning
            a scalar loss tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Loss of the last batch processed (the same value that is printed).

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    start_time = datetime.now()

    # Make sure training-time layer behaviour is active even if earlier code
    # left the model in eval mode.
    model.train()

    loss = None
    for inputs, targets in data_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # calculate loss
        predictions = model(inputs)
        loss = loss_function(predictions, targets)

        # backpropagate error and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Without this guard an empty loader would fail below with an
    # UnboundLocalError on `loss`, which hides the real problem.
    if loss is None:
        raise ValueError("data_loader yielded no batches; nothing to train on")

    last_loss = loss.item()
    print(f"Loss: {last_loss}")
    end_time = datetime.now()
    print(f"Time taken: {end_time - start_time}")
    return last_loss

In [10]:
def train(model, data_loader, loss_function, optimizer, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``data_loader``.

    Each pass is delegated to ``train_one_epoch``. A header is printed before
    every epoch and a separator after it; once all epochs are done the total
    wall-clock duration is printed. Nothing is returned.
    """
    began_at = datetime.now()

    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_one_epoch(model, data_loader, loss_function, optimizer, device)
        print("---------------------------")

    elapsed = datetime.now() - began_at
    print(f"Finished training in {elapsed}")

In [11]:
# Start training with the objects configured in the cells above.
train(
    model=cnn,
    data_loader=train_dataloader,
    loss_function=loss_function,
    optimizer=optimizer,
    device=device,
    epochs=EPOCHS,
)

Epoch 1
Loss: 2.1914429664611816
Time taken: 0:00:52.766144 seconds
---------------------------
Epoch 2
Loss: 2.296118974685669
Time taken: 0:00:42.028385 seconds
---------------------------
Epoch 3
Loss: 2.2893426418304443
Time taken: 0:00:45.687485 seconds
---------------------------
Epoch 4
Loss: 2.199026584625244
Time taken: 0:00:48.257930 seconds
---------------------------
Epoch 5
Loss: 2.032248020172119
Time taken: 0:00:44.052911 seconds
---------------------------
Epoch 6
Loss: 2.0305287837982178
Time taken: 0:00:43.220598 seconds
---------------------------
Epoch 7
Loss: 2.0295135974884033
Time taken: 0:00:42.394624 seconds
---------------------------
Epoch 8
Loss: 2.0317039489746094
Time taken: 0:00:43.056329 seconds
---------------------------
Epoch 9
Loss: 2.0301806926727295
Time taken: 0:00:42.916577 seconds
---------------------------
Epoch 10
Loss: 2.0309083461761475
Time taken: 0:00:43.170528 seconds
---------------------------
Finished training in 0:07:27.561083 second

In [12]:
# Make sure the target folder exists before writing: saving into a missing
# directory fails, and that would discard the result of the training run.
os.makedirs("models", exist_ok=True)
torch.save(cnn.state_dict(), "models/cnn.pth")