In [18]:
## Imports
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wv
import torch
import torch.nn as nn
from tqdm import tqdm # progress bar
from audioMNIST import AudioMNIST
from DClassifier import CNN2DAudioClassifier
from torch.utils.data import DataLoader, random_split

In [19]:
# Relative path to the data directory.
# NOTE(review): not referenced by any other visible cell — presumably consumed
# elsewhere (e.g. by the dataset class) or a leftover; confirm before removing.
file_paths = './data/'

In [20]:
# Pick the compute device once: the first CUDA GPU when one is available,
# otherwise fall back to the CPU. Later cells read the global `device`.
use_cuda = torch.cuda.is_available()
print("Using CUDA device" if use_cuda else "Using CPU")
device = torch.device("cuda:0" if use_cuda else 'cpu')

Using CPU


In [21]:
# Evaluation
def evaluate(model, val_dl, criterion=None):
    """Measure accuracy and mean per-batch loss of ``model`` over ``val_dl``.

    The pass runs with the model in evaluation mode and with gradient
    tracking disabled. The model's previous train/eval mode is restored
    before returning, so calling this between training epochs does not
    leave the model in eval mode.

    Args:
        model: ``nn.Module`` to evaluate; expected to live on the global
            ``device`` already.
        val_dl: Iterable of batches where element 0 holds the inputs and
            element 1 the labels (presumably class indices, since they are
            compared against the arg-max of the model outputs).
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            When omitted, ``nn.CrossEntropyLoss()`` is used.

    Returns:
        Tuple ``(acc, avg_loss)``: fraction of correctly predicted samples,
        and the sum of batch losses divided by the number of batches.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0

    # Remember the current mode so the caller's training loop is unaffected.
    was_training = model.training
    model.eval()
    try:
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            for data in tqdm(val_dl):
                inputs, labels = data[0].to(device), data[1].to(device)

                # Standardise each batch with its own mean / std.
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                running_loss += loss.item()

                # Predicted class = index of the largest output per sample.
                _, prediction = torch.max(outputs, 1)
                correct_prediction += (prediction == labels).sum().item()
                total_prediction += prediction.shape[0]
    finally:
        model.train(was_training)

    num_batches = len(val_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction / total_prediction

    return acc, avg_loss

In [22]:
# Training
def training(model, train_dl, val_dl, num_epochs, 
             criterion, optimizer, scheduler):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: ``nn.Module`` to optimise; expected to live on the global
            ``device`` already.
        train_dl: Iterable of training batches (element 0 inputs, element 1
            labels).
        val_dl: Iterable of validation batches, handed to ``evaluate``.
        num_epochs: Number of full passes over ``train_dl``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimiser updating the model parameters.
        scheduler: Learning-rate scheduler; it is stepped once per batch,
            not once per epoch.

    Returns:
        Tuple ``(losses, val_losses)``: two lists with one entry per epoch,
        holding the mean training batch loss and the validation loss.
    """
    losses = []
    val_losses = []
    for epoch in range(num_epochs):
        # Make sure training-mode layers are active even if the model was
        # left in eval mode by earlier code.
        model.train()

        running_loss = 0.0
        correct_prediction = 0
        total_prediction = 0

        for data in tqdm(train_dl):
            inputs, labels = data[0].to(device), data[1].to(device)

            # Standardise each batch with its own mean / std.
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            running_loss += loss.item()

            # Predicted class = index of the largest output per sample.
            _, prediction = torch.max(outputs, 1)
            correct_prediction += (prediction == labels).sum().item()
            total_prediction += prediction.shape[0]

        num_batches = len(train_dl)
        avg_loss = running_loss / num_batches
        acc = correct_prediction / total_prediction

        v_acc, v_loss = evaluate(model, val_dl)

        print("Epoch: %d, Loss: %.4f, Train Accuracy: %.2f, Val. Loss: %.4f, Val. Accuracy: %.2f" % (
            epoch + 1, avg_loss, acc, v_loss, v_acc
        ))

        losses.append(avg_loss)
        val_losses.append(v_loss)

    return losses, val_losses

In [23]:
if __name__ == '__main__':
    # Fixed seed for the train/val/test partition so that re-running the
    # notebook reproduces the same split.
    SPLIT_SEED = 42
    split_rng = torch.Generator().manual_seed(SPLIT_SEED)

    dataset = AudioMNIST()
    model = CNN2DAudioClassifier().to(device)

    # Training, Validation, Test Dataset Split:
    # 80% of the items go to training; the remaining 20% are divided
    # 60/40 into validation and test.
    n_items = len(dataset)
    n_train = round(n_items * 0.8)
    n_val_test = n_items - n_train
    train_ds, val_test_ds = random_split(
        dataset, [n_train, n_val_test], generator=split_rng
    )

    n_val = round(n_val_test * 0.6)
    n_test = n_val_test - n_val
    val_ds, test_ds = random_split(
        val_test_ds, [n_val, n_test], generator=split_rng
    )

    # Only the training loader shuffles; val/test keep a fixed order.
    train_dl = DataLoader(
        train_ds, batch_size=64, shuffle=True, num_workers=2
    )

    val_dl = DataLoader(
        val_ds, batch_size=64, shuffle=False, num_workers=2
    )

    test_dl = DataLoader(
        test_ds, batch_size=32, shuffle=False, num_workers=2
    )

In [24]:
# Hyper-parameters and optimisation objects for the training run.
N_EPOCHS = 4

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# One-cycle learning-rate schedule sized for N_EPOCHS passes over train_dl.
batches_per_epoch = len(train_dl)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3,
    epochs=N_EPOCHS,
    steps_per_epoch=batches_per_epoch,
    anneal_strategy='linear',
)

In [25]:
# Run the full training loop; yields one mean training loss and one
# validation loss per epoch.
losses, val_losses = training(model, train_dl, val_dl, N_EPOCHS, criterion, optimizer, scheduler)

100%|██████████| 375/375 [01:26<00:00,  4.32it/s]
100%|██████████| 57/57 [00:25<00:00,  2.27it/s]


Epoch: 1, Loss: 1.2786, Train Accuracy: 0.65, Val. Loss: 0.3706, Val. Accuracy: 0.92


100%|██████████| 375/375 [01:15<00:00,  4.96it/s]
100%|██████████| 57/57 [00:22<00:00,  2.50it/s]


Epoch: 2, Loss: 0.1798, Train Accuracy: 0.96, Val. Loss: 0.1102, Val. Accuracy: 0.97


100%|██████████| 375/375 [01:05<00:00,  5.75it/s]
100%|██████████| 57/57 [00:24<00:00,  2.30it/s]


Epoch: 3, Loss: 0.0822, Train Accuracy: 0.98, Val. Loss: 0.0783, Val. Accuracy: 0.98


100%|██████████| 375/375 [01:14<00:00,  5.02it/s]
100%|██████████| 57/57 [00:24<00:00,  2.35it/s]

Epoch: 4, Loss: 0.0622, Train Accuracy: 0.98, Val. Loss: 0.0649, Val. Accuracy: 0.98





In [26]:
# Final metrics, displayed as an (accuracy, mean batch loss) tuple, computed
# on the validation loader.
# NOTE(review): test_dl is created earlier but never used in the visible
# cells — confirm whether the held-out test split should be evaluated here.
evaluate(model, val_dl)

100%|██████████| 57/57 [00:23<00:00,  2.39it/s]


(0.9830555555555556, 0.06261319274965085)