In [14]:
## Imports
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wv
import torch
import torch.nn as nn
from tqdm import tqdm # progress bar
from audioMNIST import AudioMNIST
from DClassifier import CNN2DAudioClassifier
from torch.utils.data import DataLoader, random_split
from kNN import KNNClassifier

In [None]:
# Relative path of the data directory.
# NOTE(review): not referenced by any other visible cell — presumably consumed by the
# dataset loader; confirm or remove.
file_paths = './data/'

In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print("Using CUDA device" if device.type == "cuda" else "Using CPU")

In [None]:
# Evaluation
def evaluate(model, val_dl, loss_fn=None):
    """Measure accuracy and mean per-batch loss of ``model`` on ``val_dl``.

    The pass runs in evaluation mode (``model.eval()``) and under
    ``torch.no_grad()``, so dropout / batch-norm layers behave deterministically
    and no autograd graph is built. The mode the model had on entry is restored
    before returning, so this can be called from inside a training loop.

    Args:
        model: the network to score; called as ``model(inputs)``.
        val_dl: iterable of batches where ``batch[0]`` is the input tensor and
            ``batch[1]`` the label tensor; must support ``len()``.
        loss_fn: optional loss callable ``loss_fn(outputs, labels)``. When
            omitted, the notebook-level ``criterion`` is used, which keeps the
            original two-argument call working.

    Returns:
        ``(acc, avg_loss)``: fraction of correct predictions over all samples,
        and the sum of per-batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: if ``val_dl`` yields no batches.
    """
    if loss_fn is None:
        # Fall back to the loss defined in the notebook's configuration cell.
        loss_fn = criterion

    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0

    # Remember the current mode so a caller that is mid-training gets it back.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data in tqdm(val_dl):
                inputs, labels = data[0].to(device), data[1].to(device)

                # Standardise each batch with its own mean/std, mirroring the
                # preprocessing applied in the training loop.
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / inputs_s

                outputs = model(inputs)
                loss = loss_fn(outputs, labels)

                running_loss += loss.item()

                # Predicted class = index of the largest score along dim 1.
                _, prediction = torch.max(outputs, 1)
                correct_prediction += (prediction == labels).sum().item()
                total_prediction += prediction.shape[0]
    finally:
        model.train(was_training)

    num_batches = len(val_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction / total_prediction

    return acc, avg_loss

In [None]:
# Training
def training(model, train_dl, val_dl, num_epochs,
             criterion, optimizer, scheduler):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: network to optimise; expected to already live on the
            notebook-level ``device``.
        train_dl: iterable of training batches (``batch[0]`` inputs,
            ``batch[1]`` labels); must support ``len()``.
        val_dl: validation batches, forwarded to ``evaluate``.
        num_epochs: number of full passes over ``train_dl``.
        criterion: loss callable ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per batch.
        scheduler: learning-rate scheduler, also stepped once per batch.

    Returns:
        ``(losses, val_losses)``: two lists with one entry per epoch holding the
        mean per-batch training loss and the validation loss.
    """
    losses = []
    val_losses = []
    for epoch in range(num_epochs):
        # Put the model in training mode at the start of every epoch, in case
        # the validation pass (or the caller) left it in eval mode.
        model.train()

        running_loss = 0.0
        correct_prediction = 0
        total_prediction = 0

        for data in tqdm(train_dl):
            inputs, labels = data[0].to(device), data[1].to(device)

            # Standardise each batch with its own mean/std.
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            optimizer.zero_grad()

            # inputs/labels are already on `device`; no second transfer needed.
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Stepped per batch, not per epoch.
            scheduler.step()

            running_loss += loss.item()

            # Predicted class = index of the largest score along dim 1.
            _, prediction = torch.max(outputs, 1)
            correct_prediction += (prediction == labels).sum().item()
            total_prediction += prediction.shape[0]

        num_batches = len(train_dl)
        avg_loss = running_loss / num_batches
        acc = correct_prediction / total_prediction

        v_acc, v_loss = evaluate(model, val_dl)

        print("Epoch: %d, Loss: %.4f, Train Accuracy: %.2f, Val. Loss: %.4f, Val. Accuracy: %.2f" % (
            epoch + 1, avg_loss, acc, v_loss, v_acc
        ))

        losses.append(avg_loss)
        val_losses.append(v_loss)

    return losses, val_losses

In [None]:
if __name__ == '__main__':
    # Full dataset and the model that the later cells train.
    dataset = AudioMNIST()
    model = CNN2DAudioClassifier().to(device)

    n_items = len(dataset)

    # items for train, validation, and test sets (70 % / 15 % / remainder)
    n_train = round(n_items * 0.7)
    n_val = round(n_items * 0.15)
    # Taking the remainder ensures that rounding errors don't leave any data out
    n_test = n_items - n_train - n_val

    # Dedicated, seeded generator so the train/val/test membership is identical
    # on every Restart & Run All, independent of the global RNG state.
    split_rng = torch.Generator().manual_seed(42)

    # split the dataset into training, validation, and test sets
    train_ds, val_test_ds = random_split(
        dataset, [n_train, n_items - n_train], generator=split_rng
    )
    val_ds, test_ds = random_split(
        val_test_ds, [n_val, n_test], generator=split_rng
    )

    # create DataLoaders (only the training loader reshuffles between epochs)
    train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
    val_dl = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=2)
    test_dl = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=2)

In [16]:
# Instantiate the dataset and wrap it in a shuffled loader (batches of 10, num_workers=4).
# NOTE(review): `dataset` and `loader` are both reassigned by the kNN cell that follows, and
# this `loader` is not iterated in any visible cell — confirm whether this cell is still needed.
dataset = AudioMNIST()
loader = DataLoader(dataset, batch_size=10, shuffle=True, num_workers=4)

In [19]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# kNN baseline on the flattened dataset features.

# Initialize the AudioMNIST dataset and DataLoader.
# The loader returns the whole dataset as one batch. It must NOT shuffle:
# train_test_split below already shuffles with a fixed random_state, and a
# shuffled loader would hand it a different row order on every run, which
# makes the "seeded" split (and the reported accuracy) irreproducible.
dataset = AudioMNIST()
loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, num_workers=4)

# Load all data into memory
all_features = []
all_labels = []
for data, target in loader:
    all_features.append(data.numpy())  # Assuming data is already a tensor
    all_labels.append(target.numpy())

# Convert the per-batch lists into single numpy arrays
all_features = np.vstack(all_features)
all_labels = np.concatenate(all_labels)

# Flatten every sample to one feature vector (kNN expects 2-D input: samples x features)
all_features = all_features.reshape(all_features.shape[0], -1)

# Split the dataset into training and testing sets (80 % / 20 %, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.20, random_state=42)

# Initialize and train kNN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict and calculate accuracy on the test set
predictions = knn.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test Accuracy: {accuracy}")


Test Accuracy: 0.8925


In [None]:
EPOCH_COUNT = 4

# Loss, optimiser and one-cycle learning-rate schedule consumed by the training cell.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3,
    epochs=EPOCH_COUNT,
    # One scheduler step per training batch.
    steps_per_epoch=len(train_dl),
    anneal_strategy='linear',
)

In [None]:
# Run the training loop for EPOCH_COUNT epochs; keeps the per-epoch training and validation losses.
losses, val_losses = training(model, train_dl, val_dl, EPOCH_COUNT, criterion, optimizer, scheduler)

In [None]:
# Score the trained model on the validation loader; the returned (accuracy, average loss)
# tuple is displayed as the cell output.
# NOTE(review): `test_dl` is built earlier but never evaluated in the visible cells —
# confirm whether the held-out test set was intended here.
evaluate(model, val_dl)

In [None]:
# Save model to use in Gradio App
#torch.save(model.state_dict(), 'audio_classifier_model.pth')