# Classifying <a href = "https://magenta.tensorflow.org/datasets/nsynth">NSynth Instruments</a> with Fully Connected Feedforward Neural Nets


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from scipy import signal
import librosa

# Download data to your colab instance
#!wget https://raw.githubusercontent.com/ursinus-cs372-s2023/Week13_NSynthClassify/main/data/nsynth_valid.mp3
#!wget https://raw.githubusercontent.com/ursinus-cs372-s2023/Week13_NSynthClassify/main/data/nsynth_test.mp3

# Torch Dataset for NSynth Data


In [7]:
LABELS = ['bass_electronic', 'bass_synthetic', 'brass_acoustic', 'flute_acoustic', 'flute_synthetic', 'guitar_acoustic', 'guitar_electronic', 'keyboard_acoustic', 'keyboard_electronic', 'keyboard_synthetic', 'mallet_acoustic', 'organ_electronic', 'reed_acoustic', 'string_acoustic', 'vocal_acoustic', 'vocal_synthetic']

class Synth(Dataset):
    """
    Map-style dataset over one long audio file that is sliced into
    consecutive clips of sample_len seconds, with one integer class
    label (an index into LABELS) per clip.

    Item idx is the tuple (x, y), where x holds the flattened MFCC
    features of clip idx and y is a 1-hot vector over LABELS.  Both
    are float32 tensors.
    """
    def __init__(self, audio_filename, labels_filename, sr=8000, sample_len=4):
        """
        Parameters
        ----------
        audio_filename: string
            Path to audio file
        labels_filename: string
            Path to labels file
        sr: int
            Audio sample rate to use
        sample_len: int
            Length of each sample, in seconds
        """
        self.x, self.sr = librosa.load(audio_filename, sr=sr)
        # ndmin=1 keeps the labels a 1D array even when the file holds
        # a single label, so that len() and indexing still work
        self.labels = np.loadtxt(labels_filename, ndmin=1)
        print("Finished loading audio ", audio_filename)
        self.sample_len = sample_len
    
    def __len__(self):
        """
        Number of clips, which is taken to be the number of labels
        """
        return len(self.labels)
    
    def __getitem__(self, idx):
        """
        Return a tuple (x, y)

        Parameters
        ----------
        idx: int
            Index of the clip.  Negative values count from the end

        Returns
        -------
        x: float32 tensor
            Flattened MFCC features of the idxth clip
        y: float32 tensor(len(LABELS))
            1-hot vector representing the clip's class

        Raises
        ------
        IndexError
            If idx is outside [-len(self), len(self))
        """
        n_clips = len(self)
        idx = int(idx)
        # Resolve negative indices up front.  Otherwise the audio slice
        # below would silently come out empty (e.g. x[-L:0] for idx=-1)
        # while the label lookup still succeeded
        if idx < 0:
            idx += n_clips
        if idx < 0 or idx >= n_clips:
            raise IndexError("Clip index out of range for dataset with {} clips".format(n_clips))
        # Number of audio samples per clip; int() keeps the slice bounds
        # valid even if sample_len is fractional
        clip_len = int(self.sr*self.sample_len)
        clip = self.x[idx*clip_len:(idx+1)*clip_len]
        features = librosa.feature.mfcc(y=clip, sr=self.sr).flatten()
        x = torch.from_numpy(np.array(features, dtype=np.float32))
        y = torch.zeros(len(LABELS), dtype=torch.float32)
        y[int(self.labels[idx])] = 1
        return x, y

In [5]:
# Audio settings shared by both splits: sample rate and clip length in seconds
sr, sample_len = 8000, 4

# The NSynth "valid" split is what gets trained on; the "test" split is held out
data_train = Synth(
    audio_filename="data/nsynth_valid.mp3",
    labels_filename="labels_valid.txt",
    sr=sr,
    sample_len=sample_len,
)
data_test = Synth(
    audio_filename="data/nsynth_test.mp3",
    labels_filename="labels_test.txt",
    sr=sr,
    sample_len=sample_len,
)

Finished loading audio  data/nsynth_valid.mp3
Finished loading audio  data/nsynth_test.mp3


## PyTorch Training Loop

In [None]:
def plot_confusion_matrix(labels, outputs):
    """
    Draw the confusion matrix of a batch of predictions on the current
    matplotlib axes and count how many predictions were correct

    Parameters
    ----------
    labels: tensor(N, K)
        Ground truth; the class of each row is the argmax of that row
        (e.g. 1-hot vectors)
    outputs: tensor(N, K)
        Model outputs; the predicted class of each row is the argmax
        of that row

    Returns
    -------
    The number of rows whose predicted class matches the ground truth
    """
    truth = torch.argmax(labels, axis=1).detach().cpu().numpy().astype(int)
    predicted = torch.argmax(outputs, axis=1).detach().cpu().numpy().astype(int)
    K = len(LABELS)
    # D[i, j] counts the examples of true class i predicted as class j.
    # np.add.at accumulates repeated (i, j) pairs, and keeping D dense
    # means the diagonal can be read off directly (np.diag cannot take
    # a scipy sparse matrix)
    D = np.zeros((K, K))
    np.add.at(D, (truth, predicted), 1)
    plt.imshow(D)
    plt.xticks(np.arange(K), LABELS, rotation='vertical')
    plt.yticks(np.arange(K), LABELS)
    plt.ylabel("Ground Truth Label")
    plt.xlabel("Predicted Label")
    num_correct = np.trace(D)
    total = truth.size
    # Guard against an empty batch so the title never shows nan
    correct = 100*num_correct/total if total > 0 else 0.0
    plt.title("{:.3f}% Correct".format(correct))
    return num_correct

In [None]:
# Try to use the GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device: ", device)

## Step 2: Create sequential neural net model (setup a function space)

# Read the input width off one example so the first layer always matches
# whatever feature vector the dataset produces
n_features = data_train[0][0].numel()
n_hidden = 128
# Baseline fully connected net with two hidden layers; the widths and
# depth are a starting point to experiment with
model = nn.Sequential(
    nn.Linear(n_features, n_hidden),
    nn.ReLU(),
    nn.Linear(n_hidden, n_hidden),
    nn.ReLU(),
    nn.Linear(n_hidden, len(LABELS)),
)
# The parameters have to live on the same device as the mini batches
model = model.to(device)

# Layer 3 outputs raw scores; the logistic function is applied to them
# inside of BCEWithLogitsLoss

## Step 3: Setup the loss function
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

n_epochs = 10 # Each "epoch" is a loop through the entire dataset
# and we use this to update the parameters
losses = []
accuracy = []

# A shuffling loader draws a fresh permutation every time it is iterated,
# so a single loader can be shared by all epochs
loader = DataLoader(data_train, batch_size=64, shuffle=True)

# The test set never changes, so pull it through the dataset once up front
# instead of recomputing all of its features every epoch
test_loader = DataLoader(data_test, batch_size=len(data_test))
inputs, labels = next(iter(test_loader))
inputs = inputs.to(device)
labels = labels.to(device)

for epoch in range(n_epochs):
    print(".", end="")
    model.train()
    for X, Y in loader: # Go through each mini batch
        # Move inputs/outputs to GPU
        X = X.to(device)
        Y = Y.to(device)
        # Reset the optimizer's gradients
        optimizer.zero_grad()
        # Run the sequential model on all inputs
        Y_est = model(X)
        # Compute the loss function comparing Y_est to Y
        loss = loss_fn(Y_est, Y)
        # Compute the gradients of the loss function with respect
        # to all of the parameters of the model
        loss.backward()
        # Update the parameters based on the gradient and
        # the optimization scheme
        optimizer.step()

    # Look at results on test set.  No gradients are needed here, so skip
    # building the autograd graph over the whole test set
    model.eval()
    with torch.no_grad():
        outputs = model(inputs)
        total_loss = loss_fn(outputs, labels)
    losses.append(total_loss.item())
    plt.figure()
    num_correct = plot_confusion_matrix(labels, outputs)
    accuracy.append(num_correct/len(data_test))
    print("Epoch {}, accuracy {:.3f}".format(epoch, accuracy[-1]))

## Plot Optimization Progress

In [None]:
# Two stacked panels: the test loss on top and the test accuracy
# (as a percentage) underneath, both as a function of the epoch
fig, (ax_loss, ax_accuracy) = plt.subplots(2, 1)

ax_loss.plot(losses)
ax_loss.set_title("Losses")
ax_loss.set_xlabel("Epoch")

ax_accuracy.plot(100*np.array(accuracy))
ax_accuracy.set_title("Accuracy")
ax_accuracy.set_xlabel("Epoch")

# Keep the titles and axis labels of the two panels from overlapping
fig.tight_layout()