In [0]:
#Data manipulation
import numpy as np
import librosa
from sklearn.model_selection import train_test_split

#Model creation
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, LSTM
from keras.layers.normalization import BatchNormalization

#Files management
import os
from google.colab import files

#Visualization
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
#Data upload
#When the file selection pops up, you should select a zip file containing the data
#called SoundData.zip
files.upload()
!unzip SoundData.zip

data = []
labels = []
for i in os.listdir():
    if i[-1] == "v":
        data.append(i)
        labels.append(int(i.split('_')[0]))

In [0]:
#Data processing
#Every recording is turned into a fixed-size MFCC matrix of shape (mfcc_number, mfcc_size)
mfcc_number = 40   #number of MFCC coefficients computed per frame
mfcc_size = 660    #number of frames every matrix is cut / zero-padded to


X = []
for i in data:
    w, s = librosa.load(i, mono=True)
    #Keyword arguments: recent librosa releases no longer accept the audio and the
    #sample rate positionally, while older releases accept the keywords as well.
    mfcc = librosa.feature.mfcc(y=w, sr=s, n_mfcc=mfcc_number)
    #Cut recordings longer than mfcc_size frames; without this np.pad would receive
    #a negative pad width and raise.
    mfcc = mfcc[:, :mfcc_size]
    #Zero-pad shorter recordings on the right so that all matrices share one shape.
    mfcc = np.pad(mfcc, ((0,0), (0, mfcc_size - mfcc.shape[1])), mode="constant", constant_values=0)
    X.append(mfcc)
#NOTE: `mfcc` (the last matrix computed above) is reused by the example plot cell.

X = np.array(X)
labels = np.array(labels)

#First hold out 20% as the test set, then 20% of the remainder as the validation set
#(64% / 16% / 20% overall). The fixed random_state keeps the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(X, labels, test_size = 0.2, random_state = 7)
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size = 0.2, random_state = 7)


print("Train: x:",np.shape(train_x),"y:",np.shape(train_y))
print("Validation: x:",np.shape(val_x),"y:",np.shape(val_y))
print("Test: x:",np.shape(test_x),"y:",np.shape(test_y))

#Shape of one sample as fed to the first layer of the model.
#NOTE(review): with this layout the LSTM sees the mfcc_number coefficients as the
#sequence axis and the mfcc_size frames as features - confirm that this is intended.
in_shape = (mfcc_number,mfcc_size)
#One output unit per distinct label.
#NOTE(review): assumes the labels are the integers 0..out_shape-1 - verify against the file names.
out_shape = len(np.unique(labels))

print("input_shape:",in_shape,"output_shape:",out_shape)

In [0]:
#Visualization

def _plot_metric(history, keys, title, ylabel):
    """Plot one training curve and, when it was recorded, its validation curve.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value sequences (as returned by ``model.fit``).
        keys: candidate names of the training metric; the first one present
            in ``history.history`` is used.
        title: title of the figure.
        ylabel: label of the y axis.

    Raises:
        KeyError: if none of ``keys`` is present in ``history.history``.
    """
    records = history.history
    key = next((k for k in keys if k in records), None)
    if key is None:
        raise KeyError("none of {} found in history (available: {})".format(
            list(keys), sorted(records)))

    plt.plot(records[key])
    legend = ['Train']

    # The validation series only exists when fit() was given validation data.
    val_key = 'val_' + key
    if val_key in records:
        plt.plot(records[val_key])
        legend.append('Val')

    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(legend, loc='upper left')
    plt.show()


#Plots accuracy and loss for train and validation sets of a trained model
def plot_history(history):
    """Show the accuracy figure followed by the loss figure of a training run.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict must
            contain ``'loss'`` and the accuracy metric under either ``'acc'``
            or ``'accuracy'`` (the key depends on the name used in
            ``model.compile``). ``'val_*'`` series are drawn when present.

    Raises:
        KeyError: if the accuracy or the loss series is missing.
    """
    _plot_metric(history, ('acc', 'accuracy'), 'Model accuracy', 'Accuracy')
    _plot_metric(history, ('loss',), 'Model loss', 'Loss')

In [0]:
#Shows an example mfcc in linear and log scale

import librosa.display

#One entry per panel: (subplot row, panel title, extra specshow arguments).
#`mfcc` is the last matrix left behind by the data processing loop.
panels = [
    (1, 'MFCC', {}),
    (2, 'log scale MFCC', {'y_axis': "log"}),
]

plt.figure(figsize=(10, 4))
for row, panel_title, scale_kwargs in panels:
    plt.subplot(2, 1, row)
    librosa.display.specshow(mfcc, x_axis='time', **scale_kwargs)
    plt.colorbar(format='%+2.0f dB')
    plt.title(panel_title)

plt.tight_layout()
plt.show()

In [0]:
#Model definition
#Recurrent layer over the MFCC matrix, flattened into a small dense classifier head
#that ends in one softmax unit per class.
model = Sequential([
    LSTM(256, input_shape = in_shape, return_sequences=True, recurrent_dropout= 0.33),
    Dropout(0.5),
    Flatten(),

    Dense(128, activation= "relu"),
    BatchNormalization(),
    Dropout(0.5),

    Dense(out_shape,activation="softmax"),
])

#The sparse variant of the loss takes the integer labels directly (no one-hot encoding).
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=['acc'],
)

In [0]:
#Setup
#Hyperparameters consumed by the training cell
batch_size, epochs = 16, 32

#Textual overview of the layers first, then the diagram annotated with tensor shapes
model.summary()
keras.utils.plot_model(model, show_shapes=True)

In [0]:
#Training
#The returned object keeps the per-epoch metrics that plot_history() draws later on
history = model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
)

In [0]:
#Test
#evaluate() yields the loss followed by the single 'acc' metric the model was compiled with
scores = model.evaluate(test_x, test_y)
l, a = scores
print("Test loss: {:.4f}, Test accuracy: {:.2f}%".format(l, a * 100))

In [0]:
#Visualization
#Draws the accuracy and loss curves stored in `history` by the training cell
plot_history(history)

In [0]:
#Quick look at how the model did on every sample of the test set.
#Columns: predicted label - true label - gap between the confidence of the predicted
#label and the confidence assigned to the true label.
#The gap is 0 for a correct prediction; a small gap on a wrong one means a near miss.
print("pred - true - diff")
predictions = model.predict(test_x)
for probs, true_label in zip(predictions, test_y):
    predicted_label = np.argmax(probs)
    sign = "=" if predicted_label == true_label else "≠"
    diff = np.amax(probs) - probs[true_label]
    print("  ",predicted_label,sign,true_label,"   -",diff)
