In [1]:
# Model Training and Evaluation
#
# Sample counts noted when this cell was written:
#   x_train - training samples: 2185
#   x_test  - testing samples:  547
#   training categories (presumably y_train, one label per training sample): 2185
#   total samples: 2732 if the two counts above are current
# NOTE(review): the training log further down reports 1346 training and 150
# validation samples, so the counts above look stale - confirm against the
# stored data.

# Restore variables previously saved with %store:
#   x_train / y_train - features and labels passed to model.fit
#   x_test  / y_test  - features and labels used for evaluation and validation
#   yy                - label array; its second dimension is read as the number of classes
#   le                - encoder whose inverse_transform maps class indices back to names
%store -r x_train 
%store -r x_test 
%store -r y_train
%store -r y_test
%store -r yy
%store -r le

In [2]:
# Import libraries
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Number of output classes (4 for this dataset), taken from the width of the
# label array
num_labels = yy.shape[1]

# Layer sizing
numNodes = 128  # width of each hidden layer, chosen by trial and error
numMFCC = 40    # length of the 1-D MFCC feature vector fed to the network

# Network layout, listed in the order the layers are stacked:
#   two hidden stages of Dense -> ReLU -> Dropout, then a softmax classifier.
# Dropout(0.5) excludes a random half of the units from each update cycle,
# which is used here to limit overfitting.
layer_stack = [
    # Input/hidden stage 1 - also declares the input shape (numMFCC,)
    Dense(numNodes, input_shape=(numMFCC,)),
    Activation('relu'),
    Dropout(0.5),
    # Hidden stage 2
    Dense(numNodes),
    Activation('relu'),
    Dropout(0.5),
    # Output stage - one node per category, softmax instead of ReLU
    Dense(num_labels),
    Activation('softmax'),
]

# Assemble the Sequential model by appending the layers one after another
model = Sequential()
for layer in layer_stack:
    model.add(layer)

Using TensorFlow backend.


In [3]:
# Compile the model
#   optimizer - 'adam', a commonly used variant of stochastic gradient descent,
#               which updates the weights using the gradient of the loss
#   loss      - categorical cross-entropy
#   metrics   - accuracy is reported alongside the loss during fit/evaluate
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [4]:
# Print the layer-by-layer architecture summary
model.summary()

# Baseline before any training: evaluate the untrained network on the test data.
# verbose=0 keeps evaluate() silent; it returns the loss followed by the metric value.
score = model.evaluate(x_test, y_test, verbose=0)

# Second entry of `score` is the accuracy metric, expressed here as a percentage
accuracy = score[1] * 100
print("Pre-training accuracy: %.4f%%" % (accuracy,))

# Show the raw values returned by evaluate()
display(score)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               5248      
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                

[16.881140518188477, 0.23999999463558197]

In [7]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

# Training hyper-parameters
num_epochs = 100
num_batch_size = 8  # arbitrarily chosen value

# Timestamp taken immediately before training, used to report the elapsed time
start = datetime.now()

# Train for a fixed number of epochs.
#   validation_data - evaluated at the end of every epoch (the test data is used here)
#   verbose=1       - show per-epoch progress
# No callbacks are passed to fit().
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

duration = datetime.now() - start
print("Training completed in time: ", duration)

Train on 1346 samples, validate on 150 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


In [8]:
# Test the Model

# Report the accuracy on the training data first and the testing data second,
# so that `score` finally holds the test-set result.
for split_label, split_x, split_y in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(split_x, split_y, verbose=0)
    print(split_label, score[1]*100, "%")

Training Accuracy:  96.28528952598572 %
Testing Accuracy:  93.33333373069763 %


In [17]:
import librosa
import numpy as np

# Creating a function that extracts the MFCC features of an audio file
def extract_feature(file_name):
    """Extract a time-averaged MFCC feature vector from an audio file.

    The file is loaded with librosa, 40 MFCCs are computed, and the
    coefficients are averaged across frames so that every file yields a
    vector of the same length.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to load.

    Returns
    -------
    numpy.ndarray or None
        A single-row array holding the 40 averaged coefficients (the extra
        leading dimension makes it a batch of one sample), or ``None`` when
        the file could not be loaded or processed.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
        # Average over the time axis -> one value per coefficient
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do.
        print("Error encountered while parsing file: ", file_name)
        print("Reason:", repr(e))
        # A single None keeps the return arity consistent with the success path
        # (previously a 2-tuple was returned here while success returned one array).
        return None

    return np.array([mfccsscaled])

In [18]:
# Creating a function that prints the predicted class and the per-class probabilities for an audio file
def print_prediction(file_name):
    """Print the predicted class and per-class probabilities for an audio file.

    The MFCC features of ``file_name`` are extracted, passed through the
    trained ``model`` once, and the result is printed: first the most likely
    class name, then one line per class with its probability.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to classify.

    Returns
    -------
    None
        Output is printed. If feature extraction fails, a short notice is
        printed and nothing is predicted.
    """
    # MFCCs of the specific file
    prediction_feature = extract_feature(file_name)

    # extract_feature signals failure with a non-array value; bail out instead of
    # handing that to the model and failing with an unrelated error.
    if not isinstance(prediction_feature, np.ndarray):
        print("No prediction available for:", file_name)
        return

    # Single forward pass: one row of class probabilities per input sample.
    # model.predict is used instead of predict_classes/predict_proba, which are
    # not available on Sequential models in newer Keras/TensorFlow releases.
    predicted_proba_vector = model.predict(prediction_feature)
    # Extract first array from array of arrays
    predicted_proba = predicted_proba_vector[0]

    # Index of the most probable class for each row
    predicted_vector = np.argmax(predicted_proba_vector, axis=-1)

    # Inverse transform converts encoded LabelEncoder() values back to strings
    predicted_class = le.inverse_transform(predicted_vector)
    print("The predicted class is:", predicted_class[0], '\n')

    # Decode every class index in one call, then print name/probability pairs
    categories = le.inverse_transform(np.arange(len(predicted_proba)))
    for category, probability in zip(categories, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [32]:
# Validation 
import os
from pathlib import Path

# Software folder: two levels above the current working directory
root_path = Path.cwd().parent.parent

# Sample recordings to classify: (banner to print, fold directory, wav file name)
sample_files = (
    # Random dog bark file
    ("Below is the prediction for a dog bark file", "fold1", "101415-3-0-2.wav"),
    # Siren file
    ("\nBelow is the prediction for a siren file", "fold3", "184623-8-0-1.wav"),
)

for banner, fold_name, wav_name in sample_files:
    filename = root_path / "Training_Dataset" / "audio" / fold_name / wav_name
    print(banner)
    print_prediction(filename)

Below is the prediction for a dog bark file
The predicted class is: dog_bark 

car_horn 		 :  0.00000000000000000000095449140306
dog_bark 		 :  1.00000000000000000000000000000000
gun_shot 		 :  0.00000000003150995783740562217190
siren 		 :  0.00000000000000000274229501612994

Below is the prediction for a siren file
The predicted class is: siren 

car_horn 		 :  0.00000000000000000000000902081457
dog_bark 		 :  0.00000000001430626537374246609602
gun_shot 		 :  0.00000000000000000000000001330257
siren 		 :  1.00000000000000000000000000000000
