## Import Required Modules

In [1]:
# Import Required Modules
import librosa
import numpy as np
from keras.models import load_model
import warnings
warnings.filterwarnings('ignore')

## Data Pre-processing Function

In [2]:
def getFeaturesTest(filename):
    """Build the feature row for a single audio file.

    Five librosa descriptors are computed for the clip; each is averaged
    along its trailing axis and the averages are joined end to end.

    Parameters
    ----------
    filename : path of the audio file, handed unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        A 2-D array containing exactly one row: the MFCC, chroma, mel
        spectrogram, spectral contrast and tonnetz averages, in that order.
    """

    def frame_mean(descriptor):
        # Average over the descriptor's trailing axis (the frame axis for
        # librosa's 2-D feature matrices).
        return np.mean(descriptor.T, axis=0)

    # Decode the clip with librosa's default loading settings.
    signal, rate = librosa.load(filename)

    # Mel-frequency cepstral coefficients, 40 per frame.
    mfcc_avg = frame_mean(librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40))

    # Magnitude of the short-time Fourier transform; reused by the chroma and
    # spectral-contrast descriptors below.
    magnitude = np.abs(librosa.stft(signal))

    # Chromagram derived from the STFT magnitudes.
    # NOTE(review): librosa documents `S` of chroma_stft as a power
    # spectrogram, whereas magnitudes are passed here. Left untouched because
    # the saved models were presumably trained on these exact features -
    # confirm before changing.
    chroma_avg = frame_mean(librosa.feature.chroma_stft(S=magnitude, sr=rate))

    # Mel-scaled spectrogram computed directly from the waveform.
    mel_avg = frame_mean(librosa.feature.melspectrogram(y=signal, sr=rate))

    # Spectral contrast from the same STFT magnitudes.
    contrast_avg = frame_mean(
        librosa.feature.spectral_contrast(S=magnitude, sr=rate)
    )

    # Tonal centroid (tonnetz) features of the harmonic component only.
    harmonic_part = librosa.effects.harmonic(signal)
    tonnetz_avg = frame_mean(librosa.feature.tonnetz(y=harmonic_part, sr=rate))

    # Join the per-descriptor averages into one flat vector, then wrap it in an
    # outer list so the result has a leading axis of length one.
    parts = (mfcc_avg, chroma_avg, mel_avg, contrast_avg, tonnetz_avg)
    return np.array([np.concatenate(parts, axis=0)])

## Loading Model Files and Defining Output Labels

In [3]:
# Restore the four trained classifiers from their checkpoint files. The paths
# are relative, so the files must sit in the notebook's working directory.
cnn, crnn, bicrnn, resnet = (
    load_model(checkpoint)
    for checkpoint in (
        'CNN_Best_Model.hdf5',
        'CRNN_Best_Model.hdf5',
        'BICRNN_Best_Model.hdf5',
        'RESNET_Best_Model.hdf5',
    )
)

# Map each class index (position in the tuple, starting at 0) to the
# human-readable name printed in the comparison cells.
outputLabels = dict(enumerate((
    'Axecutting Sound',
    'Chainsaw Sound',
    'Forest Sound',
    'Handsaw Sound',
    'Rain / Thunder Sound',
    'Wind Sound',
)))

## Reading the Audio Files and Pre-Processing

In [4]:
# Extract one feature row per test clip, in the order the clips are listed.
audio1, audio2, audio3 = map(
    getFeaturesTest,
    (
        'Axecutting_test1.wav',
        'Rain&Thunder_test1.wav',
        'Wind_test1.wav',
    ),
)

# Ground-truth class indices for the three clips, as keys of outputLabels:
# 0 -> Axecutting, 4 -> Rain / Thunder, 5 -> Wind.
y1, y2, y3 = 0, 4, 5

## Predicting using models and comparing outputs

#### Custom CNN Model

In [5]:
# Index of the highest-scoring class for each clip, according to the CNN.
pred1, pred2, pred3 = (
    np.argmax(cnn.predict(clip, verbose=0), axis=1)
    for clip in (audio1, audio2, audio3)
)

# One row per clip: heading, true class index, predicted indices, and the
# terminator that leaves a blank line between reports but not after the last.
comparisons = (
    ('Audio File 1:', y1, pred1, '\n\n'),
    ('Audio File 2:', y2, pred2, '\n\n'),
    ('Audio File 3:', y3, pred3, '\n'),
)
for heading, expected, predicted, terminator in comparisons:
    print(heading)
    print('Labelled Output :', outputLabels[expected])
    print('Predicted Output :', outputLabels[predicted[0]], end=terminator)

Audio File 1:
Labelled Output : Axecutting Sound
Predicted Output : Axecutting Sound

Audio File 2:
Labelled Output : Rain / Thunder Sound
Predicted Output : Rain / Thunder Sound

Audio File 3:
Labelled Output : Wind Sound
Predicted Output : Wind Sound


#### CRNN Model

In [6]:
# Index of the highest-scoring class for each clip, according to the CRNN.
pred1, pred2, pred3 = (
    np.argmax(crnn.predict(clip, verbose=0), axis=1)
    for clip in (audio1, audio2, audio3)
)

# One row per clip: heading, true class index, predicted indices, and the
# terminator that leaves a blank line between reports but not after the last.
comparisons = (
    ('Audio File 1:', y1, pred1, '\n\n'),
    ('Audio File 2:', y2, pred2, '\n\n'),
    ('Audio File 3:', y3, pred3, '\n'),
)
for heading, expected, predicted, terminator in comparisons:
    print(heading)
    print('Labelled Output :', outputLabels[expected])
    print('Predicted Output :', outputLabels[predicted[0]], end=terminator)

Audio File 1:
Labelled Output : Axecutting Sound
Predicted Output : Axecutting Sound

Audio File 2:
Labelled Output : Rain / Thunder Sound
Predicted Output : Rain / Thunder Sound

Audio File 3:
Labelled Output : Wind Sound
Predicted Output : Forest Sound


#### BiCRNN Model

In [7]:
# Index of the highest-scoring class for each clip, according to the BiCRNN.
pred1, pred2, pred3 = (
    np.argmax(bicrnn.predict(clip, verbose=0), axis=1)
    for clip in (audio1, audio2, audio3)
)

# One row per clip: heading, true class index, predicted indices, and the
# terminator that leaves a blank line between reports but not after the last.
comparisons = (
    ('Audio File 1:', y1, pred1, '\n\n'),
    ('Audio File 2:', y2, pred2, '\n\n'),
    ('Audio File 3:', y3, pred3, '\n'),
)
for heading, expected, predicted, terminator in comparisons:
    print(heading)
    print('Labelled Output :', outputLabels[expected])
    print('Predicted Output :', outputLabels[predicted[0]], end=terminator)

Audio File 1:
Labelled Output : Axecutting Sound
Predicted Output : Axecutting Sound

Audio File 2:
Labelled Output : Rain / Thunder Sound
Predicted Output : Rain / Thunder Sound

Audio File 3:
Labelled Output : Wind Sound
Predicted Output : Wind Sound


#### ResNet Model

In [8]:
# Index of the highest-scoring class for each clip, according to the ResNet.
pred1, pred2, pred3 = (
    np.argmax(resnet.predict(clip, verbose=0), axis=1)
    for clip in (audio1, audio2, audio3)
)

# One row per clip: heading, true class index, predicted indices, and the
# terminator that leaves a blank line between reports but not after the last.
comparisons = (
    ('Audio File 1:', y1, pred1, '\n\n'),
    ('Audio File 2:', y2, pred2, '\n\n'),
    ('Audio File 3:', y3, pred3, '\n'),
)
for heading, expected, predicted, terminator in comparisons:
    print(heading)
    print('Labelled Output :', outputLabels[expected])
    print('Predicted Output :', outputLabels[predicted[0]], end=terminator)

Audio File 1:
Labelled Output : Axecutting Sound
Predicted Output : Axecutting Sound

Audio File 2:
Labelled Output : Rain / Thunder Sound
Predicted Output : Rain / Thunder Sound

Audio File 3:
Labelled Output : Wind Sound
Predicted Output : Axecutting Sound
