In [None]:
'''
This notebook shows how to evaluate the CNN model that is built on mel-spectrogram images.
A. Evaluate the model against labelled good and bad (noisy) signals.
B. Run the model on real audio signals to check whether they contain noise.

'''

In [None]:
import glob
import os
import pickle
from pathlib import Path

import librosa
import librosa.display
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from keras import models
from keras.preprocessing import image

# NOTE(review): later cells also rely on names that are not imported in this
# cell (e.g. `plt`, `metrics`) - presumably matplotlib.pyplot and
# sklearn.metrics. TODO: import them here so Restart & Run All works.

In [None]:
# Load the saved model; `hub.KerasLayer` is registered as a custom object so
# the TensorFlow Hub layer stored inside the .h5 file can be deserialised.
saved_model_file = '/Models/CNN-Image-MelSpc-0422/AudioQuality_MobileNet_TL_GD_V1_04222.h5'
saved_weights_file = '/Models/CNN-Image-MelSpc-0422/AudioQualityWeights_MobileNet_TL_GD_V1_0422.h5'

model1 = tf.keras.models.load_model(
    saved_model_file,
    custom_objects={'KerasLayer': hub.KerasLayer},
)
# The weights are stored in a separate file and applied on top of the model.
model1.load_weights(saved_weights_file)
model1.summary()

In [None]:
# Restore the pickled label encoder used to map class indices back to labels.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
label_encoder_file = '/Models/CNN-Image-MelSpc-0422/labelEncoderFile_04222020_v1.pkl'
with open(label_encoder_file, 'rb') as encoder_handle:
    le_loaded = pickle.load(encoder_handle)

# Display the classes known to the encoder.
le_loaded.classes_

In [None]:
def _predict_class_index(model, batch):
    """Return the predicted class index for a single-image batch as a plain int.

    Uses ``model.predict_classes`` when the model still provides it (older
    Keras). Otherwise falls back to ``model.predict`` and applies the same
    rule: argmax when the output has several columns, a 0.5 threshold when it
    has a single column.
    """
    if hasattr(model, 'predict_classes'):
        classes = model.predict_classes(batch)
    else:
        probabilities = np.asarray(model.predict(batch))
        if probabilities.ndim > 1 and probabilities.shape[-1] > 1:
            classes = np.argmax(probabilities, axis=-1)
        else:
            classes = (probabilities > 0.5).astype('int32')
    # Flatten so both (1,) and (1, 1) outputs yield one scalar label.
    return int(np.ravel(classes)[0])


def getImagesForMixData(model, parent_dir='D:/Abhishek/Machine Learning Models/Audio Data Analysis/Models/spectrogram'):
    """Predict every labelled spectrogram image and return predictions with ground truth.

    The images are read from ``<parent_dir>/<noise|good>/<sub folder>/*.png``,
    resized to 224x224, scaled to [0, 1] and classified one at a time.

    Args:
        model: Keras-style classifier exposing ``predict`` (or the legacy
            ``predict_classes``).
        parent_dir: Root folder of the labelled spectrogram images. Defaults to
            the location originally hard-coded in this notebook.

    Returns:
        Tuple ``(predicted, actual)`` of equally long lists of int class
        indices. Images under ``noise`` are labelled 1 and images under
        ``good`` are labelled 0.
        NOTE(review): this assumes the model's class indices use the same
        encoding (good=0, noise=1) - confirm against the label encoder.
    """
    # (top-level folder, ground-truth label, sub folders holding the PNG files)
    folder_layout = [
        ('noise', 1, ['audio_mixed_noise', 'spear_noise', 'only_noise']),
        ('good', 0, ['flicker', 'spear_good', 'urban']),
    ]
    file_ext = "*.png"
    img_width, img_height = 224, 224

    predictLable = []
    lables = []
    for sub_dir, label, sub_sub_dirs in folder_layout:
        print('Processing folder: ', sub_dir)
        for sub_sub_dir in sub_sub_dirs:
            pattern = os.path.join(parent_dir, sub_dir, sub_sub_dir, file_ext)
            # Sorted only to make the processing order reproducible.
            for fn in sorted(glob.glob(pattern)):
                img = image.load_img(fn, target_size=(img_width, img_height))
                img = image.img_to_array(img)
                img = np.expand_dims(img, axis=0)  # add the batch dimension
                img = img / 255

                predictLable.append(_predict_class_index(model, img))
                lables.append(label)

    return predictLable, lables

In [None]:
# Part A: score the model on the labelled spectrogram folders.
prediction_result, actualresult_result = getImagesForMixData(model1)

# Fail early with a clear message instead of an obscure metrics error.
assert len(actualresult_result) > 0, "No spectrogram images were found to evaluate."

# NOTE(review): `metrics` is presumably `sklearn.metrics`; every score below is
# taken from that one namespace so a single import is enough - confirm it is
# imported in the imports cell.
classification_report = metrics.classification_report(actualresult_result, prediction_result)
print("Classification report: \n", classification_report)
confusion_matrix = metrics.confusion_matrix(actualresult_result, prediction_result)
print("Confusion matrix: \n", confusion_matrix)

print("Accuracy Score: ", metrics.accuracy_score(actualresult_result, prediction_result))
print("F1 Score: ", metrics.f1_score(actualresult_result, prediction_result, average='weighted'))
print("Precision Score: ", metrics.precision_score(actualresult_result, prediction_result, average='weighted'))
print("Recall Score: ", metrics.recall_score(actualresult_result, prediction_result, average='weighted'))

In [None]:
def audioImageExtraction(audio, sr, refID, melSpectogramPath, model, le):
    """Split an audio signal into one-second chunks and classify each chunk.

    Each chunk is rendered to a mel-spectrogram image with
    ``generateMelspectrogram`` and that image is classified with
    ``predictAudioImageClass``. Chunks are contiguous (no sample is skipped);
    the last chunk may be shorter than one second.

    Args:
        audio: Sequence of samples supporting ``len`` and slicing.
        sr: Sampling rate in samples per second; one chunk holds ``sr`` samples.
        refID: Identifier used as prefix of the generated image file names.
        melSpectogramPath: Directory the spectrogram images are written to.
        model: Classifier forwarded to ``predictAudioImageClass``.
        le: Label encoder forwarded to ``predictAudioImageClass``.

    Returns:
        dict mapping the chunk index (as ``str``, starting at ``'0'``) to the
        predicted label of that chunk. Empty when ``audio`` is empty.

    Raises:
        ValueError: If ``sr`` does not describe a positive chunk length.
    """
    split_audio_duration = int(1 * sr)
    if split_audio_duration <= 0:
        raise ValueError('sr must be a positive sampling rate')

    image_predictions = {}
    # One pass handles both long and short signals: audio shorter than one
    # second simply yields a single chunk with index 0.
    for i, startIndex in enumerate(range(0, len(audio), split_audio_duration)):
        split_audio = audio[startIndex:startIndex + split_audio_duration]
        imagePath = generateMelspectrogram(split_audio, sr, melSpectogramPath, refID, i)
        image_predictions[str(i)] = predictAudioImageClass(imagePath, model, le)

    return image_predictions

def generateMelspectrogram(split_audio, sr, melSpectogramPath, refID, i):
    """Render one audio chunk as a mel-spectrogram PNG and return its path.

    The image is drawn without axes or frame on a 0.72 x 0.72 inch figure and
    saved at 400 dpi with a tight bounding box, as
    ``<melSpectogramPath>/<refID>_<i>.png``.

    Args:
        split_audio: Audio samples of the chunk.
        sr: Sampling rate of ``split_audio``.
        melSpectogramPath: Output directory; created if it does not exist.
        refID: Identifier used as prefix of the file name.
        i: Chunk index used as suffix of the file name.

    Returns:
        Path of the written PNG file.
    """
    os.makedirs(melSpectogramPath, exist_ok=True)
    fileName = os.path.join(melSpectogramPath, '{}_{}.png'.format(refID, i))

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)

        S = librosa.feature.melspectrogram(y=split_audio, sr=sr)
        # Draw on the explicit axes rather than relying on pyplot's current axes.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(fileName, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only this figure, and do so even when rendering fails, so
        # figures do not pile up and unrelated open figures are left alone.
        plt.close(fig)
    return fileName

def predictAudioImageClass(imagePath, model, le):
    """Classify one spectrogram image and return its decoded label.

    The image is loaded with ``image.load_img`` at 224x224 and scaled to
    [0, 1], i.e. the same preprocessing ``getImagesForMixData`` applies when
    the model is evaluated, so both code paths feed the model identically
    prepared input.

    Args:
        imagePath: Path of the spectrogram image.
        model: Keras-style classifier exposing ``predict`` (or the legacy
            ``predict_classes``).
        le: Label encoder whose ``inverse_transform`` maps a class index back
            to its label.

    Returns:
        The label ``le`` associates with the predicted class index.
    """
    newsize = (224, 224)
    img = image.load_img(imagePath, target_size=newsize)
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    img = img / 255

    # `predict_classes` only exists on older Keras models; otherwise derive the
    # class from `predict` with the same rule (argmax, or 0.5 threshold for a
    # single-column output).
    if hasattr(model, 'predict_classes'):
        classes = model.predict_classes(img)
    else:
        probabilities = np.asarray(model.predict(img))
        if probabilities.ndim > 1 and probabilities.shape[-1] > 1:
            classes = np.argmax(probabilities, axis=-1)
        else:
            classes = (probabilities > 0.5).astype('int32')
    prediction_result = int(np.ravel(classes)[0])

    predictionValue = le.inverse_transform([prediction_result])
    return predictionValue[0]

In [None]:
# Part B: classify every real recording, one spectrogram per second of audio.
melSpectogramPath = Path('/mel-spectogram-images')
audio_path = Path('/audio/')

# Sorted so the files are processed in a reproducible order.
files = sorted(Path(audio_path).glob('*.wav'))

# Per-chunk predictions of every processed file, keyed by file name without
# extension, so no result is lost when the loop moves on to the next file.
predictions_by_file = {}
for audio_file in files:
    # `.stem` strips directory and extension on every OS; splitting on '\\'
    # only worked on Windows and left the full path in the name elsewhere.
    file = audio_file.stem
    print('processing file: ', file)
    audio, sr = librosa.load(audio_file)
    # `model1` is the model loaded above; no variable called `model` exists.
    predictions = audioImageExtraction(audio, sr, file, melSpectogramPath, model1, le_loaded)
    predictions_by_file[file] = predictions