In [20]:
from keras.models import load_model
import librosa
import numpy as np
import glob
import os
from collections import Counter
# Lookup table from the integer class index predicted by the model to its
# human-readable sound label; a label's position in the tuple is its index.
mydict = dict(enumerate((
    "air_conditioner",
    "car_horn",
    "children_playing",
    "dog_bark",
    "drilling",
    "engine_idling",
    "gun_shot",
    "jackhammer",
    "siren",
    "street_music",
)))

In [21]:
# Load the saved Keras model from 'my_model.h5' (path is relative to the
# current working directory); `model` is used by the prediction cell below.
model = load_model('my_model.h5')

In [22]:
def windows(data, window_size):
    """Yield (start, end) index pairs for half-overlapping windows over ``data``.

    Consecutive windows are shifted by half of ``window_size``. The last
    windows may extend past ``len(data)``; callers are expected to check the
    length of the slice they actually obtain.

    Args:
        data: Any sized sequence (only ``len(data)`` is used).
        window_size: Window length in samples.

    Yields:
        Tuples ``(start, start + window_size)`` of integers.
    """
    # Floor division keeps the bounds integral: with true division ("/") the
    # bounds become floats on Python 3 and cannot be used as slice indices.
    # The step is clamped to 1 so a tiny window size can never stall the loop.
    step = max(1, window_size // 2)
    start = 0
    while start < len(data):
        yield start, start + window_size
        start += step

def extract_features2(parent_dir,sub_dirs,file_ext="*.wav",bands = 60, frames = 41):
    """Build two-channel log-mel features for the audio files in the given folders.

    Every file matching ``parent_dir/<sub_dir>/file_ext`` is loaded with
    ``librosa.load`` and cut into half-overlapping windows of
    ``512 * (frames - 1)`` samples (window positions come from ``windows``).
    Windows shorter than that (i.e. at the end of a clip) are skipped.

    Args:
        parent_dir: Directory that contains the sub-directories to scan.
        sub_dirs: Iterable of sub-directory names inside ``parent_dir``.
        file_ext: Glob pattern used to select audio files.
        bands: Number of mel bands requested from ``melspectrogram``.
        frames: Number of spectrogram frames expected per window.

    Returns:
        A NumPy array of shape ``(n_windows, bands, frames, 2)``. Channel 0
        holds the log-scaled mel spectrogram values, channel 1 the result of
        ``librosa.feature.delta`` applied to channel 0. ``n_windows`` is 0
        when no file yields a full-length window.
    """
    # Assumes librosa's default hop length of 512 so that one window produces
    # exactly `frames` spectrogram columns -- confirm if defaults are changed.
    window_size = 512 * (frames - 1)
    log_specgrams = []
    for sub_dir in sub_dirs:
        # Sorted so the row order of the result is reproducible across runs.
        for fn in sorted(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))):
            sound_clip, _ = librosa.load(fn)
            for start, end in windows(sound_clip, window_size):
                # Slice bounds must be integers regardless of how the window
                # generator computed them.
                start, end = int(start), int(end)
                signal = sound_clip[start:end]
                if len(signal) != window_size:
                    continue
                # Audio must be passed by keyword (`y=`) on recent librosa.
                melspec = librosa.feature.melspectrogram(y=signal, n_mels=bands)
                # `power_to_db` is the replacement for the removed
                # `librosa.logamplitude` and uses the same defaults.
                logspec = librosa.power_to_db(melspec)
                # NOTE(review): flattening the *transposed* spectrogram means
                # the reshape to (bands, frames) below does not restore the
                # original mel layout. Kept unchanged because the saved model
                # was presumably trained on this layout -- confirm before
                # altering.
                log_specgrams.append(logspec.T.flatten()[np.newaxis, :])

    log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams), bands, frames, 1)
    # Add a second, initially zero channel and fill it with the deltas.
    features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])

    return features


In [23]:
# Extract features for every clip under joke/only, classify each window, and
# report the label predicted for the largest number of windows.
test_x = extract_features2("joke",["only"])
print(test_x.shape)
if len(test_x) == 0:
    # Without this guard the majority vote below fails with an opaque IndexError.
    raise ValueError("No full-length audio windows were extracted from joke/only")
# `Sequential.predict_classes` was removed from recent Keras releases; taking
# the argmax over the predicted class scores is the equivalent for a
# multi-class output.
classes = np.argmax(model.predict(test_x), axis=-1)
# Majority vote across windows; `.tolist()` turns NumPy integers into plain ints.
most_common,num_most_common = Counter(classes.tolist()).most_common(1)[0]
print(mydict[most_common])

(9, 60, 41, 2)
street_music
