In [1]:
import tensorflow.keras as keras
import numpy as np
import librosa

In [2]:
# Location of the trained Keras model file that the service loads.
MODEL_PATH = "model.h5"

# Number of audio samples kept from each clip: 1 second at 22050 Hz, the rate
# librosa.load resamples to when called without an explicit `sr`.
SAMPLES_TO_CONSIDER = 22_050

In [7]:
class _Keyword_Spotting_Service:
    """Singleton-style service for keyword-spotting inference with a trained model.

    Do not instantiate this class directly; obtain the shared instance through
    the ``Keyword_Spotting_Service()`` factory, which also loads the model.
    """

    # Trained model exposing ``predict``; stays None until the factory sets it.
    model = None
    # Label for each output index of the model. The strings are returned
    # verbatim by ``predict`` (including the "augmented_dataset\\" prefix).
    _mappings = [
        "augmented_dataset\\down",
        "augmented_dataset\\go",
        "augmented_dataset\\left",
        "augmented_dataset\\off",
        "augmented_dataset\\on",
        "augmented_dataset\\right",
        "augmented_dataset\\stop",
        "augmented_dataset\\up",
        "augmented_dataset\\wow",
        "augmented_dataset\\yes"
    ]
    # Shared instance managed by the factory function.
    _instance = None

    def predict(self, file_path):
        """Predict the keyword spoken in an audio file.

        :param file_path: Path of the audio file to classify.
        :return: The entry of ``_mappings`` whose index has the highest score
            in the model output.
        """
        # extract MFCCs -> (# segments, # coefficients)
        MFCCs = self.preprocess(file_path)

        # convert 2d MFCCs array into 4d array -> (samples, segments, coefficients, channels)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # make prediction, e.g. [[0.1, 0.6, 0.1, ...]]
        predictions = self.model.predict(MFCCs)
        predicted_index = np.argmax(predictions)
        predicted_keyword = self._mappings[predicted_index]

        return predicted_keyword

    def preprocess(self, file_path, num_mfcc=13, n_fft=2048, hop_length=512):
        """Load an audio file and extract its MFCCs from a fixed-length window.

        The signal is cut, or zero-padded at the end, to exactly
        ``SAMPLES_TO_CONSIDER`` samples so that every clip yields the same
        number of MFCC frames.

        :param file_path: Path of the audio file to load.
        :param num_mfcc: Number of MFCC coefficients to extract.
        :param n_fft: FFT window size, in samples.
        :param hop_length: Hop between successive frames, in samples.
        :return: 2-D array of MFCCs shaped (# segments, # coefficients).
        """
        # load audio file
        signal, sample_rate = librosa.load(file_path)

        # ensure consistency in the audio file length
        if len(signal) > SAMPLES_TO_CONSIDER:
            signal = signal[:SAMPLES_TO_CONSIDER]
        elif len(signal) < SAMPLES_TO_CONSIDER:
            # Short clips would otherwise give fewer frames than long ones.
            missing = SAMPLES_TO_CONSIDER - len(signal)
            signal = np.pad(signal, (0, missing), mode="constant")

        # extract MFCC; y/sr are passed by keyword because recent librosa
        # releases no longer accept them positionally
        MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc,
                                     n_fft=n_fft, hop_length=hop_length)

        return MFCCs.T
    
def Keyword_Spotting_Service():
    """Return the shared keyword-spotting service, creating it on first use.

    On the first call the trained model is loaded from ``MODEL_PATH`` and
    stored on the service class; later calls return the same instance.

    :return: The single ``_Keyword_Spotting_Service`` instance.
    :raises: Whatever ``keras.models.load_model`` raises if loading fails. In
        that case no instance is cached, so the next call retries the load.
    """
    service_cls = _Keyword_Spotting_Service

    # ensure that we only have 1 instance of KSS
    if service_cls._instance is None:
        # Load the model BEFORE publishing the instance: if loading fails, a
        # half-initialised singleton (model is None) must not be left cached.
        loaded_model = keras.models.load_model(MODEL_PATH)
        service_cls.model = loaded_model
        service_cls._instance = service_cls()

    return service_cls._instance

In [9]:
if __name__ == '__main__':

    # Requesting the service twice must hand back the very same object.
    kss, kss1 = Keyword_Spotting_Service(), Keyword_Spotting_Service()
    assert kss is kss1

    # Classify the two sample recordings, in order.
    keyword1, keyword2 = [
        kss.predict(wav_path)
        for wav_path in ("test/down.wav", "test/left.wav")
    ]

    print(f"Predicted Keywords: {keyword1},{keyword2}")

Predicted Keywords: augmented_dataset\down,augmented_dataset\left
