In [2]:
# Mount Google Drive at /content/drive; the model file and audio clips used
# below are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import tensorflow.keras as keras
import numpy as np
import librosa

# Location of the saved Keras model that Keyword_Spotting_Service() loads.
MODEL_PATH = '/content/drive/MyDrive/speech/model.h5'
# Maximum number of audio samples preprocess() keeps from a clip:
# 1 sec at 22050 Hz (presumably librosa.load's default rate — confirm for the installed version).
NUM_SAMPLES_TO_CONSIDER = 22050 # 1 sec

class _Keyword_Spotting_Service:
    """Keyword-spotting inference service: audio file path in, keyword out.

    Intended to be used as a singleton. Obtain the shared instance through the
    ``Keyword_Spotting_Service()`` factory, which also assigns ``model``;
    instantiating this class directly leaves ``model`` as ``None``.
    """

    # Classifier used by predict(); assigned by the factory function.
    model = None
    # Keyword labels. predict() uses the arg-max of the model output as an
    # index into this list, so the order presumably has to match the label
    # order used at training time — verify against the training pipeline.
    _mappings = ['eight',
        'sheila',
        'nine',
        'yes',
        'one',
        'no',
        'left',
        'tree',
        'bed',
        'bird',
        'go',
        'wow',
        'seven',
        'marvin',
        'dog',
        'three',
        'two',
        'house',
        'down',
        'six',
        'five',
        'off',
        'right',
        'cat',
        'zero',
        'four',
        'stop',
        'up',
        'on',
        'happy',
        ]
    # Shared instance managed by the factory function.
    _instance = None

    def predict(self, file_path):
        """Predict the keyword spoken in an audio file.

        :param file_path: Path of the audio file to classify.
        :return: The entry of ``_mappings`` with the highest model score.
        """

        # extract MFCCs
        MFCCs = self.preprocess(file_path) # (# segments,  # coefficients)

        # convert 2d MFCCs array into 4d array -> (# samples, # segments,  # coefficients, # channels)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # make prediction; a single clip is passed, so the arg-max over the
        # flattened output is the index of the winning class
        predictions = self.model.predict(MFCCs)
        predicted_index = int(np.argmax(predictions))
        predicted_keyword = self._mappings[predicted_index]

        return predicted_keyword

    def preprocess(self, file_path, n_mfcc=13, n_fft=2048, hop_length=512):
        """Extract MFCCs from an audio file.

        The clip is forced to exactly ``NUM_SAMPLES_TO_CONSIDER`` samples
        (longer clips are truncated, shorter ones are zero-padded at the end)
        so that every clip produces the same number of MFCC frames.

        :param file_path: Path of the audio file.
        :param n_mfcc: Number of coefficients to extract.
        :param n_fft: FFT window size, in samples.
        :param hop_length: Hop between successive frames, in samples.
        :return: 2-D array of MFCCs, transposed to (# segments, # coefficients).
        """

        # load audio file
        signal, sr = librosa.load(file_path)

        # ensure consistency in the audio file length
        signal = self._fix_length(signal, NUM_SAMPLES_TO_CONSIDER)

        # extract MFCCs; the audio must be passed as `y=` because recent
        # librosa releases reject positional arguments here
        MFCCs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)

        return MFCCs.T

    @staticmethod
    def _fix_length(signal, num_samples):
        """Return ``signal`` truncated or zero-padded at the end to ``num_samples``."""
        if len(signal) >= num_samples:
            return signal[:num_samples]
        return np.pad(signal, (0, num_samples - len(signal)), mode="constant")

def Keyword_Spotting_Service():
    """Return the shared ``_Keyword_Spotting_Service`` instance.

    On the first successful call the Keras model at ``MODEL_PATH`` is loaded
    and stored on the class; later calls reuse the same instance and model.

    :return: The single ``_Keyword_Spotting_Service`` instance.
    :raises: Whatever ``keras.models.load_model`` raises if the model cannot
        be loaded. In that case no instance is cached, so a later call retries.
    """

    # ensure that we only have 1 instance of KSS
    if _Keyword_Spotting_Service._instance is None:
        # Load the model BEFORE publishing the instance: if loading fails, the
        # singleton must not be left behind with `model` still set to None.
        model = keras.models.load_model(MODEL_PATH)
        _Keyword_Spotting_Service.model = model
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()
    return _Keyword_Spotting_Service._instance

In [3]:
# Get the shared service (the model is loaded on the first call) and classify
# one clip taken from the "house" folder of the dataset.
kss = Keyword_Spotting_Service()
keyword1 = kss.predict("/content/drive/MyDrive/speech/house/00b01445_nohash_2.wav")
print(f"Predicted keywords: {keyword1}")

Predicted keywords: house


In [4]:
 # Load the clip classified above: `signal` is the audio returned by librosa.load, `sr` its sampling rate.
 signal, sr = librosa.load("/content/drive/MyDrive/speech/house/00b01445_nohash_2.wav")