In [None]:
import librosa
import tensorflow as tf
import numpy as np

In [None]:
# Fetch the trained Keras model from cloud storage; get_file returns the
# local path of the downloaded file, which is what load_model needs later.
SAVED_MODEL_PATH = tf.keras.utils.get_file(
    fname='favi_model_v2.h5',
    origin="https://storage.googleapis.com/financial_speech_dataset_id/favi_model_v2.h5",
)

# Number of leading audio samples kept from each clip before MFCC extraction.
SAMPLES_TO_CONSIDER = 16000

In [None]:
class _keyword_prediction:
    """Keyword spotter: converts an audio file to MFCCs and classifies it.

    Instances are meant to be obtained through the ``keyword_prediction()``
    factory, which assigns the loaded Keras model to the ``model`` class
    attribute. ``predict`` cannot work while ``model`` is still ``None``.
    """

    # Keras model used for inference; shared by all instances.
    model = None
    # Class labels, indexed by the position of the model's output unit.
    _mapping = ["sembilan", "tiga", "tujuh", "satu", "delapan", "enam",
                "tambah", "transfer", "lima", "empat", "nol", "dua"
                ]
    # The single shared instance managed by the factory function.
    _instance = None

    def predict(self, file_path):
        """Return the keyword predicted for the audio file at ``file_path``.

        :param file_path: path to an audio file readable by ``librosa.load``
        :return: the entry of ``_mapping`` with the highest model score
        """
        # extract MFCCs: 2-dim array (time steps, coefficients)
        MFCCs = self.preprocess(file_path)

        # the model is fed a 4-dim array: (samples, time steps, coefficients, 1)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]

        # pick the label with the highest score
        predictions = self.model.predict(MFCCs)
        predicted_index = np.argmax(predictions)
        return self._mapping[predicted_index]

    def preprocess(self, file_path, num_mfcc=13, n_fft=2048, hop_length=512):
        """Load an audio file and return its MFCCs as (time steps, coefficients).

        The signal is forced to exactly ``SAMPLES_TO_CONSIDER`` samples so that
        every clip yields an array of the same shape: longer clips are
        truncated, shorter clips are zero-padded at the end.

        :param file_path: path to the audio file
        :param num_mfcc: number of MFCC coefficients to extract
        :param n_fft: FFT window size, in samples
        :param hop_length: hop between successive frames, in samples
        :return: transposed MFCC matrix
        """
        # NOTE(review): librosa.load resamples to its default rate when ``sr``
        # is not given; confirm this matches how the model was trained and the
        # intent behind SAMPLES_TO_CONSIDER.
        signal, sample_rate = librosa.load(file_path)

        # ensure consistency of the length of the signal
        missing = SAMPLES_TO_CONSIDER - len(signal)
        if missing > 0:
            # Short clips used to fall through with MFCCs never assigned
            # (UnboundLocalError); pad with trailing silence instead.
            signal = np.pad(signal, (0, missing), mode="constant")
        else:
            signal = signal[:SAMPLES_TO_CONSIDER]

        # extract MFCCs; audio and sample rate must be passed by keyword
        # (positional use is rejected by recent librosa releases)
        MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc,
                                     n_fft=n_fft, hop_length=hop_length)
        return MFCCs.T

In [None]:
def keyword_prediction():
  """Return the shared ``_keyword_prediction`` instance, creating it on first use.

  On the first call the Keras model at ``SAVED_MODEL_PATH`` is loaded and
  stored on the class; later calls reuse the same instance and model.

  :return: the single ``_keyword_prediction`` instance
  """
  #ensure an instance is created only the first time the factory function is called
  if _keyword_prediction._instance is None:
    # Load the model BEFORE publishing the instance: if loading raises, no
    # half-initialised singleton (model still None) is left cached, and the
    # next call retries the load.
    _keyword_prediction.model = tf.keras.models.load_model(SAVED_MODEL_PATH)
    _keyword_prediction._instance = _keyword_prediction()
  return _keyword_prediction._instance

In [None]:
if __name__ == "__main__":

  kp = keyword_prediction()

  # sample recordings to classify, in the order they are reported
  audio_files = [
      "tujuh1.wav", "tujuh2.wav", "tujuh3.wav",
      "dua4.wav", "dua5.wav",
      "satu1.wav", "satu2.wav", "satu3.wav", "satu4.wav", "satu5.wav",
  ]

  # make a prediction for every recording
  keywords = [kp.predict(audio_file) for audio_file in audio_files]

  # header line followed by one predicted keyword per line
  print('prediction: \n' + ' \n'.join(keywords))