In [None]:
# Colab-only helper: authenticates the user running this notebook.
# Presumably required so the later Google API client calls run with this
# user's credentials -- confirm when running outside Colab.
from google.colab import auth
auth.authenticate_user()

In [None]:
pip install --upgrade google-api-python-client

In [None]:
from google.cloud import storage
from google.api_core.client_options import ClientOptions
import googleapiclient.discovery
import numpy as np
import librosa
import tensorflow as tf
import json

In [None]:
def predict_json(project, region, model, instances, version=None):
    """Request online predictions from a deployed model via the 'ml' v1 API.

    Args:
        project: Project identifier placed in the resource name.
        region: Region used to build the ``https://<region>-ml.googleapis.com``
            endpoint; a falsy value selects ``https://ml.googleapis.com``.
        model: Model name placed in the resource name.
        instances: Payload sent under the ``instances`` key of the request body.
        version: Optional version name; when None, no ``/versions/...``
            suffix is appended to the resource name.

    Returns:
        The ``predictions`` entry of the service response.

    Raises:
        RuntimeError: If the response contains an ``error`` entry.
    """
    # Pick the regional host when a region is given, the global one otherwise.
    if region:
        host_prefix = "{}-ml".format(region)
    else:
        host_prefix = "ml"
    endpoint_options = ClientOptions(
        api_endpoint="https://{}.googleapis.com".format(host_prefix)
    )
    ml_service = googleapiclient.discovery.build(
        'ml', 'v1', client_options=endpoint_options
    )

    # Assemble the resource name, appending the version segment only on request.
    resource_parts = ['projects/{}/models/{}'.format(project, model)]
    if version is not None:
        resource_parts.append('/versions/{}'.format(version))
    resource_name = ''.join(resource_parts)

    request = ml_service.projects().predict(
        name=resource_name,
        body={'instances': instances},
    )
    result = request.execute()

    # Errors reported inside the response body are surfaced as exceptions.
    if 'error' in result:
        raise RuntimeError(result['error'])
    return result['predictions']

In [None]:
  # Keyword labels. The prediction cell looks a label up by its position in
  # this list, so the order must stay exactly as is.
  _mapping = [
      "sembilan", "tiga", "tujuh", "satu", "delapan", "enam",
      "tambah", "transfer", "lima", "empat", "nol", "dua",
  ]
  # Number of leading audio samples that preprocess() keeps from each clip.
  SAMPLES_TO_CONSIDER = 16000

  def preprocess(file_path, num_mfcc=13, n_fft=2048, hop_length=512):
    """Extract MFCCs from the first SAMPLES_TO_CONSIDER samples of an audio file.

    Args:
      file_path: Path of the audio file handed to ``librosa.load``.
      num_mfcc: Number of coefficients requested (``n_mfcc``).
      n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
      hop_length: Hop length forwarded to ``librosa.feature.mfcc``.

    Returns:
      The transposed MFCC matrix (presumably frames x coefficients, per
      librosa's output layout -- confirm against the model's input shape).

    Raises:
      ValueError: If the file holds fewer than SAMPLES_TO_CONSIDER samples.
    """
    # load audio file
    # NOTE(review): no `sr` is passed, so librosa's default sample rate applies;
    # SAMPLES_TO_CONSIDER counts samples at that rate -- confirm this matches
    # how the deployed model was trained.
    signal, sample_rate = librosa.load(file_path)

    # A too-short clip used to fall through to the return statement with MFCCs
    # never assigned (UnboundLocalError); fail with an explicit message instead.
    if len(signal) < SAMPLES_TO_CONSIDER:
      raise ValueError(
          "Audio file {!r} has {} samples; at least {} are required".format(
              file_path, len(signal), SAMPLES_TO_CONSIDER))

    # ensure consistency of the length of the signal
    signal = signal[:SAMPLES_TO_CONSIDER]

    # extract MFCCs; `y` and `sr` go by keyword because newer librosa releases
    # reject them as positional arguments
    MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc,
                                 n_fft=n_fft, hop_length=hop_length)
    return MFCCs.T

In [None]:
# Audio clip to classify, given as a path relative to the working directory.
audio_file = "000A_tambah.wav"

In [None]:
# Extract the features and wrap them with one extra leading and one extra
# trailing axis of length 1 (presumably batch and channel -- confirm against
# the model's expected input).
MFCCs = preprocess(audio_file)[np.newaxis, ..., np.newaxis]

In [None]:
# Coordinates of the deployed model used for the prediction request.
project = "the-late-night-studio"
region = "asia-southeast1"
model = "favi_speech_model"
version = "v02"

# The feature array is converted to nested Python lists for the request body.
instances = MFCCs.tolist()
test_predictions = predict_json(
    project=project,
    region=region,
    model=model,
    instances=instances,
    version=version,
)

# Position of the largest value in the first returned prediction.
idx = np.argmax(test_predictions[0])
print(idx)

In [None]:
# Translate the predicted index into its keyword label and package it.
prediction = _mapping[idx]
print(f'favi prediction: {prediction}')
response_json = {"Prediction": prediction}

In [None]:
pip freeze > requirements.txt