# Infer an audio label using the model trained on the Kaggle dataset
## Test audio clips from both the Kaggle dataset and my own data.
## The results are very good!

In [3]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel. Re-running this cell in a live kernel
# only prints an "already loaded" notice.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import sys, os
ROOT = "../"
sys.path.append(ROOT)

import numpy as np 
import torch 

if 1: # my lib
    import utils.lib_io as lib_io
    import utils.lib_commons as lib_commons
    import utils.lib_datasets as lib_datasets
    import utils.lib_augment as lib_augment
    import utils.lib_ml as lib_ml
    import utils.lib_rnn as lib_rnn

In [5]:
# Create the model and restore the trained weights.
# Both paths are built from ROOT (set next to the imports) instead of repeating
# the "../" prefix; with ROOT = "../" the resulting strings are unchanged.

args = lib_rnn.set_default_args()
args.classes_txt = os.path.join(ROOT, "config/classes_kaggle.names")
args.load_weights_from = os.path.join(ROOT, "checkpoints/kaggle_accu_914/model_025.ckpt")

# The checkpoint path is passed explicitly as the second argument.
model = lib_rnn.create_RNN_model(args, args.load_weights_from)

Load weights from  ../checkpoints/kaggle_accu_914/model_025.ckpt


In [10]:
# Load labels: read the class names from the file at args.classes_txt.
# predict() later uses the model's output index to look up a name in this list.

classes = lib_io.read_list(args.classes_txt)
print(classes)

['bird', 'happy', 'stop', 'zero', 'six', 'learn', 'off', 'no', 'sheila', 'forward', 'eight', 'one', 'seven', 'backward', 'tree', 'bed', 'wow', 'right', 'dog', 'follow', 'up', 'down', 'nine', 'two', 'cat', 'yes', 'on', 'visual', 'left', 'five', 'four', 'marvin', 'go', 'house', 'three']


In [38]:
# Function for loading a randomly chosen audio file
def get_a_random_wav(audio_folder):
    """Load one randomly chosen ``.wav`` file from ``audio_folder``.

    The pattern ``"*/*.wav"`` is handed to ``lib_commons.get_filenames``
    (presumably glob-style, i.e. files one directory below ``audio_folder``
    -- TODO confirm against the helper).

    Args:
        audio_folder: Folder passed to ``lib_commons.get_filenames``.

    Returns:
        A ``lib_datasets.AudioClass`` built from the selected file name.

    Raises:
        ValueError: If no file matches the pattern.
    """
    names = lib_commons.get_filenames(audio_folder, "*/*.wav")
    if len(names) == 0:
        # Without this guard np.random.randint(0) fails with the opaque
        # message "high <= 0", which hides the real cause.
        raise ValueError(
            f"No .wav files found under {audio_folder!r} (pattern '*/*.wav')"
        )
    idx = np.random.randint(len(names))
    return lib_datasets.AudioClass(filename=names[idx])

In [39]:
# Predict audio label
def predict(x):
    """Return the class name the model assigns to the features ``x``.

    ``model.predict`` yields an index, which is looked up in the notebook-level
    ``classes`` list.
    """
    return classes[model.predict(x)]

def predict_audio(audio):
    """Classify one audio clip and print its true and predicted labels.

    The MFCC features are computed on demand (``audio.compute_mfcc()``) when
    ``audio.mfcc`` is still ``None``; their transpose is passed to ``predict``.

    Args:
        audio: Object exposing ``mfcc``, ``compute_mfcc()`` and ``filename``
            (e.g. what ``get_a_random_wav`` returns).

    Returns:
        The predicted label, exactly as returned by ``predict``.
    """
    if audio.mfcc is None:
        audio.compute_mfcc()
    features = audio.mfcc.T
    predicted_label = predict(features)

    # The true label is the name of the folder holding the file
    # (".../<label>/<clip>.wav"). os.path understands both "/" and the platform
    # separator, and a bare file name gives "unknown" instead of an IndexError.
    parent_dir = os.path.dirname(os.path.normpath(audio.filename))
    true_label = os.path.basename(parent_dir) or "unknown"
    print(f"Label: true = {true_label}, predict = {predicted_label}")
    return predicted_label

In [41]:
# Test on the Kaggle dataset: draw one clip from the folder and classify it.
# The clip is picked with np.random, so the printed result changes from run to
# run unless a NumPy seed has been set.

audio_folder = "../data/kaggle/"
audio = get_a_random_wav(audio_folder)
predicted_label = predict_audio(audio)  # also prints the true vs. predicted label

Label: true = no, predict = no


In [65]:
# Test on my own data: draw one clip from the data_train folder and classify it.
# The clip is picked with np.random, so the printed result changes from run to
# run unless a NumPy seed has been set.

audio_folder = "../data/data_train/"
audio = get_a_random_wav(audio_folder)
predicted_label = predict_audio(audio)  # also prints the true vs. predicted label

Label: true = right, predict = right
