# Infer an audio label using the model trained on the Kaggle dataset
## Test audio clips from both the Kaggle dataset and my own recordings.
## The results are very good!

In [1]:
%load_ext autoreload
%autoreload 2

In [12]:
import sys, os
ROOT = "../"
sys.path.append(ROOT)

import numpy as np 
import torch 

import IPython.display as ipd

if 1: # my lib
    import utils.lib_io as lib_io
    import utils.lib_commons as lib_commons
    import utils.lib_datasets as lib_datasets
    import utils.lib_augment as lib_augment
    import utils.lib_ml as lib_ml
    import utils.lib_rnn as lib_rnn

In [4]:
# Create model: start from the library's default arguments, then point them at
# the class-name list and the trained checkpoint to restore.

args = lib_rnn.set_default_args()
args.classes_txt = "../config/classes.names" 
args.load_model_from="../models/mine_accu_981/model_007.ckpt"

# The checkpoint path is also passed explicitly as the second argument; the
# cell output shows the weights being loaded from that path.
model = lib_rnn.create_RNN_model(args, args.load_model_from)

Load weights from  ../models/mine_accu_981/model_007.ckpt


In [5]:
# Load labels: read the class names from the file referenced by args.classes_txt.
# `classes` is later indexed with the model's predicted class index in predict().

classes = lib_io.read_list(args.classes_txt)
print(classes)

['one', 'two', 'three', 'four', 'five', 'front', 'back', 'left', 'right', 'stop', 'none']


In [19]:
# Function for randomly reading an audio 
def get_a_random_wav(audio_folder):
    """Load one randomly chosen wav file found under ``audio_folder``.

    Candidate files are whatever ``lib_commons.get_filenames`` returns for the
    pattern ``"*/*.wav"`` (presumably one sub-folder per class label; verify
    against the helper). One of them is picked with ``np.random.randint``.

    Args:
        audio_folder: Folder handed to ``lib_commons.get_filenames``.

    Returns:
        A ``lib_datasets.AudioClass`` built from the chosen filename.

    Raises:
        ValueError: If no file matches the pattern. Without this check the
            failure would surface as numpy's opaque "high <= 0" error.
    """
    names = lib_commons.get_filenames(audio_folder, "*/*.wav")
    # len() rather than truthiness so a numpy array of names would also work.
    if len(names) == 0:
        raise ValueError(
            f"No wav files matching '*/*.wav' found under: {audio_folder}")
    idx = np.random.randint(len(names))
    return lib_datasets.AudioClass(filename=names[idx])

In [8]:
# Predict audio label
def predict(x):
    """Return the class name that the global ``model`` predicts for ``x``.

    ``x`` is forwarded unchanged to ``model.predict``; the value it returns is
    used as an index into the global ``classes`` sequence.
    """
    return classes[model.predict(x)]

def predict_audio(audio):
    """Predict the label of ``audio`` and print it next to the true label.

    The MFCC features are computed on demand (only when ``audio.mfcc`` is
    ``None``), transposed, and passed to ``predict``. The "true" label is the
    name of the directory that directly contains ``audio.filename``.

    Args:
        audio: Object exposing ``mfcc``, ``compute_mfcc()`` and ``filename``
            (e.g. a ``lib_datasets.AudioClass``).

    Returns:
        The predicted class name, as returned by ``predict``.
    """
    if audio.mfcc is None:
        audio.compute_mfcc()
    predicted_label = predict(audio.mfcc.T)

    # Take the parent directory name via os.path instead of split('/')[-2]:
    # this works with the platform's path separators and yields an empty
    # label (rather than an IndexError) when the file has no parent folder
    # or the filename is missing.
    filename = audio.filename or ""
    true_label = os.path.basename(os.path.dirname(filename))

    print(f"Label: true = {true_label}, predict = {predicted_label}")
    return predicted_label

In [58]:
# Test by folder: draw one random clip from the training data, print its true
# label (taken from the clip's parent folder name) next to the prediction,
# and play the clip.

audio_folder = "../data/data_train/"
audio = get_a_random_wav(audio_folder)
predicted_label = predict_audio(audio)

# Kept as the last expression so the notebook renders the audio player.
ipd.Audio(audio.data, rate=audio.sample_rate, autoplay=True)

Label: true = stop, predict = stop


In [60]:
# Test by single file: classify one specific recording and play it back.

# Another recorded sample that can be tried instead:
#   "../data/data_tmp/audio_06-02-02-11-19-3288v32cwji.wav"
filename = "../data/data_tmp/audio_06-02-02-11-22-310h21j5zdp.wav"

audio = lib_datasets.AudioClass(filename=filename)

audio.compute_mfcc()
x = audio.mfcc.T
# Go through the shared predict() helper so that predicted_label is a class
# name here, exactly as in the folder test, instead of a raw class index.
predicted_label = predict(x)

print(predicted_label)
# Kept as the last expression so the notebook renders the audio player.
ipd.Audio(audio.data, rate=audio.sample_rate, autoplay=True)

one
