In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
import tensorflow as tf
import librosa


# Location of the metadata CSV that is read with pandas.
CSV_FILE_PATH = "data/esc50.csv"
# Directory prefix that is prepended to each audio file name.
DATA_PATH = "data/audio/44100/"

# Label name -> integer class index; a label's position in the list is its index.
class_dict = {
    label: index
    for index, label in enumerate([
        'dog',
        'chirping_birds',
        'thunderstorm',
        'keyboard_typing',
        'car_horn',
        'drinking_sipping',
        'rain',
        'breathing',
        'coughing',
        'cat',
    ])
}

In [2]:
# Read the metadata table, then keep the first row whose category is "dog".
df = pd.read_csv(CSV_FILE_PATH)
dog_rows = df.loc[df["category"] == "dog"]
sample = dog_rows.iloc[0]

In [3]:
# Build the model input for a single clip: 13 MFCCs wrapped with a leading
# batch axis and a trailing channel axis.

sample_path = sample["filename"]
signal, sr = librosa.load(DATA_PATH + sample_path)
mfcc_ = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)

# One-element batch: (1, n_mfcc, frames) -> (1, n_mfcc, frames, 1).
mfcc_batch = np.array([mfcc_])
X = np.expand_dims(mfcc_batch, axis=-1)

In [4]:
# Restore the saved Keras model from its HDF5 file.
keras_model_path = "model/model.h5"
model = keras.models.load_model(keras_model_path)

Metal device set to: Apple M2 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2024-01-14 17:19:56.373696: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-14 17:19:56.373811: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
# Convert the loaded Keras model into a serialized TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Persist the converted model so the interpreter can load it from disk.
with open('model/sound-model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)



INFO:tensorflow:Assets written to: /var/folders/rz/hqy45d3569g71gtmcbpt6vkr0000gp/T/tmpu51uyhy_/assets


INFO:tensorflow:Assets written to: /var/folders/rz/hqy45d3569g71gtmcbpt6vkr0000gp/T/tmpu51uyhy_/assets
2024-01-14 17:19:59.789278: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2024-01-14 17:19:59.789290: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2024-01-14 17:19:59.790214: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/rz/hqy45d3569g71gtmcbpt6vkr0000gp/T/tmpu51uyhy_
2024-01-14 17:19:59.793113: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2024-01-14 17:19:59.793121: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/rz/hqy45d3569g71gtmcbpt6vkr0000gp/T/tmpu51uyhy_
2024-01-14 17:19:59.798635: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2024-01-14 17:19:59.800097: I tensorflow/cc/saved_model/load

In [6]:
import tensorflow.lite as tflite

# Load the converted model into a TFLite interpreter and allocate its tensors
# before any tensor is set or read.
interpreter = tflite.Interpreter(model_path='model/sound-model.tflite')
interpreter.allocate_tensors()

# Only the first input and the first output tensor are used.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]['index']
output_index = output_details[0]['index']

In [7]:
# Run one inference pass with the TFLite interpreter.
# Step 1: hand the prepared array X to the model's input tensor.
interpreter.set_tensor(input_index, X)
# Step 2: execute the model on the tensor that was just set.
interpreter.invoke()
# Step 3: fetch the contents of the output tensor; preds[0] is read in a later cell.
preds = interpreter.get_tensor(output_index)


In [8]:
# Order the label names by their class index so that classes[i] names output
# position i, instead of relying on the order in which class_dict was declared.
classes = sorted(class_dict, key=class_dict.get)

In [9]:
# Pair each label with the matching entry of the first prediction row.
{label: score for label, score in zip(classes, preds[0])}

{'dog': 0.97222453,
 'chirping_birds': 3.269388e-06,
 'thunderstorm': 9.658017e-07,
 'keyboard_typing': 4.6565274e-06,
 'car_horn': 5.512385e-05,
 'drinking_sipping': 0.0009613624,
 'rain': 9.4963795e-08,
 'breathing': 5.2852876e-05,
 'coughing': 0.024749089,
 'cat': 0.0019480572}