In [None]:
import pyaudio
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio
import librosa
import soundfile as sf
import numpy as np

# List every device on host API 0 that can capture audio, so the user knows
# which id to enter when rec() prompts for one.
p = pyaudio.PyAudio()
try:
    info = p.get_host_api_info_by_index(0)
    num_devices = info.get('deviceCount')

    print("Available audio input devices:")
    for i in range(num_devices):
        device_info = p.get_device_info_by_host_api_device_index(0, i)
        # Only devices with at least one input channel can be recorded from.
        if device_info.get('maxInputChannels') > 0:
            print(f"Device {i}: {device_info.get('name')}")
finally:
    # Release the PortAudio handle; rec() creates (and terminates) its own
    # PyAudio instance, so nothing else in the notebook needs this one.
    p.terminate()


Method 1: Recording the sound and passing it to the model as a .wav file


Part 1: Recording the sound

In [None]:
def rec(device_id=None, record_seconds=10, output_filename="output.wav"):
    """Record audio from an input device and save it as a WAV file.

    The capture is 16-bit, 2-channel, 48 kHz, read in chunks of 512 frames.

    Parameters
    ----------
    device_id : int, optional
        Index of the input device to record from. When omitted, the input
        devices of host API 0 are listed and the user is prompted for an id
        (the original behaviour).
    record_seconds : int or float, optional
        Length of the recording in seconds (default 10).
    output_filename : str, optional
        Path of the WAV file to write (default "output.wav").

    Raises
    ------
    ValueError
        If the text typed at the prompt is not an integer.
    """
    import pyaudio
    import wave

    CHUNK = 512
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 48000

    p = pyaudio.PyAudio()
    try:
        # Looked up while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)

        if device_id is None:
            # print the available devices
            info = p.get_host_api_info_by_index(0)
            for i in range(info.get('deviceCount')):
                device_info = p.get_device_info_by_host_api_device_index(0, i)
                if device_info.get('maxInputChannels') > 0:
                    print(f"Input Device id {i} - {device_info.get('name')}")

            device_id = int(input("Select the device id: "))

        device_name = p.get_device_info_by_host_api_device_index(0, device_id).get('name')
        print(f"Recording from {device_name} for {record_seconds} seconds...")

        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        input_device_index=device_id,
                        frames_per_buffer=CHUNK)
        try:
            # One read per chunk; a trailing partial chunk is dropped by int().
            n_chunks = int(RATE / CHUNK * record_seconds)
            frames = [stream.read(CHUNK) for _ in range(n_chunks)]
            print("Recording complete.")
        finally:
            # Always release the stream, even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PortAudio, even if opening the device failed.
        p.terminate()

    # The context manager closes the file even if writing raises.
    with wave.open(output_filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    print(f"Exported audio as {output_filename}")


In [None]:
# Record a clip; rec() prompts for the input device id and writes output.wav.
rec()

In [None]:
# Load the TFLite model and allocate its tensors, then inspect the shape and
# dtype of the first input and first output tensor.
interpreter = tf.lite.Interpreter(model_path='tf_lite_model.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape: ", input_details[0]['shape'])
print("Input Type: ", input_details[0]['dtype'])
# These two lines describe the OUTPUT tensor, so they are labelled as such.
print("Output Shape: ", output_details[0]['shape'])
print("Output Type: ", output_details[0]['dtype'])

In [None]:
def features_extractor(file, n_mfcc=40):
    """Return the time-averaged MFCC vector of an audio file.

    The file is loaded with ``librosa.load`` (librosa's default sampling rate,
    ``res_type='kaiser_fast'``), ``n_mfcc`` MFCCs are computed per frame, and
    each coefficient is averaged over all frames.

    Parameters
    ----------
    file : str or path-like
        Audio file to load.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray
        1-D array with ``n_mfcc`` elements: the mean of each coefficient over time.
    """
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    # mfccs_features has one row per coefficient and one column per frame.
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Average each coefficient over the frame (time) axis. The previous
    # split-into-120-pieces-then-concatenate step rebuilt the same array before
    # averaging, so it is dropped; the result is the same mean.
    mean_features = np.mean(mfccs_features, axis=1)

    return mean_features


In [None]:
# Averaged MFCC vector of the clip written by rec().
wav_array = features_extractor('output.wav')
print(wav_array.shape)

# Add a leading and a trailing singleton axis: 40 values -> (1, 40, 1).
# This is the array handed to interpreter.set_tensor in the next cell.
new_arr = wav_array.reshape(1, 40, 1)
print(new_arr.shape)


In [None]:
# Feed the prepared features to the model and read back the first output tensor.
interpreter.set_tensor(input_details[0]['index'], new_arr)
interpreter.invoke()
tflite_model_predictions = interpreter.get_tensor(output_details[0]['index'])

# Stored as `top_score` rather than `max`: rebinding `max` would shadow the
# builtin max() for every cell executed afterwards in this kernel.
top_score = np.max(tflite_model_predictions)
print(np.argmax(tflite_model_predictions))  # index of the winning class
print(top_score)                            # its score

In [29]:
def print_prediction_result(predictions=None):
    """Print the species name of the highest-scoring class.

    Parameters
    ----------
    predictions : array-like, optional
        Model output scores. When omitted, the module-level
        ``tflite_model_predictions`` is used (the original behaviour).

    Class indices 0-4 map to bewickii, polyglottos, migratorius, melodia and
    cardinalis respectively; any other index is reported as "unknown".
    """
    species_by_index = ("bewickii", "polyglottos", "migratorius", "melodia", "cardinalis")

    if predictions is None:
        predictions = tflite_model_predictions

    # Compute the winning class once instead of once per comparison.
    class_index = int(np.argmax(predictions))
    if class_index < len(species_by_index):
        species = species_by_index[class_index]
    else:
        species = "unknown"
    print ("the model predicts this song is probably done by a bird from the species of ", species)

# Report the species for the global tflite_model_predictions computed in the inference cell.
print_prediction_result()

the model predicts this song is probably done by a bird from the species of  melodia


In [31]:
def run_prediction():
    """Record a clip, classify it with the TFLite model and print the species.

    Steps: rec() writes output.wav, features_extractor() turns it into an
    averaged MFCC vector, the vector is reshaped to (1, 40, 1) and run through
    a freshly loaded 'tf_lite_model.tflite' interpreter, and the index of the
    highest score is mapped to a species name ("unknown" for indices above 4).
    All intermediate results stay local to this function.
    """
    species_names = ("bewickii", "polyglottos", "migratorius", "melodia", "cardinalis")

    # Capture audio and build the model input.
    rec()
    model_input = features_extractor('output.wav').reshape((1, 40, 1))

    # Load the model and look up the tensor slots to write to / read from.
    tflite = tf.lite.Interpreter(model_path ='tf_lite_model.tflite')
    tflite.allocate_tensors()
    input_slot = tflite.get_input_details()[0]['index']
    output_slot = tflite.get_output_details()[0]['index']

    # Run inference.
    tflite.set_tensor(input_slot, model_input)
    tflite.invoke()
    scores = tflite.get_tensor(output_slot)

    # Translate the winning class index into a species name.
    winner = np.argmax(scores)
    species = species_names[winner] if winner < len(species_names) else "unknown"
    print ("the model predicts this song is probably done by a bird from the species of ", species)

In [35]:
# Record a new clip and classify it in one step (prompts for the input device id).
run_prediction()

Input Device id 6 - STM32 AUDIO Streaming in FS Mod: USB Audio (hw:1,0)
Input Device id 15 - pulse
Input Device id 19 - default
Recording from STM32 AUDIO Streaming in FS Mod: USB Audio (hw:1,0) for 10 seconds...
Recording complete.
Exported audio as output.wav
the model predicts this song is probably done by a bird from the species of  melodia


In [28]:
# Prints the label for the GLOBAL tflite_model_predictions. run_prediction() keeps
# its own predictions local, so this reflects the standalone inference cell instead.
print_prediction_result()

the model predicts this song is probably done by a bird from the species of  melodia


Method 2 (advanced): Capturing the sound and processing it in real time

In [None]:
import sounddevice as sd
import numpy as np

# Set parameters for recording
duration = 60  # seconds
fs = 48000  # sampling rate
device = 6  # device id for recording (id 6 is for the current testing settings)
# Latch shared with audio_callback: True while the amplitude is above the
# threshold, so each excursion is reported only once. A module-level `global`
# statement has no effect, so only the function that rebinds `sp` declares it.
sp = False

# Define function to print "passed" when amplitude goes over a certain level
def audio_callback(indata, frames, time, status):
    """Print "passed" once each time the input's peak amplitude rises above 0.25.

    Passed as the ``callback`` of the input stream. Only ``indata`` is used;
    ``frames``, ``time`` and ``status`` are accepted but ignored.

    The module-level flag ``sp`` acts as a latch: it is set when the peak
    exceeds 0.25 (and "passed" is printed), and cleared again once the peak
    drops below 0.25. A peak of exactly 0.25 leaves the flag unchanged.
    """
    global sp
    threshold = 0.25
    peak = np.abs(indata).max()

    if sp:
        # Already reported this excursion; re-arm once the signal is quiet again.
        if peak < threshold:
            sp = False
    elif peak > threshold:
        sp = True
        print("passed")
    return None

# Start recording
print("Recording...")
# The input stream exists only inside the with-block; sd.sleep holds the cell
# there for `duration` seconds (converted to milliseconds) while
# audio_callback is registered as the stream's callback.
with sd.InputStream(
    channels=1,
    samplerate=fs,
    device=device,
    callback=audio_callback,
):
    sd.sleep(int(duration * 1000))


In [None]:
# NOTE(review): `asder` is not referenced anywhere else in the visible notebook;
# this looks like leftover scratch -- confirm and remove.
asder = 55