# Summary

This notebook explores the difference in wake word detection accuracy between the pvrecorder.PvRecorder and sounddevice library implementations. pvrecorder is preferable for simplicity and performance, but its device indices are not the same as sounddevice's, so using both libraries means managing two separate sets of device indices.

In [17]:
import pvporcupine
import pvrecorder
from palm_9000.settings import settings

# Build the Porcupine wake word engine from the project settings
# (access key, keyword file and model file).
porcupine = pvporcupine.create(
    access_key=settings.picovoice_access_key.get_secret_value(),
    keyword_paths=[settings.porcupine_keyword_path],
    model_path=settings.porcupine_model_path,
)
try:
    # Frames are sized to exactly what porcupine.process() expects, so no
    # buffering or resampling is needed on this path.
    recorder = pvrecorder.PvRecorder(device_index=-1, frame_length=porcupine.frame_length)
    try:
        recorder.start()
        while True:
            pcm = recorder.read()
            idx = porcupine.process(pcm)
            if idx >= 0:
                print("Detected wake word!")
    except KeyboardInterrupt:
        print("Stopping wake word detection...")
    finally:
        # Release the recorder's resources even when the loop is interrupted
        # or fails, so the input device is not held by the kernel afterwards.
        recorder.delete()
finally:
    # Free the Porcupine engine handle; re-running the cell creates a new one.
    porcupine.delete()

Detected wake word!
Stopping wake word detection...


In [None]:
import numpy as np
import sounddevice as sd
# from scipy.signal import resample_poly
from palm_9000.utils import resample
import pvporcupine


def wait_for_wake_word(device: int, input_rate: int) -> bool:
    """Block until the wake word is heard on a sounddevice input stream.

    Audio is captured at ``input_rate``, cut into chunks, resampled to
    Porcupine's sample rate and fed to Porcupine one frame at a time.

    Engine credentials and file paths are read from the module-level
    ``settings`` object, which must already be in scope when this is called.

    Args:
        device: sounddevice input device index to record from.
        input_rate: Sample rate, in Hz, to open the input stream with.

    Returns:
        True as soon as Porcupine reports a detection, False if the wait is
        interrupted with Ctrl+C (KeyboardInterrupt).

    Raises:
        ValueError: If ``input_rate`` is not positive.
    """
    # A zero rate would divide by zero below, and a negative one would yield a
    # non-positive chunk size that makes the inner loop spin forever.
    if input_rate <= 0:
        raise ValueError(f"input_rate must be positive, got {input_rate}")

    porcupine = pvporcupine.create(
        access_key=settings.picovoice_access_key.get_secret_value(),
        keyword_paths=[settings.porcupine_keyword_path],
        model_path=settings.porcupine_model_path,
    )

    try:
        frame_length = porcupine.frame_length  # usually 512
        target_rate = porcupine.sample_rate    # rate Porcupine expects its input at
        resample_ratio = target_rate / input_rate

        # Number of input samples to resample into one Porcupine frame,
        # e.g. ceil(512 / (16000/44100)) = 1412
        input_samples_per_frame = int(np.ceil(frame_length / resample_ratio))
        buffer = np.zeros(0, dtype=np.int16)

        print("Listening for wake word... (Press Ctrl+C to exit)")

        with sd.InputStream(
            samplerate=input_rate,
            blocksize=0,
            dtype="int16",
            channels=1,
            device=device,
        ) as stream:
            while True:
                # The second return value (overflow flag) is ignored.
                audio_block, _ = stream.read(1024)
                buffer = np.concatenate([buffer, audio_block.flatten()])

                # Process as many full frames as we can
                while len(buffer) >= input_samples_per_frame:
                    chunk = buffer[:input_samples_per_frame]
                    buffer = buffer[input_samples_per_frame:]

                    # NOTE(review): `resample` is a project helper; presumably it
                    # returns the chunk converted to target_rate - confirm.
                    resampled = resample(chunk, input_rate, target_rate)
                    if len(resampled) < frame_length:
                        # Too short for a full frame: this chunk is dropped.
                        continue

                    # Trim to exactly one frame and clamp to the int16 range
                    # before casting, so out-of-range values cannot wrap.
                    pcm = np.clip(resampled[:frame_length], -32768, 32767).astype(np.int16)

                    result = porcupine.process(pcm)
                    if result >= 0:
                        print("Wake word detected!")
                        return True

    except KeyboardInterrupt:
        print("\nInterrupted by user. Exiting gracefully.")
        return False
    finally:
        # Free the engine handle on every exit path (detection, Ctrl+C, error);
        # otherwise each call leaves a Porcupine instance allocated.
        porcupine.delete()


# Look up the default input device and listen on it at its default sample rate.
device = sd.query_devices(kind="input")
wait_for_wake_word(
    device=device["index"],
    input_rate=int(device["default_samplerate"]),
)

Listening for wake word... (Press Ctrl+C to exit)

Interrupted by user. Exiting gracefully.


False