Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add device index to client #9

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ Recognized: delta echo foxtrot
```
λ py client.py -h
usage: client.py [-h] [-s SERVER] [-a AGGRESSIVENESS] [--nospinner]
[-w SAVEWAV]
[-w SAVEWAV] [-d DEVICE] [-v]

Streams raw audio data from microphone with VAD to server via WebSocket

Expand All @@ -124,7 +124,13 @@ optional arguments:
speech, 3 the most aggressive. Default: 3
--nospinner Disable spinner
-w SAVEWAV, --savewav SAVEWAV
Save .wav files of utterences to given directory
Save .wav files of utterences to given directory.
Example for current directory: -w .
-d DEVICE, --device DEVICE
Set audio device for input, according to system. The
default utilizes system-specified recording device.
-v, --verbose Print debugging info

```

## Contributions
Expand Down
49 changes: 40 additions & 9 deletions client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import threading, collections, queue, os, os.path
import wave
import pyaudio
import pprint
import webrtcvad
from lomond import WebSocket, events
from halo import Halo
Expand All @@ -25,7 +26,7 @@ class Audio(object):
CHANNELS = 1
BLOCKS_PER_SECOND = 50

def __init__(self, callback=None, buffer_s=0, flush_queue=True):
def __init__(self, callback=None, buffer_s=0, flush_queue=True, device_index=None):
def proxy_callback(in_data, frame_count, time_info, status):
callback(in_data)
return (None, pyaudio.paContinue)
Expand All @@ -38,6 +39,7 @@ def proxy_callback(in_data, frame_count, time_info, status):
channels=self.CHANNELS,
rate=self.sample_rate,
input=True,
input_device_index=device_index,
frames_per_buffer=self.block_size,
stream_callback=proxy_callback)
self.stream.start_stream()
Expand All @@ -61,6 +63,21 @@ def read_loop(self, callback):
for block in iter(self):
callback(block)

@staticmethod
def device_list():
"""Iterate and return the audio devices in the system."""
local_pa = pyaudio.PyAudio()
device_info = { "Input":[], "Output":[] }
device_count = local_pa.get_device_count()
for idx_dev in range(device_count):
local_info = local_pa.get_device_info_by_index(idx_dev)
for local_type in ["Output", "Input"]:
local_channels = f"max{local_type}Channels"
if local_channels in local_info and local_info[local_channels] > 0:
device_info[local_type].append({"device":idx_dev, "name":local_info["name"],
"channels":local_info[local_channels]})
return device_info

def __iter__(self):
"""Generator that yields all audio blocks from microphone."""
while True:
Expand Down Expand Up @@ -89,8 +106,8 @@ def write_wav(self, filename, data):
class VADAudio(Audio):
"""Filter & segment audio with voice activity detection."""

def __init__(self, aggressiveness=3):
super().__init__()
def __init__(self, aggressiveness=3, device_index=None):
super().__init__(device_index=device_index)
self.vad = webrtcvad.Vad(aggressiveness)

def vad_collector_simple(self, pre_padding_ms, blocks=None):
Expand Down Expand Up @@ -209,7 +226,11 @@ def on_event(event):
print_output("Connected!")
ready = True
elif isinstance(event, events.Text):
if 1: print_output("Recognized: %s" % event.text)
# TODO: modify for inclusion of timing information?
# TODO: what do we do with a rich / metadata return instead?

if len(event.text):
print_output("Recognized: %s" % event.text)
elif 1:
logging.debug(event)

Expand All @@ -221,15 +242,21 @@ def on_event(event):
websocket.close()

def main():
if ARGS.listdevice:
dict_devices = Audio.device_list()
print_output("Available devices...")
print_output(pprint.pprint(dict_devices))
return 0

vad_audio = VADAudio(aggressiveness=ARGS.aggressiveness, device_index=ARGS.device)

websocket = WebSocket(ARGS.server)
# TODO: compress?
print_output("Connecting to '%s'..." % websocket.url)

vad_audio = VADAudio(aggressiveness=ARGS.aggressiveness)
print_output("Listening (ctrl-C to exit)...")
audio_consumer_thread = threading.Thread(target=lambda: audio_consumer(vad_audio, websocket))
print_output("Listening (ctrl-C to exit)...")
audio_consumer_thread.start()

print_output("Connecting to '%s'..." % websocket.url)
websocket_runner(websocket)


Expand All @@ -246,7 +273,7 @@ def consumer(self, blocks):
else:
print('.', end='', flush=True)
length_ms = 0
VADAudio(consumer)
VADAudio(consumer, device_index=ARGS.device)
elif 1:
VADAudio.test_vad(3)

Expand All @@ -261,8 +288,12 @@ def consumer(self, blocks):
help="Disable spinner")
parser.add_argument('-w', '--savewav',
help="Save .wav files of utterences to given directory. Example for current directory: -w .")
parser.add_argument('-d', '--device', type=int, default=None,
help="Set audio device for input, according to system. The default utilizes system-specified recording device.")
parser.add_argument('-v', '--verbose', action='store_true',
help="Print debugging info")
parser.add_argument('-l', '--listdevice', action='store_true',
help="List available devices for live capture")
ARGS = parser.parse_args()

if ARGS.verbose: logging.getLogger().setLevel(10)
Expand Down