Skip to content

Commit

Permalink
Add live transcription demo script (#313)
Browse files Browse the repository at this point in the history
* add cli command to record live

* live CLI command is functional

* fix CLI command based on dependencies

* fix tests

* add portaudio for tests

* try portaudio

* new try portaudio

* test install from source

* move portaudio install
  • Loading branch information
Thomas Chaigneau committed Sep 14, 2023
1 parent c3362ae commit 6539c6a
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ jobs:
with:
python-version: 3.8

- name: Install PortAudio
run: sudo apt-get install portaudio19-dev

- name: install-dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ path = "src/wordcab/__init__.py"

[project.optional-dependencies]
live = [
"numpy>=1.21.2",
"pyaudio>=0.2.11",
"websockets>=11.0.3",
]
docs = [
Expand All @@ -61,6 +63,8 @@ quality = [
]
tests = [
"httpx>=0.23.3",
"numpy>=1.21.2",
"pyaudio>=0.2.11",
"pytest>=7.4",
"pytest-asyncio>=0.21.1",
"pytest-cov>=4.1",
Expand Down
45 changes: 45 additions & 0 deletions src/wordcab/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,24 @@
# limitations under the License.

"""Command-line interface."""
import asyncio
import logging
from functools import wraps
from typing import Any, Callable, Optional

import click

from .login import cli_login, cli_logout


def coroutine(f: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap an ``async def`` function so it can be called synchronously.

    Click invokes command callbacks as plain functions, so an async callback
    has to be driven to completion by an event loop. The returned wrapper
    does that with ``asyncio.run``.

    Args:
        f: Coroutine function to wrap.

    Returns:
        A synchronous callable with the same signature and metadata as ``f``
        that returns the coroutine's result.
    """
    # NOTE: the return annotation must not reference ``asyncio.coroutine``;
    # that attribute was removed in Python 3.11 and annotations are evaluated
    # when the function is defined, which would break importing this module.

    @wraps(f)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return asyncio.run(f(*args, **kwargs))

    return wrapper


@click.group()
@click.version_option()
def main() -> None:
Expand All @@ -40,6 +53,38 @@ def logout() -> None:
main.add_command(login)
main.add_command(logout)

# The ``live`` command relies on the optional "live" extras (pyaudio,
# websockets). It is only registered when ``.live`` can be imported.
try:
    from .live import cli_live
except ImportError as import_error:
    # Deliberate best-effort: the CLI must keep working without the extras.
    # Record the reason at debug level instead of discarding it, so a missing
    # ``live`` command can be diagnosed.
    logging.getLogger(__name__).debug(
        "Live command not registered, import failed: %s", import_error
    )
else:

    @click.command()
    @click.option(
        "--server-url",
        "-s",
        default="ws://localhost:5001/api/v1/live",
        help="Wordcab API Live server URL",
    )
    @click.option(
        "--source-lang",
        "-l",
        default="en",
        help="Source language of the audio",
    )
    @click.option(
        "--api-key",
        "-k",
        default=None,
        help="Wordcab API Key",
    )
    @coroutine
    async def live(
        server_url: str, source_lang: str, api_key: Optional[str]
    ) -> None:
        """Transcribe audio in real-time."""
        await cli_live(server_url, source_lang, api_key)

    main.add_command(live)


if __name__ == "__main__":
main(prog_name="wordcab") # pragma: no cover
67 changes: 67 additions & 0 deletions src/wordcab/live.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,26 @@

"""Live client feature to communicate with a websocket endpoint."""

import asyncio
import json
import logging
import threading
from typing import Optional, Union

import pyaudio
import websockets

from wordcab.login import get_token

logger = logging.getLogger(__name__)


CHUNK = 36000  # Audio frames read per buffer (36000 / 16000 = 2.25 s per chunk)
FORMAT = pyaudio.paInt16  # Sample format for audio input (16-bit PCM)
CHANNELS = 1  # Mono
SR = 16000  # Sample rate passed to the input stream


class LiveClient:
"""Wordcab API LiveClient used to transcribe audio in real-time."""

Expand Down Expand Up @@ -76,3 +86,60 @@ async def send_audio(self, audio_data: bytes) -> str:
response = await self.websocket.recv()

return response


async def cli_live(server_url: str, source_lang: str, api_key: str) -> None:
    """Record from the default audio input and print live transcriptions.

    A background thread reads fixed-size chunks from a PyAudio input stream
    and hands them to the event loop through an ``asyncio.Queue``. The
    coroutine sends each chunk with ``LiveClient.send_audio`` and prints the
    ``"text"`` field of every non-empty JSON response.

    Args:
        server_url: URL of the live transcription websocket endpoint.
        source_lang: Source language of the audio.
        api_key: API key forwarded to ``LiveClient``.

    The loop runs until it is interrupted (Ctrl+C). The reader thread and
    PyAudio are released on every exit path.
    """
    async with LiveClient(server_url, source_lang, api_key) as live_client:
        print("Connected to the live server.")

        p = pyaudio.PyAudio()
        queue = asyncio.Queue()
        exit_signal = threading.Event()
        # We are inside a coroutine, so the running loop is the one the
        # reader thread must schedule its callbacks on.
        loop = asyncio.get_running_loop()

        def audio_thread():
            """Read audio chunks until ``exit_signal`` is set."""
            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=SR,
                input=True,
                frames_per_buffer=CHUNK,
            )
            print("Recording...")

            try:
                while not exit_signal.is_set():
                    try:
                        audio_data = stream.read(
                            CHUNK, exception_on_overflow=False
                        )
                    except OSError as e:
                        if e.errno == -9981:
                            # Input buffer overflow, let's continue
                            continue
                        raise
                    # asyncio.Queue is not thread-safe: enqueue via the loop.
                    loop.call_soon_threadsafe(queue.put_nowait, audio_data)
            finally:
                # Close the stream even if reading failed unexpectedly.
                stream.stop_stream()
                stream.close()

        # Start audio thread
        thread = threading.Thread(target=audio_thread, daemon=True)
        thread.start()

        try:
            while True:
                # Get audio data from queue and send
                audio_data = await queue.get()

                json_result = await live_client.send_audio(audio_data)
                if json_result:
                    print(json.loads(json_result)["text"])

        except (KeyboardInterrupt, asyncio.CancelledError):
            # Under asyncio.run, Ctrl+C usually reaches the coroutine as a
            # task cancellation rather than a KeyboardInterrupt, so both are
            # treated as a request to stop recording.
            print("Recording stopped.")
        finally:
            # Always stop the reader thread before releasing PortAudio.
            exit_signal.set()
            thread.join()
            p.terminate()

0 comments on commit 6539c6a

Please sign in to comment.