Add live transcription demo script (#313)

* add cli command to record live * live CLI command is functional * fix CLI command based on dependencies * fix tests * add portaudio for tests * try portaudio * new try portaudio * test install from source * move portaudio install
Wordcab · Sep 14, 2023 · 6539c6a · 6539c6a
1 parent c3362ae
commit 6539c6a
Show file tree

Hide file tree

Showing 4 changed files with 119 additions and 0 deletions.
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
@@ -57,6 +57,9 @@ jobs:
         with:
           python-version: 3.8
 
+      # PortAudio development headers are installed before the Python
+      # dependencies because the `tests` extra pulls in pyaudio.
+      - name: Install PortAudio
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y portaudio19-dev
+
       - name: install-dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,8 @@ path = "src/wordcab/__init__.py"
 
 [project.optional-dependencies]
 live = [
+  "numpy>=1.21.2",
+  "pyaudio>=0.2.11",
   "websockets>=11.0.3",
 ]
 docs = [
@@ -61,6 +63,8 @@ quality = [
 ]
 tests = [
   "httpx>=0.23.3",
+  "numpy>=1.21.2",
+  "pyaudio>=0.2.11",
   "pytest>=7.4",
   "pytest-asyncio>=0.21.1",
   "pytest-cov>=4.1",

diff --git a/src/wordcab/__main__.py b/src/wordcab/__main__.py
@@ -13,11 +13,24 @@
 # limitations under the License.
 
 """Command-line interface."""
+import asyncio
+from functools import wraps
+from typing import Any, Callable, Optional
+
 import click
 
 from .login import cli_login, cli_logout
 
 
+def coroutine(f) -> asyncio.coroutine:
+    """Decorator to run a function as a coroutine."""
+
+    @wraps(f)
+    def wrapper(*args, **kwargs):
+        return asyncio.run(f(*args, **kwargs))
+
+    return wrapper
+
+
 @click.group()
 @click.version_option()
 def main() -> None:
@@ -40,6 +53,38 @@ def logout() -> None:
 main.add_command(login)
 main.add_command(logout)
 
+try:
+    from .live import cli_live
+
+    @click.command()
+    @click.option(
+        "--server-url",
+        "-s",
+        default="ws://localhost:5001/api/v1/live",
+        help="Wordcab API Live server URL",
+    )
+    @click.option(
+        "--source-lang",
+        "-l",
+        default="en",
+        help="Source language of the audio",
+    )
+    @click.option(
+        "--api-key",
+        "-k",
+        default=None,
+        help="Wordcab API Key",
+    )
+    @coroutine
+    async def live(server_url: str, source_lang: str, api_key: str) -> None:
+        """Transcribe audio in real-time."""
+        await cli_live(server_url, source_lang, api_key)
+
+    main.add_command(live)
+
+except ImportError:
+    pass
+
 
 if __name__ == "__main__":
     main(prog_name="wordcab")  # pragma: no cover
diff --git a/src/wordcab/live.py b/src/wordcab/live.py
@@ -14,16 +14,26 @@
 
 """Live client feature to communicate with a websocket endpoint."""
 
+import asyncio
+import json
 import logging
+import threading
 from typing import Optional, Union
 
+import pyaudio
 import websockets
 
 from wordcab.login import get_token
 
 logger = logging.getLogger(__name__)
 
 
+CHUNK = 36000  # Audio frames per buffer and per read (2.25 s at SR = 16000)
+FORMAT = pyaudio.paInt16  # Sample format for audio input (16-bit PCM)
+CHANNELS = 1  # Number of input channels (mono)
+SR = 16000  # Sample rate, in frames per second
+
+
 class LiveClient:
     """Wordcab API LiveClient used to transcribe audio in real-time."""
 
@@ -76,3 +86,60 @@ async def send_audio(self, audio_data: bytes) -> str:
         response = await self.websocket.recv()
 
         return response
+
+
+async def cli_live(server_url: str, source_lang: str, api_key: str) -> None:
+    """Transcribe audio in real-time."""
+    async with LiveClient(server_url, source_lang, api_key) as live_client:
+        print("Connected to the live server.")
+
+        p = pyaudio.PyAudio()
+        queue = asyncio.Queue()
+
+        # Define function to run in a thread
+        def audio_thread():
+            stream = p.open(
+                format=FORMAT,
+                channels=CHANNELS,
+                rate=SR,
+                input=True,
+                frames_per_buffer=CHUNK,
+            )
+            print("Recording...")
+
+            while not exit_signal.is_set():
+                try:
+                    audio_data = stream.read(CHUNK, exception_on_overflow=False)
+                    loop.call_soon_threadsafe(queue.put_nowait, audio_data)
+                except OSError as e:
+                    if e.errno == -9981:
+                        # Input buffer overflow, let's continue
+                        continue
+                    else:
+                        raise
+
+            stream.stop_stream()
+            stream.close()
+
+        exit_signal = threading.Event()
+        loop = asyncio.get_event_loop()
+
+        # Start audio thread
+        thread = threading.Thread(target=audio_thread, daemon=True)
+        thread.start()
+
+        try:
+            while True:
+                # Get audio data from queue and send
+                audio_data = await queue.get()
+
+                json_result = await live_client.send_audio(audio_data)
+                if json_result:
+                    print(json.loads(json_result)["text"])
+
+        except KeyboardInterrupt:
+            print("Recording stopped.")
+            exit_signal.set()
+            thread.join()
+
+        p.terminate()