deepgram · GregHolmes · Apr 15, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
@@ -1,5 +1,5 @@
 {
-  "cliVersion": "4.46.0",
+  "cliVersion": "4.67.1",
   "generatorName": "fernapi/fern-python-sdk",
   "generatorVersion": "4.62.0",
   "generatorConfig": {
@@ -16,6 +16,6 @@
       "skip_validation": true
     }
   },
-  "originGitCommit": "879c76c78827f323e425c1640f76a6e50d6c68d3",
-  "sdkVersion": "6.0.2"
+  "originGitCommit": "aa8e0677bcaea82c02a5934c61d195b35921b33d",
+  "sdkVersion": "6.1.2"
 }
@@ -2,8 +2,12 @@
 Example: Live Transcription with WebSocket (Listen V1)
 
 This example shows how to stream audio for real-time transcription using WebSocket.
+It streams a pre-recorded audio file in chunks to simulate a live microphone feed.
 """
 
+import os
+import threading
+import time
 from typing import Union
 
 from dotenv import load_dotenv
@@ -23,33 +27,46 @@
 
 client = DeepgramClient()
 
+# Audio chunking: simulate real-time streaming by sending 1-second chunks
+sample_rate = 44100  # Hz (matches fixtures/audio.wav)
+chunk_size = sample_rate * 2  # 2 bytes per sample (linear16 PCM mono)
+chunk_delay = 1.0  # seconds between chunks
+
+audio_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures", "audio.wav")
+
 try:
     with client.listen.v1.connect(model="nova-3") as connection:
 
         def on_message(message: ListenV1SocketClientResponse) -> None:
             msg_type = getattr(message, "type", "Unknown")
-            print(f"Received {msg_type} event")
-
-            # Extract transcription from Results events
             if isinstance(message, ListenV1Results):
                 if message.channel and message.channel.alternatives:
                     transcript = message.channel.alternatives[0].transcript
                     if transcript:
                         print(f"Transcript: {transcript}")
+            else:
+                print(f"Received {msg_type} event")
 
         connection.on(EventType.OPEN, lambda _: print("Connection opened"))
         connection.on(EventType.MESSAGE, on_message)
         connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
         connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
 
-        # Start listening - this blocks until the connection closes
-        # In production, you would send audio data here:
-        # audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
-        # with open(audio_path, "rb") as audio_file:
-        #     audio_data = audio_file.read()
-        #     connection.send_listen_v_1_media(audio_data)
+        # Start listening in a background thread so we can send audio concurrently
+        threading.Thread(target=connection.start_listening, daemon=True).start()
+
+        # Stream audio file in chunks to simulate live microphone input
+        with open(audio_path, "rb") as f:
+            audio_data = f.read()
+
+        for i in range(0, len(audio_data), chunk_size):
+            chunk = audio_data[i : i + chunk_size]
+            if chunk:
+                connection.send_media(chunk)
+                time.sleep(chunk_delay)
 
-        connection.start_listening()
+        # Fixed grace period: give any remaining transcription results time to arrive before leaving the `with` block
+        time.sleep(2)
 
     # For async version:
     # from deepgram import AsyncDeepgramClient

@@ -36,14 +36,22 @@
     ) as connection:
 
         def on_message(message: ListenV2SocketClientResponse) -> None:
-            msg_type = getattr(message, "type", type(message).__name__)
-            print(f"Received {msg_type} event ({type(message).__name__})")
-
-            # Extract transcription from TurnInfo events
-            if isinstance(message, ListenV2TurnInfo):
+            # V2 messages may arrive as typed objects or dicts depending on the union match
+            if isinstance(message, dict):
+                msg_type = message.get("type", "Unknown")
+                print(f"Received {msg_type} event")
+                if msg_type == "TurnInfo":
+                    print(f"  transcript: {message.get('transcript', '')}")
+                    print(f"  event: {message.get('event', '')}")
+                    print(f"  turn_index: {message.get('turn_index', '')}")
+            elif isinstance(message, ListenV2TurnInfo):
+                print(f"Received TurnInfo event")
                 print(f"  transcript: {message.transcript}")
                 print(f"  event: {message.event}")
                 print(f"  turn_index: {message.turn_index}")
+            else:
+                msg_type = getattr(message, "type", type(message).__name__)
+                print(f"Received {msg_type} event")
 
         connection.on(EventType.OPEN, lambda _: print("Connection opened"))
         connection.on(EventType.MESSAGE, on_message)

@@ -10,27 +10,23 @@
 
 from deepgram import DeepgramClient
 from deepgram.helpers import TextBuilder, add_pronunciation, ssml_to_deepgram
-from deepgram.speak.v1.audio.types import (
-    AudioGenerateRequestEncoding,
-    AudioGenerateRequestModel,
-)
 
 
 def example_basic_text_builder():
-    """Example 1: Basic TextBuilder usage with pronunciations and pauses"""
+    """Example 1: Basic TextBuilder usage with pronunciations"""
     print("Example 1: Basic TextBuilder Usage")
     print("-" * 50)
 
-    # Build text with pronunciations and pauses
+    # Build text with pronunciations
+    # Note: .pause() is supported only in streaming (WebSocket) mode.
+    # For the REST API, omit pauses and use plain text between pronunciations.
     text = (
         TextBuilder()
         .text("Take ")
         .pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn")
         .text(" twice daily with ")
         .pronunciation("dupilumab", "duːˈpɪljuːmæb")
-        .text(" injections")
-        .pause(500)
-        .text(" Do not exceed prescribed dosage.")
+        .text(" injections. Do not exceed prescribed dosage.")
         .build()
     )
 
@@ -42,15 +38,16 @@ def example_basic_text_builder():
         client = DeepgramClient(api_key=api_key)
 
         # Generate speech with custom pronunciations
-        response = client.speak.v1.generate(
-            text,
-            model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
-            encoding=AudioGenerateRequestEncoding.LINEAR16,
+        response = client.speak.v1.audio.generate(
+            text=text,
+            model="aura-2-asteria-en",
+            encoding="linear16",
         )
 
         # Save to file
         with open("output_example1.wav", "wb") as f:
-            f.write(response)
+            for chunk in response:
+                f.write(chunk)
 
         print("✓ Audio saved to output_example1.wav")
     else:
@@ -75,13 +72,14 @@ def example_add_pronunciation_function():
     if api_key:
         client = DeepgramClient(api_key=api_key)
 
-        response = client.speak.v1.generate(
-            text,
-            model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
+        response = client.speak.v1.audio.generate(
+            text=text,
+            model="aura-2-asteria-en",
         )
 
         with open("output_example2.wav", "wb") as f:
-            f.write(response)
+            for chunk in response:
+                f.write(chunk)
 
         print("✓ Audio saved to output_example2.wav")
     else:
@@ -96,10 +94,8 @@ def example_ssml_migration():
     # Existing SSML from another TTS provider
     ssml = """<speak>
         Welcome to your medication guide.
-        <break time="500ms"/>
-        Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme> 
+        Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
         as prescribed.
-        <break time="1000ms"/>
         Contact your doctor if you experience side effects.
     </speak>"""
 
@@ -112,13 +108,14 @@ def example_ssml_migration():
     if api_key:
         client = DeepgramClient(api_key=api_key)
 
-        response = client.speak.v1.generate(
-            text,
-            model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
+        response = client.speak.v1.audio.generate(
+            text=text,
+            model="aura-2-asteria-en",
         )
 
         with open("output_example3.wav", "wb") as f:
-            f.write(response)
+            for chunk in response:
+                f.write(chunk)
 
         print("✓ Audio saved to output_example3.wav")
     else:
@@ -137,9 +134,7 @@ def example_mixed_ssml_and_builder():
     text = (
         TextBuilder()
         .from_ssml(ssml)
-        .pause(500)
         .text(" Store at room temperature.")
-        .pause(500)
         .text(" Keep out of reach of children.")
         .build()
     )
@@ -150,13 +145,14 @@ def example_mixed_ssml_and_builder():
     if api_key:
         client = DeepgramClient(api_key=api_key)
 
-        response = client.speak.v1.generate(
-            text,
-            model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
+        response = client.speak.v1.audio.generate(
+            text=text,
+            model="aura-2-asteria-en",
         )
 
         with open("output_example4.wav", "wb") as f:
-            f.write(response)
+            for chunk in response:
+                f.write(chunk)
 
         print("✓ Audio saved to output_example4.wav")
     else:
@@ -172,19 +168,15 @@ def example_pharmacy_instructions():
         TextBuilder()
         .text("Prescription for ")
         .pronunciation("lisinopril", "laɪˈsɪnəprɪl")
-        .pause(300)
-        .text(" Take one tablet by mouth daily for hypertension.")
-        .pause(500)
+        .text(". Take one tablet by mouth daily for hypertension.")
         .text(" Common side effects may include ")
         .pronunciation("hypotension", "ˌhaɪpoʊˈtɛnʃən")
         .text(" or dizziness.")
-        .pause(500)
         .text(" Do not take with ")
         .pronunciation("aliskiren", "əˈlɪskɪrɛn")
         .text(" or ")
         .pronunciation("sacubitril", "səˈkjuːbɪtrɪl")
-        .pause(500)
-        .text(" Call your doctor if symptoms worsen.")
+        .text(". Call your doctor if symptoms worsen.")
         .build()
     )
 
@@ -194,14 +186,15 @@ def example_pharmacy_instructions():
     if api_key:
         client = DeepgramClient(api_key=api_key)
 
-        response = client.speak.v1.generate(
-            text,
-            model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
-            encoding=AudioGenerateRequestEncoding.LINEAR16,
+        response = client.speak.v1.audio.generate(
+            text=text,
+            model="aura-2-asteria-en",
+            encoding="linear16",
         )
 
         with open("output_example5.wav", "wb") as f:
-            f.write(response)
+            for chunk in response:
+                f.write(chunk)
 
         print("✓ Audio saved to output_example5.wav")
     else:

@@ -65,16 +65,13 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
             connection.on(EventType.ERROR, lambda error: print(f"✗ Error: {error}"))
 
             # Send the TextBuilder-generated text
-            text_message = SpeakV1Text(text=text)
-            connection.send_speak_v_1_text(text_message)
+            connection.send_text(SpeakV1Text(text=text))
 
             # Flush to ensure all text is processed
-            flush_message = SpeakV1Flush()
-            connection.send_speak_v_1_flush(flush_message)
+            connection.send_flush()
 
             # Close the connection when done
-            close_message = SpeakV1Close()
-            connection.send_speak_v_1_close(close_message)
+            connection.send_close()
 
             # Start listening - this blocks until the connection closes
             connection.start_listening()
@@ -138,10 +135,10 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
             # Send multiple messages
             for i, text in enumerate([intro, instruction1, instruction2, closing], 1):
                 print(f"Sending message {i}: {text[:50]}...")
-                connection.send_speak_v_1_text(SpeakV1Text(text=text))
+                connection.send_text(SpeakV1Text(text=text))
 
-            connection.send_speak_v_1_flush(SpeakV1Flush())
-            connection.send_speak_v_1_close(SpeakV1Close())
+            connection.send_flush()
+            connection.send_close()
 
             connection.start_listening()