Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
6 changes: 3 additions & 3 deletions .fern/metadata.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"cliVersion": "4.46.0",
"cliVersion": "4.67.1",
"generatorName": "fernapi/fern-python-sdk",
"generatorVersion": "4.62.0",
"generatorConfig": {
Expand All @@ -16,6 +16,6 @@
"skip_validation": true
}
},
"originGitCommit": "879c76c78827f323e425c1640f76a6e50d6c68d3",
"sdkVersion": "6.0.2"
"originGitCommit": "aa8e0677bcaea82c02a5934c61d195b35921b33d",
"sdkVersion": "6.1.2"
}
4 changes: 0 additions & 4 deletions context7.json

This file was deleted.

37 changes: 27 additions & 10 deletions examples/13-transcription-live-websocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
Example: Live Transcription with WebSocket (Listen V1)

This example shows how to stream audio for real-time transcription using WebSocket.
It streams a pre-recorded audio file in chunks to simulate a live microphone feed.
"""

import os
import threading
import time
from typing import Union

from dotenv import load_dotenv
Expand All @@ -23,33 +27,46 @@

client = DeepgramClient()

# Audio chunking: simulate real-time streaming by sending 1-second chunks
sample_rate = 44100 # Hz (matches fixtures/audio.wav)
chunk_size = sample_rate * 2 # 2 bytes per sample (linear16 PCM mono)
chunk_delay = 1.0 # seconds between chunks

audio_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures", "audio.wav")

try:
with client.listen.v1.connect(model="nova-3") as connection:

def on_message(message: ListenV1SocketClientResponse) -> None:
msg_type = getattr(message, "type", "Unknown")
print(f"Received {msg_type} event")

# Extract transcription from Results events
if isinstance(message, ListenV1Results):
if message.channel and message.channel.alternatives:
transcript = message.channel.alternatives[0].transcript
if transcript:
print(f"Transcript: {transcript}")
else:
print(f"Received {msg_type} event")

connection.on(EventType.OPEN, lambda _: print("Connection opened"))
connection.on(EventType.MESSAGE, on_message)
connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))

# Start listening - this blocks until the connection closes
# In production, you would send audio data here:
# audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
# with open(audio_path, "rb") as audio_file:
# audio_data = audio_file.read()
# connection.send_listen_v_1_media(audio_data)
# Start listening in a background thread so we can send audio concurrently
threading.Thread(target=connection.start_listening, daemon=True).start()

# Stream audio file in chunks to simulate live microphone input
with open(audio_path, "rb") as f:
audio_data = f.read()

for i in range(0, len(audio_data), chunk_size):
chunk = audio_data[i : i + chunk_size]
if chunk:
connection.send_media(chunk)
time.sleep(chunk_delay)

connection.start_listening()
# Wait for final transcription results
time.sleep(2)

# For async version:
# from deepgram import AsyncDeepgramClient
Expand Down
18 changes: 13 additions & 5 deletions examples/14-transcription-live-websocket-v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,22 @@
) as connection:

def on_message(message: ListenV2SocketClientResponse) -> None:
msg_type = getattr(message, "type", type(message).__name__)
print(f"Received {msg_type} event ({type(message).__name__})")

# Extract transcription from TurnInfo events
if isinstance(message, ListenV2TurnInfo):
# V2 messages may arrive as typed objects or dicts depending on the union match
if isinstance(message, dict):
msg_type = message.get("type", "Unknown")
print(f"Received {msg_type} event")
if msg_type == "TurnInfo":
print(f" transcript: {message.get('transcript', '')}")
print(f" event: {message.get('event', '')}")
print(f" turn_index: {message.get('turn_index', '')}")
elif isinstance(message, ListenV2TurnInfo):
print(f"Received TurnInfo event")
print(f" transcript: {message.transcript}")
print(f" event: {message.event}")
print(f" turn_index: {message.turn_index}")
else:
msg_type = getattr(message, "type", type(message).__name__)
print(f"Received {msg_type} event")

connection.on(EventType.OPEN, lambda _: print("Connection opened"))
connection.on(EventType.MESSAGE, on_message)
Expand Down
75 changes: 34 additions & 41 deletions examples/23-text-builder-helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,23 @@

from deepgram import DeepgramClient
from deepgram.helpers import TextBuilder, add_pronunciation, ssml_to_deepgram
from deepgram.speak.v1.audio.types import (
AudioGenerateRequestEncoding,
AudioGenerateRequestModel,
)


def example_basic_text_builder():
"""Example 1: Basic TextBuilder usage with pronunciations and pauses"""
print("Example 1: Basic TextBuilder Usage")
print("-" * 50)

# Build text with pronunciations and pauses
# Build text with pronunciations
# Note: .pause() is supported in streaming (WebSocket) mode.
# For REST API, use plain text between pronunciations.
text = (
TextBuilder()
.text("Take ")
.pronunciation("azathioprine", "ˌæzəˈθaɪəpriːn")
.text(" twice daily with ")
.pronunciation("dupilumab", "duːˈpɪljuːmæb")
.text(" injections")
.pause(500)
.text(" Do not exceed prescribed dosage.")
.text(" injections. Do not exceed prescribed dosage.")
.build()
)

Expand All @@ -42,15 +38,16 @@ def example_basic_text_builder():
client = DeepgramClient(api_key=api_key)

# Generate speech with custom pronunciations
response = client.speak.v1.generate(
text,
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
encoding=AudioGenerateRequestEncoding.LINEAR16,
response = client.speak.v1.audio.generate(
text=text,
model="aura-2-asteria-en",
encoding="linear16",
)

# Save to file
with open("output_example1.wav", "wb") as f:
f.write(response)
for chunk in response:
f.write(chunk)

print("✓ Audio saved to output_example1.wav")
else:
Expand All @@ -75,13 +72,14 @@ def example_add_pronunciation_function():
if api_key:
client = DeepgramClient(api_key=api_key)

response = client.speak.v1.generate(
text,
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
response = client.speak.v1.audio.generate(
text=text,
model="aura-2-asteria-en",
)

with open("output_example2.wav", "wb") as f:
f.write(response)
for chunk in response:
f.write(chunk)

print("✓ Audio saved to output_example2.wav")
else:
Expand All @@ -96,10 +94,8 @@ def example_ssml_migration():
# Existing SSML from another TTS provider
ssml = """<speak>
Welcome to your medication guide.
<break time="500ms"/>
Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
Take <phoneme alphabet="ipa" ph="ˌæzəˈθaɪəpriːn">azathioprine</phoneme>
as prescribed.
<break time="1000ms"/>
Contact your doctor if you experience side effects.
</speak>"""

Expand All @@ -112,13 +108,14 @@ def example_ssml_migration():
if api_key:
client = DeepgramClient(api_key=api_key)

response = client.speak.v1.generate(
text,
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
response = client.speak.v1.audio.generate(
text=text,
model="aura-2-asteria-en",
)

with open("output_example3.wav", "wb") as f:
f.write(response)
for chunk in response:
f.write(chunk)

print("✓ Audio saved to output_example3.wav")
else:
Expand All @@ -137,9 +134,7 @@ def example_mixed_ssml_and_builder():
text = (
TextBuilder()
.from_ssml(ssml)
.pause(500)
.text(" Store at room temperature.")
.pause(500)
.text(" Keep out of reach of children.")
.build()
)
Expand All @@ -150,13 +145,14 @@ def example_mixed_ssml_and_builder():
if api_key:
client = DeepgramClient(api_key=api_key)

response = client.speak.v1.generate(
text,
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
response = client.speak.v1.audio.generate(
text=text,
model="aura-2-asteria-en",
)

with open("output_example4.wav", "wb") as f:
f.write(response)
for chunk in response:
f.write(chunk)

print("✓ Audio saved to output_example4.wav")
else:
Expand All @@ -172,19 +168,15 @@ def example_pharmacy_instructions():
TextBuilder()
.text("Prescription for ")
.pronunciation("lisinopril", "laɪˈsɪnəprɪl")
.pause(300)
.text(" Take one tablet by mouth daily for hypertension.")
.pause(500)
.text(". Take one tablet by mouth daily for hypertension.")
.text(" Common side effects may include ")
.pronunciation("hypotension", "ˌhaɪpoʊˈtɛnʃən")
.text(" or dizziness.")
.pause(500)
.text(" Do not take with ")
.pronunciation("aliskiren", "əˈlɪskɪrɛn")
.text(" or ")
.pronunciation("sacubitril", "səˈkjuːbɪtrɪl")
.pause(500)
.text(" Call your doctor if symptoms worsen.")
.text(". Call your doctor if symptoms worsen.")
.build()
)

Expand All @@ -194,14 +186,15 @@ def example_pharmacy_instructions():
if api_key:
client = DeepgramClient(api_key=api_key)

response = client.speak.v1.generate(
text,
model=AudioGenerateRequestModel.AURA_ASTERIA_EN,
encoding=AudioGenerateRequestEncoding.LINEAR16,
response = client.speak.v1.audio.generate(
text=text,
model="aura-2-asteria-en",
encoding="linear16",
)

with open("output_example5.wav", "wb") as f:
f.write(response)
for chunk in response:
f.write(chunk)

print("✓ Audio saved to output_example5.wav")
else:
Expand Down
15 changes: 6 additions & 9 deletions examples/24-text-builder-streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,13 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
connection.on(EventType.ERROR, lambda error: print(f"✗ Error: {error}"))

# Send the TextBuilder-generated text
text_message = SpeakV1Text(text=text)
connection.send_speak_v_1_text(text_message)
connection.send_text(SpeakV1Text(text=text))

# Flush to ensure all text is processed
flush_message = SpeakV1Flush()
connection.send_speak_v_1_flush(flush_message)
connection.send_flush()

# Close the connection when done
close_message = SpeakV1Close()
connection.send_speak_v_1_close(close_message)
connection.send_close()

# Start listening - this blocks until the connection closes
connection.start_listening()
Expand Down Expand Up @@ -138,10 +135,10 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
# Send multiple messages
for i, text in enumerate([intro, instruction1, instruction2, closing], 1):
print(f"Sending message {i}: {text[:50]}...")
connection.send_speak_v_1_text(SpeakV1Text(text=text))
connection.send_text(SpeakV1Text(text=text))

connection.send_speak_v_1_flush(SpeakV1Flush())
connection.send_speak_v_1_close(SpeakV1Close())
connection.send_flush()
connection.send_close()

connection.start_listening()

Expand Down
Loading
Loading