Skip to content

Commit 51f595d

Browse files
Agent API Early Access
1 parent aff6110 commit 51f595d

File tree

20 files changed

+2653
-16
lines changed

20 files changed

+2653
-16
lines changed

deepgram/__init__.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from .errors import DeepgramApiKeyError
3535

3636
# listen/read/speak/agent routers
37-
from .client import Listen, Read
37+
from .client import ListenRouter, ReadRouter, SpeakRouter, AgentRouter
3838

3939
# common
4040
from .client import (
@@ -302,6 +302,57 @@
302302
AsyncSelfHostedClient,
303303
)
304304

305+
306+
# agent
307+
from .client import AgentWebSocketEvents
308+
309+
# websocket
310+
from .client import (
311+
AgentWebSocketClient,
312+
AsyncAgentWebSocketClient,
313+
)
314+
315+
from .client import (
316+
#### common websocket response
317+
# OpenResponse,
318+
# CloseResponse,
319+
# ErrorResponse,
320+
# UnhandledResponse,
321+
#### unique
322+
WelcomeResponse,
323+
SettingsAppliedResponse,
324+
ConversationTextResponse,
325+
UserStartedSpeakingResponse,
326+
AgentThinkingResponse,
327+
FunctionCallingResponse,
328+
AgentStartedSpeakingResponse,
329+
AgentAudioDoneResponse,
330+
EndOfThoughtResponse,
331+
)
332+
333+
from .client import (
334+
# top level
335+
SettingsConfigurationOptions,
336+
UpdateInstructionsOptions,
337+
UpdateSpeakOptions,
338+
InjectAgentMessageOptions,
339+
# sub level
340+
Listen,
341+
Speak,
342+
Header,
343+
Item,
344+
Properties,
345+
Parameters,
346+
Function,
347+
Provider,
348+
Think,
349+
Agent,
350+
Input,
351+
Output,
352+
Audio,
353+
Context,
354+
)
355+
305356
# utilities
306357
# pylint: disable=wrong-import-position
307358
from .audio import Microphone, DeepgramMicrophoneError

deepgram/audio/microphone/microphone.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import logging
1010

1111
from ...utils import verboselogs
12+
1213
from .constants import LOGGING, CHANNELS, RATE, CHUNK
1314

1415
if TYPE_CHECKING:

deepgram/audio/speaker/speaker.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ class Speaker: # pylint: disable=too-many-instance-attributes
5050
# _asyncio_loop: asyncio.AbstractEventLoop
5151
# _asyncio_thread: threading.Thread
5252
_receiver_thread: Optional[threading.Thread] = None
53-
5453
_loop: Optional[asyncio.AbstractEventLoop] = None
5554

5655
_push_callback_org: Optional[Callable] = None
@@ -265,6 +264,7 @@ async def _start_asyncio_receiver(self):
265264
await self._push_callback(message)
266265
elif isinstance(message, bytes):
267266
self._logger.verbose("Received audio data...")
267+
await self._push_callback(message)
268268
self.add_audio_to_queue(message)
269269
except websockets.exceptions.ConnectionClosedOK as e:
270270
self._logger.debug("send() exiting gracefully: %d", e.code)
@@ -297,6 +297,7 @@ def _start_threaded_receiver(self):
297297
self._push_callback(message)
298298
elif isinstance(message, bytes):
299299
self._logger.verbose("Received audio data...")
300+
self._push_callback(message)
300301
self.add_audio_to_queue(message)
301302
except Exception as e: # pylint: disable=broad-except
302303
self._logger.notice("_start_threaded_receiver exception: %s", str(e))
@@ -365,6 +366,7 @@ def _play(self, audio_out, stream, stop):
365366
"LastPlay delta is greater than threshold. Unmute!"
366367
)
367368
self._microphone.unmute()
369+
368370
data = audio_out.get(True, TIMEOUT)
369371
with self._lock_wait:
370372
self._last_datagram = datetime.now()

deepgram/client.py

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
)
5656

5757
# listen/read/speak/agent routers
58-
from .clients import Listen, Read, Speak
58+
from .clients import ListenRouter, ReadRouter, SpeakRouter, AgentRouter
5959

6060
# speech-to-text
6161
from .clients import LiveClient, AsyncLiveClient # backward compat
@@ -308,6 +308,58 @@
308308
AsyncSelfHostedClient,
309309
)
310310

311+
312+
# agent
313+
from .clients import AgentWebSocketEvents
314+
315+
# websocket
316+
from .clients import (
317+
AgentWebSocketClient,
318+
AsyncAgentWebSocketClient,
319+
)
320+
321+
from .clients import (
322+
#### common websocket response
323+
# OpenResponse,
324+
# CloseResponse,
325+
# ErrorResponse,
326+
# UnhandledResponse,
327+
#### unique
328+
WelcomeResponse,
329+
SettingsAppliedResponse,
330+
ConversationTextResponse,
331+
UserStartedSpeakingResponse,
332+
AgentThinkingResponse,
333+
FunctionCallingResponse,
334+
AgentStartedSpeakingResponse,
335+
AgentAudioDoneResponse,
336+
EndOfThoughtResponse,
337+
)
338+
339+
from .clients import (
340+
# top level
341+
SettingsConfigurationOptions,
342+
UpdateInstructionsOptions,
343+
UpdateSpeakOptions,
344+
InjectAgentMessageOptions,
345+
# sub level
346+
Listen,
347+
Speak,
348+
Header,
349+
Item,
350+
Properties,
351+
Parameters,
352+
Function,
353+
Provider,
354+
Think,
355+
Agent,
356+
Input,
357+
Output,
358+
Audio,
359+
Context,
360+
)
361+
362+
311363
# client errors and options
312364
from .options import DeepgramClientOptions, ClientOptionsFromEnv
313365
from .errors import DeepgramApiKeyError
@@ -397,21 +449,21 @@ def listen(self):
397449
"""
398450
Returns a Listen dot-notation router for interacting with Deepgram's transcription services.
399451
"""
400-
return Listen(self._config)
452+
return ListenRouter(self._config)
401453

402454
@property
403455
def read(self):
404456
"""
405457
Returns a Read dot-notation router for interacting with Deepgram's read services.
406458
"""
407-
return Read(self._config)
459+
return ReadRouter(self._config)
408460

409461
@property
410462
def speak(self):
411463
"""
412464
Returns a Speak dot-notation router for interacting with Deepgram's speak services.
413465
"""
414-
return Speak(self._config)
466+
return SpeakRouter(self._config)
415467

416468
@property
417469
@deprecation.deprecated(
@@ -480,6 +532,13 @@ def asyncselfhosted(self):
480532
"""
481533
return self.Version(self._config, "asyncselfhosted")
482534

535+
@property
536+
def agent(self):
537+
"""
538+
Returns an Agent dot-notation router for interacting with Deepgram's agent services.
539+
"""
540+
return AgentRouter(self._config)
541+
483542
# INTERNAL CLASSES
484543
class Version:
485544
"""

deepgram/clients/__init__.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@
4848
)
4949
from .errors import DeepgramModuleError
5050

51-
from .listen_router import Listen
52-
from .read_router import Read
53-
from .speak_router import Speak
51+
from .listen_router import ListenRouter
52+
from .read_router import ReadRouter
53+
from .speak_router import SpeakRouter
54+
from .agent_router import AgentRouter
5455

5556
# listen
5657
from .listen import LiveTranscriptionEvents
@@ -318,3 +319,53 @@
318319
SelfHostedClient,
319320
AsyncSelfHostedClient,
320321
)
322+
323+
# agent
324+
from .agent import AgentWebSocketEvents
325+
326+
# websocket
327+
from .agent import (
328+
AgentWebSocketClient,
329+
AsyncAgentWebSocketClient,
330+
)
331+
332+
from .agent import (
333+
#### common websocket response
334+
# OpenResponse,
335+
# CloseResponse,
336+
# ErrorResponse,
337+
# UnhandledResponse,
338+
#### unique
339+
WelcomeResponse,
340+
SettingsAppliedResponse,
341+
ConversationTextResponse,
342+
UserStartedSpeakingResponse,
343+
AgentThinkingResponse,
344+
FunctionCallingResponse,
345+
AgentStartedSpeakingResponse,
346+
AgentAudioDoneResponse,
347+
EndOfThoughtResponse,
348+
)
349+
350+
from .agent import (
351+
# top level
352+
SettingsConfigurationOptions,
353+
UpdateInstructionsOptions,
354+
UpdateSpeakOptions,
355+
InjectAgentMessageOptions,
356+
# sub level
357+
Listen,
358+
Speak,
359+
Header,
360+
Item,
361+
Properties,
362+
Parameters,
363+
Function,
364+
Provider,
365+
Think,
366+
Agent,
367+
Input,
368+
Output,
369+
Audio,
370+
Context,
371+
)

deepgram/clients/agent/__init__.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
5+
from .enums import AgentWebSocketEvents
6+
7+
# websocket
8+
from .client import (
9+
AgentWebSocketClient,
10+
AsyncAgentWebSocketClient,
11+
)
12+
13+
from .client import (
14+
#### common websocket response
15+
OpenResponse,
16+
CloseResponse,
17+
ErrorResponse,
18+
UnhandledResponse,
19+
#### unique
20+
WelcomeResponse,
21+
SettingsAppliedResponse,
22+
ConversationTextResponse,
23+
UserStartedSpeakingResponse,
24+
AgentThinkingResponse,
25+
FunctionCallingResponse,
26+
AgentStartedSpeakingResponse,
27+
AgentAudioDoneResponse,
28+
EndOfThoughtResponse,
29+
)
30+
31+
from .client import (
32+
# top level
33+
SettingsConfigurationOptions,
34+
UpdateInstructionsOptions,
35+
UpdateSpeakOptions,
36+
InjectAgentMessageOptions,
37+
# sub level
38+
Listen,
39+
Speak,
40+
Header,
41+
Item,
42+
Properties,
43+
Parameters,
44+
Function,
45+
Provider,
46+
Think,
47+
Agent,
48+
Input,
49+
Output,
50+
Audio,
51+
Context,
52+
)

0 commit comments

Comments
 (0)