Add SpeechStarted event support to the live transcription clients.

* Export a new SpeechStartedResponse type through each package level.
* Emit LiveTranscriptionEvents.SpeechStarted from the sync and async v1 clients.
* Add the vad_events option to LiveOptions.
* Update the streaming examples: register a SpeechStarted handler, use model "nova-2",
  and (async_http) make on_utterance_end a coroutine.

NOTE(review): line breaks, indentation and blank context lines below were rebuilt from
the hunk headers' line counts; confirm against the target tree before applying.

diff --git a/deepgram/__init__.py b/deepgram/__init__.py
index 2cbaa3d3..78130373 100644
--- a/deepgram/__init__.py
+++ b/deepgram/__init__.py
@@ -20,6 +20,7 @@
 from .client import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/client.py b/deepgram/client.py
index 834dbd27..8eca1c47 100644
--- a/deepgram/client.py
+++ b/deepgram/client.py
@@ -21,6 +21,7 @@
 from .clients import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/clients/__init__.py b/deepgram/clients/__init__.py
index 580bf19f..35472724 100644
--- a/deepgram/clients/__init__.py
+++ b/deepgram/clients/__init__.py
@@ -13,6 +13,7 @@
 from .live import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/clients/listen.py b/deepgram/clients/listen.py
index 5f945311..19fd6dff 100644
--- a/deepgram/clients/listen.py
+++ b/deepgram/clients/listen.py
@@ -36,6 +36,7 @@
 from .live import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/clients/live/__init__.py b/deepgram/clients/live/__init__.py
index 04b7207a..75399908 100644
--- a/deepgram/clients/live/__init__.py
+++ b/deepgram/clients/live/__init__.py
@@ -9,6 +9,7 @@
 from .client import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/clients/live/client.py b/deepgram/clients/live/client.py
index da58ea06..7a4fd97c 100644
--- a/deepgram/clients/live/client.py
+++ b/deepgram/clients/live/client.py
@@ -9,6 +9,7 @@
 from .v1.response import (
     LiveResultResponse as LiveResultResponseLatest,
     MetadataResponse as MetadataResponseLatest,
+    SpeechStartedResponse as SpeechStartedResponseLatest,
     UtteranceEndResponse as UtteranceEndResponseLatest,
     ErrorResponse as ErrorResponseLatest,
 )
@@ -45,6 +46,14 @@ class MetadataResponse(MetadataResponseLatest):
     pass
 
 
+class SpeechStartedResponse(SpeechStartedResponseLatest):
+    """
+    pass through for SpeechStartedResponse based on API version
+    """
+
+    pass
+
+
 class UtteranceEndResponse(UtteranceEndResponseLatest):
     """
     pass through for UtteranceEndResponse based on API version
diff --git a/deepgram/clients/live/enums.py b/deepgram/clients/live/enums.py
index 1263753e..bb9597fe 100644
--- a/deepgram/clients/live/enums.py
+++ b/deepgram/clients/live/enums.py
@@ -15,5 +15,6 @@ class LiveTranscriptionEvents(Enum):
     Transcript = "Results"
     Metadata = "Metadata"
     UtteranceEnd = "UtteranceEnd"
+    SpeechStarted = "SpeechStarted"
     Error = "Error"
     Warning = "Warning"
diff --git a/deepgram/clients/live/v1/__init__.py b/deepgram/clients/live/v1/__init__.py
index 65264a6e..8eb0e9c5 100644
--- a/deepgram/clients/live/v1/__init__.py
+++ b/deepgram/clients/live/v1/__init__.py
@@ -9,6 +9,7 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
diff --git a/deepgram/clients/live/v1/async_client.py b/deepgram/clients/live/v1/async_client.py
index d3723c12..f56acbfd 100644
--- a/deepgram/clients/live/v1/async_client.py
+++ b/deepgram/clients/live/v1/async_client.py
@@ -14,6 +14,7 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
@@ -127,6 +128,19 @@ async def _start(self) -> None:
                             metadata=result,
                             **dict(self.kwargs),
                         )
+                    case LiveTranscriptionEvents.SpeechStarted.value:
+                        self.logger.debug(
+                            "response_type: %s, data: %s", response_type, data
+                        )
+                        result = SpeechStartedResponse.from_json(message)
+                        if result is None:
+                            self.logger.error("SpeechStartedResponse.from_json is None")
+                            continue
+                        await self._emit(
+                            LiveTranscriptionEvents.SpeechStarted,
+                            speech_started=result,
+                            **dict(self.kwargs),
+                        )
                     case LiveTranscriptionEvents.UtteranceEnd.value:
                         self.logger.debug(
                             "response_type: %s, data: %s", response_type, data
diff --git a/deepgram/clients/live/v1/client.py b/deepgram/clients/live/v1/client.py
index e0dca293..5b25feff 100644
--- a/deepgram/clients/live/v1/client.py
+++ b/deepgram/clients/live/v1/client.py
@@ -16,6 +16,7 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
@@ -159,6 +160,19 @@ def _listening(self) -> None:
                             metadata=result,
                             **dict(self.kwargs),
                         )
+                    case LiveTranscriptionEvents.SpeechStarted.value:
+                        self.logger.debug(
+                            "response_type: %s, data: %s", response_type, data
+                        )
+                        result = SpeechStartedResponse.from_json(message)
+                        if result is None:
+                            self.logger.error("SpeechStartedResponse.from_json is None")
+                            continue
+                        self._emit(
+                            LiveTranscriptionEvents.SpeechStarted,
+                            speech_started=result,
+                            **dict(self.kwargs),
+                        )
                     case LiveTranscriptionEvents.UtteranceEnd.value:
                         self.logger.debug(
                             "response_type: %s, data: %s", response_type, data
diff --git a/deepgram/clients/live/v1/options.py b/deepgram/clients/live/v1/options.py
index d74a3c42..ab0eb1ec 100644
--- a/deepgram/clients/live/v1/options.py
+++ b/deepgram/clients/live/v1/options.py
@@ -44,6 +44,7 @@ class LiveOptions:
     tag: Optional[list] = None
     tier: Optional[str] = None
     utterance_end_ms: Optional[str] = None
+    vad_events: Optional[bool] = None
     version: Optional[str] = None
 
     def __getitem__(self, key):
diff --git a/deepgram/clients/live/v1/response.py b/deepgram/clients/live/v1/response.py
index 00fcd5c1..555035ab 100644
--- a/deepgram/clients/live/v1/response.py
+++ b/deepgram/clients/live/v1/response.py
@@ -161,6 +161,25 @@ def __getitem__(self, key):
         return _dict[key]
 
 
+# Speech Started Message
+
+
+@dataclass_json
+@dataclass
+class SpeechStartedResponse:
+    """
+    SpeechStartedResponse Message from the Deepgram Platform
+    """
+
+    type: Optional[str] = ""
+    channel: Optional[List[int]] = None
+    timestamp: Optional[float] = 0
+
+    def __getitem__(self, key):
+        _dict = self.to_dict()
+        return _dict[key]
+
+
 # Utterance End Message
 
 
diff --git a/examples/advanced/streaming/direct-invocation/main.py b/examples/advanced/streaming/direct-invocation/main.py
index 8cfc88b6..8d068225 100644
--- a/examples/advanced/streaming/direct-invocation/main.py
+++ b/examples/advanced/streaming/direct-invocation/main.py
@@ -39,6 +39,9 @@ def on_message(self, result, **kwargs):
         def on_metadata(self, metadata, **kwargs):
             print(f"\n\n{metadata}\n\n")
 
+        def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
         def on_utterance_end(self, utterance_end, **kwargs):
             print(f"\n\n{utterance_end}\n\n")
 
@@ -47,11 +50,12 @@ def on_error(self, error, **kwargs):
 
         liveClient.on(LiveTranscriptionEvents.Transcript, on_message)
         liveClient.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        liveClient.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
         liveClient.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         liveClient.on(LiveTranscriptionEvents.Error, on_error)
 
         # connect to websocket
-        options = LiveOptions(model="nova", interim_results=False, language="en-US")
+        options = LiveOptions(model="nova-2", language="en-US")
         liveClient.start(options)
 
         lock_exit = threading.Lock()
diff --git a/examples/advanced/streaming/microphone-inheritance/main.py b/examples/advanced/streaming/microphone-inheritance/main.py
index f980c187..34ad0808 100644
--- a/examples/advanced/streaming/microphone-inheritance/main.py
+++ b/examples/advanced/streaming/microphone-inheritance/main.py
@@ -14,6 +14,7 @@
     Microphone,
     LiveResultResponse,
     MetadataResponse,
+    SpeechStartedResponse,
     UtteranceEndResponse,
     ErrorResponse,
 )
@@ -27,6 +28,7 @@ def __init__(self, config: LiveClient):
         super().__init__(config)
         super().on(LiveTranscriptionEvents.Transcript, self.on_message)
         super().on(LiveTranscriptionEvents.Metadata, self.on_metadata)
+        super().on(LiveTranscriptionEvents.SpeechStarted, self.on_speech_started)
         super().on(LiveTranscriptionEvents.UtteranceEnd, self.on_utterance_end)
         super().on(LiveTranscriptionEvents.Error, self.on_error)
         # self.test = "child"
@@ -54,6 +56,9 @@ def on_message(self, parent, result, **kwargs):
     def on_metadata(self, parent, metadata, **kwargs):
         print(f"\n\n{metadata}\n\n")
 
+    def on_speech_started(self, parent, speech_started, **kwargs):
+        print(f"\n\n{speech_started}\n\n")
+
     def on_utterance_end(self, parent, utterance_end, **kwargs):
         print(f"\n\n{utterance_end}\n\n")
 
@@ -73,6 +78,7 @@ def main():
         liveClient = MyLiveClient(ClientOptionsFromEnv())
 
         options = LiveOptions(
+            model="nova-2",
             punctuate=True,
             language="en-US",
             encoding="linear16",
@@ -81,6 +87,7 @@ def main():
             # To get UtteranceEnd, the following must be set:
             interim_results=True,
             utterance_end_ms="1000",
+            vad_events=True,
         )
         liveClient.start(options, addons=dict(myattr="hello"), test="hello")
 
diff --git a/examples/streaming/async_http/main.py b/examples/streaming/async_http/main.py
index 2f02b4e1..e6c3e661 100644
--- a/examples/streaming/async_http/main.py
+++ b/examples/streaming/async_http/main.py
@@ -14,8 +14,7 @@
 API_KEY = os.getenv("DG_API_KEY")
 
 options = LiveOptions(
-    model="nova",
-    interim_results=False,
+    model="nova-2",
     language="en-US",
 )
 
@@ -39,7 +38,10 @@ async def on_message(self, result, **kwargs):
         async def on_metadata(self, metadata, **kwargs):
             print(f"\n\n{metadata}\n\n")
 
-        def on_utterance_end(self, utterance_end, **kwargs):
+        async def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
+        async def on_utterance_end(self, utterance_end, **kwargs):
             print(f"\n\n{utterance_end}\n\n")
 
         async def on_error(self, error, **kwargs):
@@ -47,6 +49,7 @@ async def on_error(self, error, **kwargs):
 
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
         dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
 
diff --git a/examples/streaming/http/main.py b/examples/streaming/http/main.py
index dbcce6df..fc4a5e6e 100644
--- a/examples/streaming/http/main.py
+++ b/examples/streaming/http/main.py
@@ -42,6 +42,9 @@ def on_message(self, result, **kwargs):
         def on_metadata(self, metadata, **kwargs):
             print(f"\n\n{metadata}\n\n")
 
+        def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
         def on_utterance_end(self, utterance_end, **kwargs):
             print(f"\n\n{utterance_end}\n\n")
 
@@ -50,11 +53,12 @@ def on_error(self, error, **kwargs):
 
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
         dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
 
         # connect to websocket
-        options = LiveOptions(model="nova", interim_results=False, language="en-US")
+        options = LiveOptions(model="nova-2", language="en-US")
         dg_connection.start(options)
 
         lock_exit = threading.Lock()
diff --git a/examples/streaming/microphone/main.py b/examples/streaming/microphone/main.py
index 1aade269..d27684d9 100644
--- a/examples/streaming/microphone/main.py
+++ b/examples/streaming/microphone/main.py
@@ -39,6 +39,9 @@ def on_message(self, result, **kwargs):
         def on_metadata(self, metadata, **kwargs):
             print(f"\n\n{metadata}\n\n")
 
+        def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
         def on_utterance_end(self, utterance_end, **kwargs):
             print(f"\n\n{utterance_end}\n\n")
 
@@ -47,10 +50,12 @@ def on_error(self, error, **kwargs):
 
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
         dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
 
         options = LiveOptions(
+            model="nova-2",
             punctuate=True,
             language="en-US",
             encoding="linear16",
@@ -59,6 +64,7 @@ def on_error(self, error, **kwargs):
             # To get UtteranceEnd, the following must be set:
             interim_results=True,
             utterance_end_ms="1000",
+            vad_events=True,
         )
         dg_connection.start(options, addons=dict(myattr="hello"), test="hello")
 