diff --git a/README.md b/README.md index 6ce9792d..d87c90c0 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,26 @@ To make sure our community is safe for all, be sure to review and agree to our [Code of Conduct](./CODE_OF_CONDUCT.md). Then see the [Contribution](./CONTRIBUTING.md) guidelines for more information. +## Prerequisites + +To develop new features for the SDK itself, first uninstall any previous installation of `deepgram-sdk`, then install the dependencies listed in `requirements.txt`, and finally tell Python (via pip) to use your local checkout by installing the SDK in editable mode. + +From the root of the repo, that would entail: + +```bash +pip uninstall deepgram-sdk +pip install -r requirements.txt +pip install -e . +``` + +## Testing + +If you are looking to contribute or modify pytest code, then you need to install the following dependencies: + +```bash +pip install -r requirements-dev.txt +``` + # Getting Help We love to hear from you so if you have questions, comments or find a bug in the diff --git a/deepgram/__init__.py b/deepgram/__init__.py index d54ddee3..d0416dd5 100644 --- a/deepgram/__init__.py +++ b/deepgram/__init__.py @@ -19,6 +19,7 @@ from .client import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/client.py b/deepgram/client.py index 000f23a9..eb6aa5f8 100644 --- a/deepgram/client.py +++ b/deepgram/client.py @@ -21,6 +21,7 @@ from .clients import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/clients/__init__.py b/deepgram/clients/__init__.py index 501cadd5..056238d5 100644 --- a/deepgram/clients/__init__.py +++ b/deepgram/clients/__init__.py @@ -12,6 +12,7 @@ from .live import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/clients/listen.py b/deepgram/clients/listen.py index a6d8e328..2c5aff86 100644 --- a/deepgram/clients/listen.py +++ 
b/deepgram/clients/listen.py @@ -35,6 +35,7 @@ from .live.client import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/clients/live/__init__.py b/deepgram/clients/live/__init__.py index f8902f31..eddac214 100644 --- a/deepgram/clients/live/__init__.py +++ b/deepgram/clients/live/__init__.py @@ -10,5 +10,6 @@ from .client import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/clients/live/client.py b/deepgram/clients/live/client.py index 6d2def0f..da58ea06 100644 --- a/deepgram/clients/live/client.py +++ b/deepgram/clients/live/client.py @@ -9,6 +9,7 @@ from .v1.response import ( LiveResultResponse as LiveResultResponseLatest, MetadataResponse as MetadataResponseLatest, + UtteranceEndResponse as UtteranceEndResponseLatest, ErrorResponse as ErrorResponseLatest, ) @@ -44,6 +45,14 @@ class MetadataResponse(MetadataResponseLatest): pass +class UtteranceEndResponse(UtteranceEndResponseLatest): + """ + pass through for UtteranceEndResponse based on API version + """ + + pass + + class ErrorResponse(ErrorResponseLatest): """ pass through for ErrorResponse based on API version diff --git a/deepgram/clients/live/enums.py b/deepgram/clients/live/enums.py index 501feac2..1263753e 100644 --- a/deepgram/clients/live/enums.py +++ b/deepgram/clients/live/enums.py @@ -14,5 +14,6 @@ class LiveTranscriptionEvents(Enum): Close = "Close" Transcript = "Results" Metadata = "Metadata" + UtteranceEnd = "UtteranceEnd" Error = "Error" Warning = "Warning" diff --git a/deepgram/clients/live/v1/__init__.py b/deepgram/clients/live/v1/__init__.py index 5db2dca9..65264a6e 100644 --- a/deepgram/clients/live/v1/__init__.py +++ b/deepgram/clients/live/v1/__init__.py @@ -9,5 +9,6 @@ from .response import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) diff --git a/deepgram/clients/live/v1/async_client.py b/deepgram/clients/live/v1/async_client.py index 
4909a6fe..258ddbee 100644 --- a/deepgram/clients/live/v1/async_client.py +++ b/deepgram/clients/live/v1/async_client.py @@ -14,6 +14,7 @@ from .response import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) from .options import LiveOptions @@ -121,6 +122,19 @@ async def _start(self) -> None: metadata=result, **dict(self.kwargs), ) + case LiveTranscriptionEvents.UtteranceEnd.value: + self.logger.debug( + "response_type: %s, data: %s", response_type, data + ) + result = UtteranceEndResponse.from_json(message) + if result is None: + self.logger.error("UtteranceEndResponse.from_json is None") + continue + await self._emit( + LiveTranscriptionEvents.UtteranceEnd, + utterance_end=result, + **dict(self.kwargs), + ) case LiveTranscriptionEvents.Error.value: self.logger.debug( "response_type: %s, data: %s", response_type, data diff --git a/deepgram/clients/live/v1/client.py b/deepgram/clients/live/v1/client.py index b3c43811..331cb988 100644 --- a/deepgram/clients/live/v1/client.py +++ b/deepgram/clients/live/v1/client.py @@ -15,6 +15,7 @@ from .response import ( LiveResultResponse, MetadataResponse, + UtteranceEndResponse, ErrorResponse, ) from .options import LiveOptions @@ -152,6 +153,19 @@ def _listening(self) -> None: metadata=result, **dict(self.kwargs), ) + case LiveTranscriptionEvents.UtteranceEnd.value: + self.logger.debug( + "response_type: %s, data: %s", response_type, data + ) + result = UtteranceEndResponse.from_json(message) + if result is None: + self.logger.error("UtteranceEndResponse.from_json is None") + continue + self._emit( + LiveTranscriptionEvents.UtteranceEnd, + utterance_end=result, + **dict(self.kwargs), + ) case LiveTranscriptionEvents.Error.value: self.logger.debug( "response_type: %s, data: %s", response_type, data diff --git a/deepgram/clients/live/v1/response.py b/deepgram/clients/live/v1/response.py index 2e291ddd..b97507e6 100644 --- a/deepgram/clients/live/v1/response.py +++ 
b/deepgram/clients/live/v1/response.py @@ -152,6 +152,27 @@ def __getitem__(self, key): return _dict[key] +# Utterance End Message + + +@dataclass_json +@dataclass +class UtteranceEndResponse: + """ + UtteranceEnd Message from the Deepgram Platform + """ + + type: Optional[str] = "" + # remaining keys of the UtteranceEnd payload; without them from_json drops the data + channel: Optional[list] = None + last_word_end: Optional[float] = 0 + + def __getitem__(self, key): + # dict-style access, same as the preceding response class in this module + _dict = self.to_dict() + return _dict[key] + + # Error Message diff --git a/examples/README.md b/examples/README.md index b8469941..0e7c034c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,6 +2,14 @@ The example projects are meant to be used to test features locally by contributors working on this SDK. +## Prerequisites + +In order to run the code in the `examples` folder, you first need to install/pip the dependencies contained in the `requirements-examples.txt` for the examples. + +```bash +pip install -r requirements-examples.txt +``` + ## Steps to Test Your Code If you are contributing changes to this SDK, you can test those changes by using the `prerecorded`, `streaming`, or `manage` "hello world"-style applications in the `examples` folder. Here are the steps to follow: @@ -26,4 +34,4 @@ If you chose to set an environment variable in your shell profile (ie `.bash_pro ```bash python main.py -``` \ No newline at end of file +``` diff --git a/examples/advanced/microphone-inheritance/README.md b/examples/advanced/microphone-inheritance/README.md new file mode 100644 index 00000000..6e3c21ae --- /dev/null +++ b/examples/advanced/microphone-inheritance/README.md @@ -0,0 +1,20 @@ +# Live API (Real-Time) Example + +This example uses the Microphone as input in order to detect conversation insights in what is being said. This example required additional components (for the microphone) to be installed in order for this example to function correctly. + +## Configuration + +The SDK (and this example) needs to be initialized with your account's credentials `DEEPGRAM_API_KEY`, which are available in your [Deepgram Console][dg-console]. If you don't have a Deepgram account, you can [sign up here][dg-signup] for free. 
+ +You must add your `DEEPGRAM_API_KEY` to your list of environment variables. We use environment variables because they are easy to configure, support PaaS-style deployments, and work well in containerized environments like Docker and Kubernetes. + +```bash +export DEEPGRAM_API_KEY=YOUR-APP-KEY-HERE +``` + +## Installation + +The Live API (Real-Time) example makes use of a [microphone package](https://github.com/deepgram/deepgram-python-sdk/tree/main/deepgram/audio/microphone) contained within the repository. That package makes use of the [PortAudio library](http://www.portaudio.com/) which is a cross-platform open source audio library. If you are on Linux, you can install this library using whatever package manager is available (yum, apt, etc.) on your operating system. If you are on macOS, you can install this library using [brew](https://brew.sh/). + +[dg-console]: https://console.deepgram.com/ +[dg-signup]: https://console.deepgram.com/signup diff --git a/examples/advanced/microphone-inheritance/main.py b/examples/advanced/microphone-inheritance/main.py new file mode 100644 index 00000000..01576bbc --- /dev/null +++ b/examples/advanced/microphone-inheritance/main.py @@ -0,0 +1,114 @@ +# Copyright 2023 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+# SPDX-License-Identifier: MIT + +from dotenv import load_dotenv +import logging, verboselogs +from time import sleep + +from deepgram import ( + DeepgramClient, + DeepgramClientOptions, + LiveTranscriptionEvents, + LiveClient, + LiveOptions, + Microphone, + LiveResultResponse, + MetadataResponse, + UtteranceEndResponse, + ErrorResponse, +) + +load_dotenv() + + +# more complex example +class MyLiveClient(LiveClient): + def __init__(self, config: DeepgramClientOptions): + super().__init__(config) + super().on(LiveTranscriptionEvents.Transcript, self.on_message) + super().on(LiveTranscriptionEvents.Metadata, self.on_metadata) + super().on(LiveTranscriptionEvents.UtteranceEnd, self.on_utterance_end) + super().on(LiveTranscriptionEvents.Error, self.on_error) + # self.test = "child" + + def on_message(self, parent, result, **kwargs): + # print(f"child attr: {self.test}") + # print(f"parent attr: {parent.endpoint}") + sentence = result.channel.alternatives[0].transcript + if len(sentence) == 0: + return + print(f"speaker: {sentence}") + + # testing modifying self class + if getattr(self, "myattr", None) is not None:  # myattr may never have been set; avoid AttributeError + print(f"myattr - {self.myattr}") + else: + print("Setting myattr=hello") + setattr(self, "myattr", "hello") + self.myattr = "bye" + + # testing kwargs + val = kwargs["test"] + print(f"kwargs - {val}") + + def on_metadata(self, parent, metadata, **kwargs): + print(f"\n\n{metadata}\n\n") + + def on_utterance_end(self, parent, utterance_end, **kwargs): + print(f"\n\n{utterance_end}\n\n") + + def on_error(self, parent, error, **kwargs): + print(f"\n\n{error}\n\n") + + +def main(): + try: + # example of setting up a client config. 
logging values: WARNING, VERBOSE, DEBUG, SPAM + # config = DeepgramClientOptions( + # verbose=logging.DEBUG, + # options={"keepalive": "true"} + # ) + # deepgram: DeepgramClient = DeepgramClient("", config) + # otherwise, use default config + deepgram = DeepgramClient() + liveClient = MyLiveClient(deepgram.config) + + options = LiveOptions( + punctuate=True, + language="en-US", + encoding="linear16", + channels=1, + sample_rate=16000, + # To get UtteranceEnd, the following must be set: + interim_results=True, + utterance_end_ms="1000", + ) + liveClient.start(options, addons=dict(myattr="hello"), test="hello") + + # Open a microphone stream + microphone = Microphone(liveClient.send) + + # start microphone + microphone.start() + + # wait until finished + input("Press Enter to stop recording...\n\n") + + # Wait for the microphone to close + microphone.finish() + + # Indicate that we've finished + liveClient.finish() + + print("Finished") + # sleep(30) # wait 30 seconds to see if there is any additional socket activity + # print("Really done!") + + except Exception as e: + print(f"Could not open socket: {e}") + return + + +if __name__ == "__main__": + main() diff --git a/examples/streaming/async_http/main.py b/examples/streaming/async_http/main.py index 99cde345..a720f577 100644 --- a/examples/streaming/async_http/main.py +++ b/examples/streaming/async_http/main.py @@ -29,25 +29,23 @@ async def main(): dg_connection = deepgram.listen.asynclive.v("1") async def on_message(self, result, **kwargs): - if result is None: - return sentence = result.channel.alternatives[0].transcript if len(sentence) == 0: return print(f"speaker: {sentence}") async def on_metadata(self, metadata, **kwargs): - if metadata is None: - return print(f"\n\n{metadata}\n\n") + async def on_utterance_end(self, utterance_end, **kwargs): + print(f"\n\n{utterance_end}\n\n") + async def on_error(self, error, **kwargs): - if error is None: - return print(f"\n\n{error}\n\n") 
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message) dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata) + dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end) dg_connection.on(LiveTranscriptionEvents.Error, on_error) # connect to websocket diff --git a/examples/streaming/http/main.py b/examples/streaming/http/main.py index b0e6afaa..dbcce6df 100644 --- a/examples/streaming/http/main.py +++ b/examples/streaming/http/main.py @@ -34,25 +34,23 @@ def main(): dg_connection = deepgram.listen.live.v("1") def on_message(self, result, **kwargs): - if result is None: - return sentence = result.channel.alternatives[0].transcript if len(sentence) == 0: return print(f"speaker: {sentence}") def on_metadata(self, metadata, **kwargs): - if metadata is None: - return print(f"\n\n{metadata}\n\n") + def on_utterance_end(self, utterance_end, **kwargs): + print(f"\n\n{utterance_end}\n\n") + def on_error(self, error, **kwargs): - if error is None: - return print(f"\n\n{error}\n\n") dg_connection.on(LiveTranscriptionEvents.Transcript, on_message) dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata) + dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end) dg_connection.on(LiveTranscriptionEvents.Error, on_error) # connect to websocket diff --git a/examples/streaming/microphone/README.md b/examples/streaming/microphone/README.md new file mode 100644 index 00000000..6e3c21ae --- /dev/null +++ b/examples/streaming/microphone/README.md @@ -0,0 +1,20 @@ +# Live API (Real-Time) Example + +This example uses the Microphone as input in order to detect conversation insights in what is being said. This example requires additional components (for the microphone) to be installed in order for this example to function correctly. + +## Configuration + +The SDK (and this example) needs to be initialized with your account's credentials `DEEPGRAM_API_KEY`, which are available in your [Deepgram Console][dg-console]. 
If you don't have a Deepgram account, you can [sign up here][dg-signup] for free. + +You must add your `DEEPGRAM_API_KEY` to your list of environment variables. We use environment variables because they are easy to configure, support PaaS-style deployments, and work well in containerized environments like Docker and Kubernetes. + +```bash +export DEEPGRAM_API_KEY=YOUR-APP-KEY-HERE +``` + +## Installation + +The Live API (Real-Time) example makes use of a [microphone package](https://github.com/deepgram/deepgram-python-sdk/tree/main/deepgram/audio/microphone) contained within the repository. That package makes use of the [PortAudio library](http://www.portaudio.com/) which is a cross-platform open source audio library. If you are on Linux, you can install this library using whatever package manager is available (yum, apt, etc.) on your operating system. If you are on macOS, you can install this library using [brew](https://brew.sh/). + +[dg-console]: https://console.deepgram.com/ +[dg-signup]: https://console.deepgram.com/signup diff --git a/examples/streaming/microphone/main.py b/examples/streaming/microphone/main.py index 51d01de2..fb975382 100644 --- a/examples/streaming/microphone/main.py +++ b/examples/streaming/microphone/main.py @@ -30,38 +30,24 @@ def main(): dg_connection = deepgram.listen.live.v("1") - def on_message(self, result, addon=dict(myattr=True), **kwargs): - if result is None: - return + def on_message(self, result, **kwargs): sentence = result.channel.alternatives[0].transcript if len(sentence) == 0: return print(f"speaker: {sentence}") - # testing modifying self class - if self.myattr is not None: - print(f"myattr - {self.myattr}") - else: - print("Setting myattr=hello") - setattr(self, "myattr", "hello") - self.myattr = "bye" - - # testing kwargs - val = kwargs["test"] - print(f"kwargs - {val}") - def on_metadata(self, metadata, **kwargs): - if metadata is None: - return print(f"\n\n{metadata}\n\n") + def on_utterance_end(self, utterance_end, 
**kwargs): + print(f"\n\n{utterance_end}\n\n") + def on_error(self, error, **kwargs): - if error is None: - return print(f"\n\n{error}\n\n") dg_connection.on(LiveTranscriptionEvents.Transcript, on_message) dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata) + dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end) dg_connection.on(LiveTranscriptionEvents.Error, on_error) options = LiveOptions( @@ -70,6 +56,9 @@ def on_error(self, error, **kwargs): encoding="linear16", channels=1, sample_rate=16000, + # To get UtteranceEnd, the following must be set: + interim_results=True, + utterance_end_ms="1000", ) dg_connection.start(options, addons=dict(myattr="hello"), test="hello")