deepgram · davidvonthenen · Dec 28, 2023 · Dec 27, 2023
@@ -189,6 +189,26 @@ To make sure our community is safe for all, be sure to review and agree to our
 [Code of Conduct](./CODE_OF_CONDUCT.md). Then see the
 [Contribution](./CONTRIBUTING.md) guidelines for more information.
 
+## Prerequisites
+
+To develop new features for the SDK itself, first uninstall any previously installed `deepgram-sdk` package, then install the dependencies listed in `requirements.txt`, and finally install the SDK locally in editable mode (via pip) so that Python uses your working copy.
+
+From the root of the repo, that would entail:
+
+```bash
+pip uninstall deepgram-sdk
+pip install -r requirements.txt
+pip install -e .
+```
+
+## Testing
+
+If you are looking to contribute or modify pytest code, then you need to install the following dependencies:
+
+```bash
+pip install -r requirements-dev.txt
+```
+
 # Getting Help
 
 We love to hear from you so if you have questions, comments or find a bug in the

@@ -19,6 +19,7 @@
 from .client import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 

@@ -21,6 +21,7 @@
 from .clients import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 

@@ -12,6 +12,7 @@
 from .live import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 

@@ -35,6 +35,7 @@
 from .live.client import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 

@@ -10,5 +10,6 @@
 from .client import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
@@ -9,6 +9,7 @@
 from .v1.response import (
     LiveResultResponse as LiveResultResponseLatest,
     MetadataResponse as MetadataResponseLatest,
+    UtteranceEndResponse as UtteranceEndResponseLatest,
     ErrorResponse as ErrorResponseLatest,
 )
 
@@ -44,6 +45,14 @@ class MetadataResponse(MetadataResponseLatest):
     pass
 
 
+class UtteranceEndResponse(UtteranceEndResponseLatest):
+    """
+    Pass through for UtteranceEndResponse based on API version: subclasses the class imported as UtteranceEndResponseLatest and adds no members of its own.
+    """
+
+    pass
+
+
 class ErrorResponse(ErrorResponseLatest):
     """
     pass through for ErrorResponse based on API version

@@ -14,5 +14,6 @@ class LiveTranscriptionEvents(Enum):
     Close = "Close"
     Transcript = "Results"
     Metadata = "Metadata"
+    UtteranceEnd = "UtteranceEnd"
     Error = "Error"
     Warning = "Warning"
@@ -9,5 +9,6 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
@@ -14,6 +14,7 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 from .options import LiveOptions
@@ -121,6 +122,19 @@ async def _start(self) -> None:
                             metadata=result,
                             **dict(self.kwargs),
                         )
+                    case LiveTranscriptionEvents.UtteranceEnd.value:
+                        self.logger.debug(
+                            "response_type: %s, data: %s", response_type, data
+                        )
+                        result = UtteranceEndResponse.from_json(message)
+                        if result is None:
+                            self.logger.error("UtteranceEndResponse.from_json is None")
+                            continue
+                        await self._emit(
+                            LiveTranscriptionEvents.UtteranceEnd,
+                            utterance_end=result,
+                            **dict(self.kwargs),
+                        )
                     case LiveTranscriptionEvents.Error.value:
                         self.logger.debug(
                             "response_type: %s, data: %s", response_type, data

@@ -15,6 +15,7 @@
 from .response import (
     LiveResultResponse,
     MetadataResponse,
+    UtteranceEndResponse,
     ErrorResponse,
 )
 from .options import LiveOptions
@@ -152,6 +153,19 @@ def _listening(self) -> None:
                             metadata=result,
                             **dict(self.kwargs),
                         )
+                    case LiveTranscriptionEvents.UtteranceEnd.value:
+                        self.logger.debug(
+                            "response_type: %s, data: %s", response_type, data
+                        )
+                        result = UtteranceEndResponse.from_json(message)
+                        if result is None:
+                            self.logger.error("UtteranceEndResponse.from_json is None")
+                            continue
+                        self._emit(
+                            LiveTranscriptionEvents.UtteranceEnd,
+                            utterance_end=result,
+                            **dict(self.kwargs),
+                        )
                     case LiveTranscriptionEvents.Error.value:
                         self.logger.debug(
                             "response_type: %s, data: %s", response_type, data

@@ -152,6 +152,19 @@ def __getitem__(self, key):
         return _dict[key]
 
 
+# Utterance End Message
+
+
+@dataclass_json
+@dataclass
+class UtteranceEndResponse:
+    """
+    UtteranceEnd Message from the Deepgram Platform
+
+    Only the `type` field is declared on this class.
+    NOTE(review): if the platform sends further fields with this message,
+    they are not modeled here - confirm against the API reference.
+    """
+
+    # Message type; defaults to an empty string when not supplied.
+    # Presumably "UtteranceEnd" for this message - TODO confirm.
+    type: Optional[str] = ""
+
+
 # Error Message
 
 

@@ -2,6 +2,14 @@
 
 The example projects are meant to be used to test features locally by contributors working on this SDK.
 
+## Prerequisites
+
+To run the code in the `examples` folder, first install the dependencies listed in `requirements-examples.txt`:
+
+```bash
+pip install -r requirements-examples.txt
+```
+
 ## Steps to Test Your Code
 
 If you are contributing changes to this SDK, you can test those changes by using the `prerecorded`, `streaming`, or `manage` "hello world"-style applications in the `examples` folder. Here are the steps to follow:
@@ -26,4 +34,4 @@ If you chose to set an environment variable in your shell profile (ie `.bash_pro
 
 ```bash
 python main.py
-```
+```
@@ -0,0 +1,20 @@
+# Live API (Real-Time) Example
+
+This example uses the microphone as input in order to detect conversation insights in what is being said. It requires additional components (for the microphone) to be installed in order to function correctly.
+
+## Configuration
+
+The SDK (and this example) needs to be initialized with your account's credentials `DEEPGRAM_API_KEY`, which are available in your [Deepgram Console][dg-console]. If you don't have a Deepgram account, you can [sign up here][dg-signup] for free.
+
+You must add your `DEEPGRAM_API_KEY` to your list of environment variables. We use environment variables because they are easy to configure, support PaaS-style deployments, and work well in containerized environments like Docker and Kubernetes.
+
+```bash
+export DEEPGRAM_API_KEY=YOUR-APP-KEY-HERE
+```
+
+## Installation
+
+The Live API (Real-Time) example makes use of a [microphone package](https://github.com/deepgram/deepgram-python-sdk/tree/main/deepgram/audio/microphone) contained within the repository. That package makes use of the [PortAudio library](http://www.portaudio.com/) which is a cross-platform open source audio library. If you are on Linux, you can install this library using whatever package manager is available (yum, apt, etc.) on your operating system. If you are on macOS, you can install this library using [brew](https://brew.sh/).
+
+[dg-console]: https://console.deepgram.com/
+[dg-signup]: https://console.deepgram.com/signup
@@ -0,0 +1,114 @@
+# Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from dotenv import load_dotenv
+import logging, verboselogs
+from time import sleep
+
+from deepgram import (
+    DeepgramClient,
+    DeepgramClientOptions,
+    LiveTranscriptionEvents,
+    LiveClient,
+    LiveOptions,
+    Microphone,
+    LiveResultResponse,
+    MetadataResponse,
+    UtteranceEndResponse,
+    ErrorResponse,
+)
+
+load_dotenv()
+
+
+# more complex example
+class MyLiveClient(LiveClient):
+    def __init__(self, config: DeepgramClientOptions):
+        super().__init__(config)
+        super().on(LiveTranscriptionEvents.Transcript, self.on_message)
+        super().on(LiveTranscriptionEvents.Metadata, self.on_metadata)
+        super().on(LiveTranscriptionEvents.UtteranceEnd, self.on_utterance_end)
+        super().on(LiveTranscriptionEvents.Error, self.on_error)
+        # self.test = "child"
+
+    def on_message(self, parent, result, **kwargs):
+        # print(f"child attr: {self.test}")
+        # print(f"parent attr: {parent.endpoint}")
+        sentence = result.channel.alternatives[0].transcript
+        if len(sentence) == 0:
+            return
+        print(f"speaker: {sentence}")
+
+        # testing modifying self class
+        if self.myattr is not None:
+            print(f"myattr - {self.myattr}")
+        else:
+            print("Setting myattr=hello")
+            setattr(self, "myattr", "hello")
+        self.myattr = "bye"
+
+        # testing kwargs
+        val = kwargs["test"]
+        print(f"kwargs - {val}")
+
+    def on_metadata(self, parent, metadata, **kwargs):
+        print(f"\n\n{metadata}\n\n")
+
+    def on_utterance_end(self, parent, utterance_end, **kwargs):
+        print(f"\n\n{utterance_end}\n\n")
+
+    def on_error(self, parent, error, **kwargs):
+        print(f"\n\n{error}\n\n")
+
+
+def main():
+    try:
+        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
+        # config = DeepgramClientOptions(
+        #     verbose=logging.DEBUG,
+        #     options={"keepalive": "true"}
+        # )
+        # deepgram: DeepgramClient = DeepgramClient("", config)
+        # otherwise, use default config
+        deepgram = DeepgramClient()
+        liveClient = MyLiveClient(deepgram.config)
+
+        options = LiveOptions(
+            punctuate=True,
+            language="en-US",
+            encoding="linear16",
+            channels=1,
+            sample_rate=16000,
+            # To get UtteranceEnd, the following must be set:
+            interim_results=True,
+            utterance_end_ms="1000",
+        )
+        liveClient.start(options, addons=dict(myattr="hello"), test="hello")
+
+        # Open a microphone stream
+        microphone = Microphone(liveClient.send)
+
+        # start microphone
+        microphone.start()
+
+        # wait until finished
+        input("Press Enter to stop recording...\n\n")
+
+        # Wait for the microphone to close
+        microphone.finish()
+
+        # Indicate that we've finished
+        liveClient.finish()
+
+        print("Finished")
+        # sleep(30)  # wait 30 seconds to see if there is any additional socket activity
+        # print("Really done!")
+
+    except Exception as e:
+        print(f"Could not open socket: {e}")
+        return
+
+
+if __name__ == "__main__":
+    main()
@@ -29,25 +29,23 @@ async def main():
         dg_connection = deepgram.listen.asynclive.v("1")
 
         async def on_message(self, result, **kwargs):
-            if result is None:
-                return
             sentence = result.channel.alternatives[0].transcript
             if len(sentence) == 0:
                 return
             print(f"speaker: {sentence}")
 
         async def on_metadata(self, metadata, **kwargs):
-            if metadata is None:
-                return
             print(f"\n\n{metadata}\n\n")
 
+        def on_utterance_end(self, utterance_end, **kwargs):
+            print(f"\n\n{utterance_end}\n\n")
+
         async def on_error(self, error, **kwargs):
-            if error is None:
-                return
             print(f"\n\n{error}\n\n")
 
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
 
         # connect to websocket

@@ -34,25 +34,23 @@ def main():
         dg_connection = deepgram.listen.live.v("1")
 
         def on_message(self, result, **kwargs):
-            if result is None:
-                return
             sentence = result.channel.alternatives[0].transcript
             if len(sentence) == 0:
                 return
             print(f"speaker: {sentence}")
 
         def on_metadata(self, metadata, **kwargs):
-            if metadata is None:
-                return
             print(f"\n\n{metadata}\n\n")
 
+        def on_utterance_end(self, utterance_end, **kwargs):
+            """Print the utterance-end payload, padded with blank lines."""
+            print(f"\n\n{utterance_end}\n\n")
+
         def on_error(self, error, **kwargs):
-            if error is None:
-                return
             print(f"\n\n{error}\n\n")
 
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
         dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
         dg_connection.on(LiveTranscriptionEvents.Error, on_error)
 
         # connect to websocket