Implement Microphone Mute

deepgram · Mar 13, 2024 · 7b35fa6 · 7b35fa6
1 parent c44eb48
commit 7b35fa6
Show file tree

Hide file tree

Showing 2 changed files with 174 additions and 11 deletions.
diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
@@ -19,7 +19,7 @@ class Microphone:
 
     def __init__(
         self,
-        push_callback,
+        push_callback=None,
         verbose: int = LOGGING,
         rate: int = RATE,
         chunk: int = CHUNK,
@@ -45,6 +45,7 @@ def __init__(
         self.asyncio_loop = None
         self.asyncio_thread = None
         self.stream = None
+        self.is_muted = False
 
     def _start_asyncio_loop(self) -> None:
         self.asyncio_loop = asyncio.new_event_loop()
@@ -66,6 +67,12 @@ def is_active(self) -> bool:
         self.logger.debug("Microphone.is_active LEAVE")
         return val
 
+    def set_callback(self, push_callback) -> None:
+        """
+        Set the callback function to be called when data is received.
+
+        The callable is stored as ``push_callback_org``. ``start()`` inspects
+        that attribute (wrapping it when it is a coroutine function) to build
+        the callback invoked for each audio chunk, so this presumably has to
+        be called before ``start()`` to take effect.
+        """
+        self.push_callback_org = push_callback
+
     def start(self) -> bool:
         """
         starts the microphone stream
@@ -83,16 +90,6 @@ def start(self) -> bool:
         self.logger.info("chunk: %d", self.chunk)
         self.logger.info("input_device_id: %d", self.input_device_index)
 
-        self.stream = self.audio.open(
-            format=self.format,
-            channels=self.channels,
-            rate=self.rate,
-            input=True,
-            frames_per_buffer=self.chunk,
-            input_device_index=self.input_device_index,
-            stream_callback=self._callback,
-        )
-
         if inspect.iscoroutinefunction(self.push_callback_org):
             self.logger.verbose("async/await callback - wrapping")
             # Run our own asyncio loop.
@@ -106,6 +103,16 @@ def start(self) -> bool:
             self.logger.verbose("regular threaded callback")
             self.push_callback = self.push_callback_org
 
+        self.stream = self.audio.open(
+            format=self.format,
+            channels=self.channels,
+            rate=self.rate,
+            input=True,
+            frames_per_buffer=self.chunk,
+            input_device_index=self.input_device_index,
+            stream_callback=self._callback,
+        )
+
         self.exit.clear()
         self.stream.start_stream()
 
@@ -133,6 +140,10 @@ def _callback(self, input_data, frame_count, time_info, status_flags):
             return None, pyaudio.paContinue
 
         try:
+            if self.is_muted:
+                size = len(input_data)
+                input_data = b"\x00" * size
+
             self.push_callback(input_data)
         except Exception as e:
             self.logger.error("Error while sending: %s", str(e))
@@ -142,6 +153,40 @@ def _callback(self, input_data, frame_count, time_info, status_flags):
         self.logger.debug("Microphone._callback LEAVE")
         return input_data, pyaudio.paContinue
 
+    def mute(self) -> bool:
+        """
+        Mutes the microphone stream.
+
+        While muted, ``_callback`` still forwards every chunk to the push
+        callback, but with the captured bytes replaced by zero bytes of the
+        same length.
+
+        Returns:
+            True when the mute flag was set; False when ``self.stream`` is
+            None (e.g. ``start()`` has not opened it yet), in which case the
+            flag is left unchanged.
+        """
+        self.logger.debug("Microphone.mute ENTER")
+
+        if self.stream is None:
+            self.logger.error("mute() failed. Library not initialized.")
+            self.logger.debug("Microphone.mute LEAVE")
+            return False
+
+        # Only a flag is flipped here; the stream itself is not touched.
+        self.is_muted = True
+
+        self.logger.notice("mute() succeeded")
+        self.logger.debug("Microphone.mute LEAVE")
+        return True
+
+    def unmute(self) -> bool:
+        """
+        Unmutes the microphone stream.
+
+        Clears the mute flag so that ``_callback`` forwards the captured
+        bytes unchanged again instead of zero bytes.
+
+        Returns:
+            True when the mute flag was cleared; False when ``self.stream``
+            is None (e.g. ``start()`` has not opened it yet), in which case
+            the flag is left unchanged.
+        """
+        self.logger.debug("Microphone.unmute ENTER")
+
+        if self.stream is None:
+            self.logger.error("unmute() failed. Library not initialized.")
+            self.logger.debug("Microphone.unmute LEAVE")
+            return False
+
+        # Only a flag is flipped here; the stream itself is not touched.
+        self.is_muted = False
+
+        self.logger.notice("unmute() succeeded")
+        self.logger.debug("Microphone.unmute LEAVE")
+        return True
+
     def finish(self) -> bool:
         """
         Stops the microphone stream

diff --git a/examples/advanced/streaming/mute-microphone/main.py b/examples/advanced/streaming/mute-microphone/main.py
@@ -0,0 +1,118 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from dotenv import load_dotenv
+import logging, verboselogs
+from time import sleep
+
+from deepgram import (
+    DeepgramClient,
+    DeepgramClientOptions,
+    LiveTranscriptionEvents,
+    LiveOptions,
+    Microphone,
+)
+
+# Load settings from a local .env file before the client is created.
+# NOTE(review): presumably this is where the Deepgram API key comes from,
+# since DeepgramClient() is built without arguments below - confirm.
+load_dotenv()
+
+
+def main():
+    """
+    Stream microphone audio to a Deepgram live connection and demonstrate
+    Microphone.mute() / Microphone.unmute().
+
+    Flow: create the client, the microphone and the live connection; register
+    the event handlers; start the connection; attach ``dg_connection.send`` as
+    the microphone callback; start the microphone; then mute, unmute and stop
+    on three successive Enter presses. Any exception is printed and swallowed.
+    """
+    try:
+        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
+        # config = DeepgramClientOptions(
+        #     verbose=logging.DEBUG, options={"keepalive": "true"}
+        # )
+        # deepgram: DeepgramClient = DeepgramClient("", config)
+        # otherwise, use default config
+        deepgram: DeepgramClient = DeepgramClient()
+
+        # create the microphone without a callback; it is attached further
+        # down with set_callback() once the connection has been started
+        microphone = Microphone()
+
+        dg_connection = deepgram.listen.live.v("1")
+
+        def on_open(self, open, **kwargs):
+            print(f"\n\n{open}\n\n")
+
+        def on_message(self, result, **kwargs):
+            sentence = result.channel.alternatives[0].transcript
+            if len(sentence) == 0:
+                return
+            # mute only for the duration of the print: chunks captured in
+            # between are forwarded as zero bytes
+            microphone.mute()
+            print(f"speaker: {sentence}")
+            microphone.unmute()
+
+        def on_metadata(self, metadata, **kwargs):
+            print(f"\n\n{metadata}\n\n")
+
+        def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
+        def on_utterance_end(self, utterance_end, **kwargs):
+            print(f"\n\n{utterance_end}\n\n")
+
+        def on_error(self, error, **kwargs):
+            print(f"\n\n{error}\n\n")
+
+        def on_close(self, close, **kwargs):
+            print(f"\n\n{close}\n\n")
+
+        dg_connection.on(LiveTranscriptionEvents.Open, on_open)
+        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
+        dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
+        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
+        dg_connection.on(LiveTranscriptionEvents.Error, on_error)
+        dg_connection.on(LiveTranscriptionEvents.Close, on_close)
+
+        options: LiveOptions = LiveOptions(
+            model="nova-2",
+            punctuate=True,
+            language="en-US",
+            encoding="linear16",
+            channels=1,
+            sample_rate=16000,
+            # To get UtteranceEnd, the following must be set:
+            interim_results=True,
+            utterance_end_ms="1000",
+            vad_events=True,
+        )
+        dg_connection.start(options)
+
+        # set the callback on the microphone
+        microphone.set_callback(dg_connection.send)
+
+        # start microphone
+        microphone.start()
+
+        # wait for the user before muting
+        input("Press Enter to mute microphone...\n\n")
+
+        microphone.mute()
+
+        # wait for the user before unmuting
+        input("Press Enter to unmute microphone...\n\n")
+
+        microphone.unmute()
+
+        # wait for the user before stopping
+        input("Press Enter to stop recording...\n\n")
+
+        # Stop the microphone stream
+        microphone.finish()
+
+        # Indicate that we've finished
+        dg_connection.finish()
+
+        print("Finished")
+        # sleep(30)  # wait 30 seconds to see if there is any additional socket activity
+        # print("Really done!")
+
+    except Exception as e:
+        # NOTE(review): this handler wraps the whole example, so the message
+        # below is printed for any failure, not only for socket errors.
+        print(f"Could not open socket: {e}")
+        return
+
+
+# Run the example only when this file is executed directly, not on import.
+if __name__ == "__main__":
+    main()