diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
index 0212db46..aea11253 100644
--- a/deepgram/audio/microphone/microphone.py
+++ b/deepgram/audio/microphone/microphone.py
@@ -19,7 +19,7 @@ class Microphone:
 
     def __init__(
         self,
-        push_callback,
+        push_callback=None,
         verbose: int = LOGGING,
         rate: int = RATE,
         chunk: int = CHUNK,
@@ -45,6 +45,7 @@ def __init__(
         self.asyncio_loop = None
         self.asyncio_thread = None
         self.stream = None
+        self.is_muted = False
 
     def _start_asyncio_loop(self) -> None:
         self.asyncio_loop = asyncio.new_event_loop()
@@ -66,6 +67,12 @@ def is_active(self) -> bool:
         self.logger.debug("Microphone.is_active LEAVE")
         return val
 
+    def set_callback(self, push_callback) -> None:
+        """
+        Set the callback that receives audio data; applied on the next start().
+        """
+        self.push_callback_org = push_callback
+
     def start(self) -> bool:
         """
         starts the microphone stream
@@ -83,16 +90,11 @@ def start(self) -> bool:
         self.logger.info("chunk: %d", self.chunk)
         self.logger.info("input_device_id: %d", self.input_device_index)
 
-        self.stream = self.audio.open(
-            format=self.format,
-            channels=self.channels,
-            rate=self.rate,
-            input=True,
-            frames_per_buffer=self.chunk,
-            input_device_index=self.input_device_index,
-            stream_callback=self._callback,
-        )
-
+        if self.push_callback_org is None:
+            self.logger.error("start() failed. No callback set.")
+            self.logger.debug("Microphone.start LEAVE")
+            return False
+
         if inspect.iscoroutinefunction(self.push_callback_org):
             self.logger.verbose("async/await callback - wrapping")
             # Run our own asyncio loop.
@@ -106,6 +108,16 @@ def start(self) -> bool:
             self.logger.verbose("regular threaded callback")
             self.push_callback = self.push_callback_org
 
+        self.stream = self.audio.open(
+            format=self.format,
+            channels=self.channels,
+            rate=self.rate,
+            input=True,
+            frames_per_buffer=self.chunk,
+            input_device_index=self.input_device_index,
+            stream_callback=self._callback,
+        )
+
         self.exit.clear()
         self.stream.start_stream()
 
@@ -133,6 +145,10 @@ def _callback(self, input_data, frame_count, time_info, status_flags):
             return None, pyaudio.paContinue
 
         try:
+            if self.is_muted:
+                size = len(input_data)
+                input_data = b"\x00" * size
+
             self.push_callback(input_data)
         except Exception as e:
             self.logger.error("Error while sending: %s", str(e))
@@ -142,6 +158,40 @@ def _callback(self, input_data, frame_count, time_info, status_flags):
         self.logger.debug("Microphone._callback LEAVE")
         return input_data, pyaudio.paContinue
 
+    def mute(self) -> bool:
+        """
+        Mutes the microphone stream (audio is replaced with zero bytes before the callback)
+        """
+        self.logger.debug("Microphone.mute ENTER")
+
+        if self.stream is None:
+            self.logger.error("mute() failed. Library not initialized.")
+            self.logger.debug("Microphone.mute LEAVE")
+            return False
+
+        self.is_muted = True
+
+        self.logger.notice("mute() succeeded")
+        self.logger.debug("Microphone.mute LEAVE")
+        return True
+
+    def unmute(self) -> bool:
+        """
+        Unmutes the microphone stream (captured audio is passed to the callback unchanged)
+        """
+        self.logger.debug("Microphone.unmute ENTER")
+
+        if self.stream is None:
+            self.logger.error("unmute() failed. Library not initialized.")
+            self.logger.debug("Microphone.unmute LEAVE")
+            return False
+
+        self.is_muted = False
+
+        self.logger.notice("unmute() succeeded")
+        self.logger.debug("Microphone.unmute LEAVE")
+        return True
+
     def finish(self) -> bool:
         """
         Stops the microphone stream
diff --git a/examples/advanced/streaming/mute-microphone/main.py b/examples/advanced/streaming/mute-microphone/main.py
new file mode 100644
index 00000000..8d829e01
--- /dev/null
+++ b/examples/advanced/streaming/mute-microphone/main.py
@@ -0,0 +1,118 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from dotenv import load_dotenv
+import logging, verboselogs
+from time import sleep
+
+from deepgram import (
+    DeepgramClient,
+    DeepgramClientOptions,
+    LiveTranscriptionEvents,
+    LiveOptions,
+    Microphone,
+)
+
+load_dotenv()
+
+
+def main():
+    try:
+        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
+        # config = DeepgramClientOptions(
+        #     verbose=logging.DEBUG, options={"keepalive": "true"}
+        # )
+        # deepgram: DeepgramClient = DeepgramClient("", config)
+        # otherwise, use default config
+        deepgram: DeepgramClient = DeepgramClient()
+
+        # create the microphone
+        microphone = Microphone()
+
+        dg_connection = deepgram.listen.live.v("1")
+
+        def on_open(self, open, **kwargs):
+            print(f"\n\n{open}\n\n")
+
+        def on_message(self, result, **kwargs):
+            sentence = result.channel.alternatives[0].transcript
+            if not sentence:
+                return
+            microphone.mute()
+            print(f"speaker: {sentence}")
+            microphone.unmute()
+
+        def on_metadata(self, metadata, **kwargs):
+            print(f"\n\n{metadata}\n\n")
+
+        def on_speech_started(self, speech_started, **kwargs):
+            print(f"\n\n{speech_started}\n\n")
+
+        def on_utterance_end(self, utterance_end, **kwargs):
+            print(f"\n\n{utterance_end}\n\n")
+
+        def on_error(self, error, **kwargs):
+            print(f"\n\n{error}\n\n")
+
+        def on_close(self, close, **kwargs):
+            print(f"\n\n{close}\n\n")
+
+        dg_connection.on(LiveTranscriptionEvents.Open, on_open)
+        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
+        dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
+        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
+        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
+        dg_connection.on(LiveTranscriptionEvents.Error, on_error)
+        dg_connection.on(LiveTranscriptionEvents.Close, on_close)
+
+        options: LiveOptions = LiveOptions(
+            model="nova-2",
+            punctuate=True,
+            language="en-US",
+            encoding="linear16",
+            channels=1,
+            sample_rate=16000,
+            # To get UtteranceEnd, the following must be set:
+            interim_results=True,
+            utterance_end_ms="1000",
+            vad_events=True,
+        )
+        dg_connection.start(options)
+
+        # set the callback on the microphone
+        microphone.set_callback(dg_connection.send)
+
+        # start microphone
+        microphone.start()
+
+        # wait for the user to request mute
+        input("Press Enter to mute microphone...\n\n")
+
+        microphone.mute()
+
+        # wait for the user to request unmute
+        input("Press Enter to unmute microphone...\n\n")
+
+        microphone.unmute()
+
+        # wait for the user to stop recording
+        input("Press Enter to stop recording...\n\n")
+
+        # Stop the microphone stream
+        microphone.finish()
+
+        # Indicate that we've finished
+        dg_connection.finish()
+
+        print("Finished")
+        # sleep(30)  # wait 30 seconds to see if there is any additional socket activity
+        # print("Really done!")
+
+    except Exception as e:
+        print(f"Could not open socket: {e}")
+        return
+
+
+if __name__ == "__main__":
+    main()