## Text To Speech
The goal here is to use the `edge-tts` library to read text aloud in a natural, human-sounding voice.

In [13]:
import os
from datetime import datetime
from typing import Optional

import edge_tts
import speech_recognition as sr

In [None]:
async def speak_response(text: str, voice: str = "en-US-JennyNeural", output_folder: str = "tts_outputs") -> None:
    """
    Convert text to speech using Microsoft Edge TTS and save it as an MP3 file.

    The recording is written to ``output_folder`` as ``response_<timestamp>.mp3``
    and the resulting path is printed. This is a coroutine, so it must be awaited.

    Params:
        :text: The text to convert to speech
        :voice: Name of the Edge TTS voice to use for the output
        :output_folder: Folder in which recordings are kept (created if missing)
    """
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Build a timestamped filename. No ':' is used because it is not a legal
    # character in Windows file names, and the timestamp goes down to
    # microseconds so calls made within the same minute do not overwrite
    # each other's recordings.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    output_file = os.path.join(output_folder, f"response_{timestamp}.mp3")

    # Use Microsoft Edge TTS to generate speech from the text
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file)

    print(f"Response saved as: {output_file}")

In [None]:
def record_microphone(recogniser: "sr.Recognizer", duration: int = 5, timeout: float = 1) -> Optional[str]:
    """
    Record a single phrase from the microphone and transcribe it to text.

    Progress and error messages are printed rather than raised, so a failed
    recording or transcription results in ``None`` instead of an exception.

    Params:
        :recogniser: Recognizer used to calibrate, record and transcribe
        :duration: Maximum length of the recorded phrase, in seconds
        :timeout: How long to wait, in seconds, for speech to start

    Returns:
        The transcribed text, or None if no speech was captured or the
        transcription failed.
    """
    with sr.Microphone() as source:
        print("Adjusting for ambient noise")
        # Calibrates background noise level to differentiate speech from silence
        recogniser.adjust_for_ambient_noise(source, duration=1.5)

        print(f"Listening for {duration} seconds... Speak now!")
        try:
            # Record audio until the phrase ends or the time limit is reached
            audio = recogniser.listen(source, timeout=timeout, phrase_time_limit=duration)
            print('Recording Complete. Processing, please wait.')

            # Transcribe
            text = recogniser.recognize_google(audio)
            print(f"You said: {text}")
            return text

        except sr.WaitTimeoutError:
            # Nobody started speaking before `timeout` elapsed
            print(f"Error: no speech detected within {timeout} seconds")
        except sr.UnknownValueError:
            # This exception carries no message, so printing it would show a blank error
            print("Error: could not understand the audio")
        except sr.RequestError as e:
            print(f"Error: speech recognition request failed: {e}")
        except Exception as e:
            # Stay best-effort: report anything unexpected instead of raising
            print(f"Error: {e}")

        return None
        

In [19]:
# Create the Recognizer that record_microphone uses to calibrate, record and transcribe
recognizer = sr.Recognizer()

# Record a phrase of up to 5 seconds from the microphone; `text` holds the
# transcript, or None if recording/transcription failed
text = record_microphone(recognizer, duration=5)  

Adjusting for ambient noise
Listening for 5 seconds... Speak now!
Recording Complete. Processing, please wait.
You said: hello it's me
