### PLN – Sistema de assistência virtual criado do zero com PLN, incluindo um módulo de conversão de texto em áudio (Text-to-Speech – TTS)

Implementando o módulo TTS (Text-to-Speech)

Usaremos a biblioteca gTTS (Google Text-to-Speech): simples, funciona online e gera áudio em .mp3. Como alternativa offline, o código também oferece o motor de voz local pyttsx3.

In [None]:
from gtts import gTTS
from IPython.display import Audio, display
import pyttsx3
import tempfile
import ipywidgets as widgets

import os
import queue
import json
import random
import tempfile
import webbrowser
from datetime import datetime

import pygame
import sounddevice as sd
from gtts import gTTS
import vosk
import wikipedia
import pyjokes
import sys

In [None]:
def gerar_audio(texto, motor="gtts", idioma="pt"):
    """Synthesize ``texto`` into a temporary audio file and wrap it for playback.

    Args:
        texto: Text to be spoken.
        motor: Speech engine to use: ``"gtts"`` or ``"pyttsx3"``.
        idioma: Language code passed to gTTS. It is not used by the
            ``"pyttsx3"`` branch, which instead picks the first installed
            voice whose name mentions Portuguese or Brazil.

    Returns:
        An ``Audio`` display object pointing at the generated file, with
        autoplay disabled. The temporary file is not deleted here.

    Raises:
        ValueError: If ``motor`` is not one of the supported engines.
    """
    # Fail loudly instead of silently returning None for an unknown engine.
    if motor not in ("gtts", "pyttsx3"):
        raise ValueError(
            f"Motor TTS desconhecido: {motor!r}. Use 'gtts' ou 'pyttsx3'."
        )

    if motor == "gtts":
        # Only the unique path is needed; close the handle right away so it
        # is not leaked and the engine can write to the path by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            caminho = tmp.name
        gTTS(text=texto, lang=idioma).save(caminho)
        return Audio(filename=caminho, autoplay=False)

    engine = pyttsx3.init()
    # Prefer a Portuguese voice when one is installed; otherwise the engine
    # keeps whatever voice it had after init().
    for v in engine.getProperty('voices'):
        nome = v.name.lower()
        if "portuguese" in nome or "brazil" in nome:
            engine.setProperty('voice', v.id)
            break

    # NOTE(review): the file is named .mp3, but the container actually written
    # by pyttsx3 may depend on the platform speech driver - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        caminho = tmp.name
    engine.save_to_file(texto, caminho)
    engine.runAndWait()
    return Audio(filename=caminho, autoplay=False)

# Interactive widgets

# Free-text area holding the sentence to be spoken
texto_input = widgets.Textarea(
    description="Texto:",
    placeholder="Digite o texto aqui...",
    value="Olá, eu sou seu assistente virtual.",
    layout=widgets.Layout(height="80px", width="100%"),
)

# Engine selector: each option is a (label shown, value handed to gerar_audio) pair
motor_dropdown = widgets.Dropdown(
    description="Motor:",
    options=[
        ("Google TTS (online)", "gtts"),
        ("Voz local (offline)", "pyttsx3"),
    ],
    value="gtts",
)

# Trigger button and the output area where the audio player is rendered
botao = widgets.Button(
    button_style="success",
    description="Gerar Áudio",
)
saida = widgets.Output()

def on_button_click(b):
    """Button callback: synthesize the current text and show the player.

    ``b`` is the argument supplied by the button's click handler; it is
    not used. The previous content of ``saida`` is cleared first, and the
    synthesis runs inside the ``saida`` context so its output lands there.
    """
    saida.clear_output()
    with saida:
        texto_atual = texto_input.value
        motor_escolhido = motor_dropdown.value
        display(gerar_audio(texto_atual, motor=motor_escolhido))

# Register on_button_click as the handler for clicks on the button
botao.on_click(on_button_click)

# Show the interface: text area, engine selector, button and output area
display(texto_input, motor_dropdown, botao, saida)


In [None]:
# ------------------------------
# Project base folder
# ------------------------------
try:
    # Running as a .py script: __file__ is defined
    base_path = os.path.abspath(__file__)
except NameError:
    # Jupyter Notebook / interactive sessions have no __file__;
    # fall back to the current working directory
    base_path = os.getcwd()
else:
    base_path = os.path.dirname(base_path)

os.makedirs(base_path, exist_ok=True)


# Vosk model: looked up in a folder nested inside a folder of the same name
model_path = os.path.join(
    base_path,
    "vosk-model-small-en-us-0.15",
    "vosk-model-small-en-us-0.15",
)
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Modelo Vosk não encontrado em {model_path}")

# Music folder (example) - adjust to match your own music folder
music_dir = os.path.join(base_path, "music")

# ------------------------------
# Initialize the pygame mixer
# ------------------------------
pygame.mixer.init()

# ------------------------------
# Função TTS segura para Windows
# ------------------------------
def speak(text, lang='en'):
    """Speak ``text`` aloud: gTTS synthesizes it, the pygame mixer plays it.

    The audio is saved to a temporary ``.mp3`` file, loaded as a
    ``pygame.mixer.Sound``, played, and the file is removed afterwards.
    The call blocks until this utterance has finished playing.

    Args:
        text: Text to speak. Empty or whitespace-only text is skipped.
        lang: Language code handed to gTTS (defaults to ``'en'``).
    """
    # Nothing to say; also avoids handing empty text to gTTS.
    if not text or not text.strip():
        return

    # Bound before the try so the cleanup in ``finally`` never hits an
    # unbound name when temp-file creation itself fails.
    filename = None
    try:
        # Reserve a unique path and close the handle immediately so the
        # file can be rewritten by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            filename = fp.name
        gTTS(text=text, lang=lang).save(filename)

        # Load the audio into a Sound object and start playback
        sound = pygame.mixer.Sound(filename)
        channel = sound.play()

        # Wait only for this utterance's channel, so other sounds playing on
        # the mixer (e.g. music) do not keep this call blocked.
        clock = pygame.time.Clock()
        while channel is not None and channel.get_busy():
            clock.tick(10)
    finally:
        # Remove the temporary file
        if filename is not None and os.path.exists(filename):
            os.remove(filename)

# ------------------------------
# Função de reconhecimento de voz offline (Vosk)
# ------------------------------
def _get_vosk_model():
    """Return the Vosk model, loading it from ``model_path`` on first use only."""
    model = getattr(_get_vosk_model, "_model", None)
    if model is None:
        model = vosk.Model(model_path)
        _get_vosk_model._model = model
    return model


def recognize_speech():
    """Listen on the audio input and return one recognized utterance.

    Returns:
        The recognized text, lowercased. If anything fails while setting up
        or running recognition, the user is asked to type the command and
        the typed text (lowercased) is returned instead.
    """
    try:
        # Reuse the already-loaded model instead of reloading it on every call
        model = _get_vosk_model()
        audio_blocks = queue.Queue()

        def callback(indata, frames, time, status):
            # Called by the input stream for each captured block: report any
            # stream status and queue the raw bytes for the recognizer loop.
            if status:
                print(status)
            audio_blocks.put(bytes(indata))

        samplerate = 16000
        with sd.RawInputStream(samplerate=samplerate, blocksize=8000,
                               dtype='int16', channels=1, callback=callback):
            rec = vosk.KaldiRecognizer(model, samplerate)
            print("🎤 Listening... (say 'exit' to quit)")
            while True:
                data = audio_blocks.get()
                # Skip blocks until the recognizer reports a finished result
                if not rec.AcceptWaveform(data):
                    continue
                result = json.loads(rec.Result())
                text = result.get("text", "").lower()
                if text:
                    print("You said:", text)
                    return text
    except Exception as e:
        # Deliberate best-effort boundary: any failure falls back to typed
        # input, but the real cause is printed instead of being discarded.
        print("Microphone not available, fallback to text input.")
        print(f"Reason: {e!r}")
        return input("Type your command: ").lower()

# ------------------------------
# Funções de música
# ------------------------------
def play_music(song_path):
    """Start playing the audio file at ``song_path``.

    Returns the ``pygame.mixer.Sound`` that was started, or ``None`` (after
    announcing that the song was not found) when the path does not exist.
    """
    if not os.path.exists(song_path):
        speak("Song not found.")
        return None

    track = pygame.mixer.Sound(song_path)
    track.play()
    return track

def stop_music():
    """Stop playback by calling ``stop()`` on the pygame mixer."""
    mixer = pygame.mixer
    mixer.stop()

# ------------------------------
# Função de respostas
# ------------------------------
def respond(text):
    """Carry out the voice command contained in ``text``.

    Commands are matched by substring, in this order: YouTube search,
    Wikipedia search, joke, current time, open Google, open Wikipedia,
    open LinkedIn, and exit. Text that matches no command is ignored.

    Args:
        text: The recognized (or typed) command.

    Raises:
        SystemExit: When an exit command is recognized.
    """
    # Local import so this function does not depend on a file-level import
    from urllib.parse import quote_plus

    text = text.lower()
    if 'youtube' in text:
        speak("What do you want to search for?")
        keyword = recognize_speech()
        if keyword:
            # Encode the keyword so spaces and special characters form a valid query string
            url = f"https://www.youtube.com/results?search_query={quote_plus(keyword)}"
            webbrowser.open(url)
            speak(f"Here is what I found for {keyword} on YouTube")

    elif 'search' in text:
        speak("What do you want to search for?")
        query = recognize_speech()
        if query:
            try:
                result = wikipedia.summary(query, sentences=2)
                speak("According to Wikipedia")
                print(result)
                speak(result)
            except wikipedia.exceptions.DisambiguationError as e:
                speak(f"Your search is too broad, maybe you mean {e.options[0]}")
            except wikipedia.exceptions.PageError:
                speak("Sorry, I could not find anything.")

    elif 'joke' in text:
        speak(pyjokes.get_joke())

    elif 'what time' in text:
        # 12-hour clock to go with the AM/PM marker
        # (%H is the 24-hour field and produced e.g. "15:30 PM")
        current_time = datetime.now().strftime("%I:%M %p")
        print(current_time)
        speak(current_time)

    elif 'open google' in text:
        webbrowser.open("https://www.google.com")
        speak("Google is now open.")

    elif 'open wiki' in text:
        webbrowser.open("https://www.wikipedia.org/")
        speak("Wikipedia is now open.")

    elif 'linked in' in text:
        webbrowser.open("https://www.linkedin.com/in/dsantanaweb/")
        speak("Linkedin is now open.")

    # NOTE(review): 'is it' is presumably how the recognizer mishears "exit" -
    # confirm; it also matches ordinary phrases that contain "is it".
    elif 'exit' in text or 'quit' in text or 'bye' in text or 'is it' in text:
        speak("Goodbye, till next time!")
        pygame.mixer.quit()  # Shut down the pygame mixer
        sys.exit()  # Terminate the program

# ------------------------------
# Loop principal do assistente
# ------------------------------
if __name__ == "__main__":
    # Announce readiness, then keep handling commands until respond() exits
    speak("Hello, I am ready.")

    while True:
        command = recognize_speech()
        if not command:
            # Nothing recognized or typed; listen again
            continue
        respond(command)
