<a href="https://colab.research.google.com/github/gleidsonnunes/scripts/blob/master/Narrador_XTTS_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Instalar dependencias
# Resetar tudo
!pip uninstall -y torch torchaudio torchvision transformers TTS
!pip cache purge

# Instalar Torch CPU-only (sem dependência de Triton)
!pip install torch==2.1.2 torchaudio==2.1.2 torchvision>=0.11 --index-url https://download.pytorch.org/whl/cpu

# Instalar versão estável da TTS que usa XTTS
!pip install TTS==0.21.1

# (opcional) instalar transformers caso necessário para outras tarefas
!pip install transformers==4.36.2

!pip install gradio pymupdf ebooklib pydub bs4

!apt-get update && apt-get install -y ffmpeg

Found existing installation: torch 2.1.2+cpu
Uninstalling torch-2.1.2+cpu:
  Successfully uninstalled torch-2.1.2+cpu
Found existing installation: torchaudio 2.1.2+cpu
Uninstalling torchaudio-2.1.2+cpu:
  Successfully uninstalled torchaudio-2.1.2+cpu
Found existing installation: torchvision 0.16.2+cpu
Uninstalling torchvision-0.16.2+cpu:
  Successfully uninstalled torchvision-0.16.2+cpu
Found existing installation: transformers 4.53.2
Uninstalling transformers-4.53.2:
  Successfully uninstalled transformers-4.53.2
Found existing installation: TTS 0.21.1
Uninstalling TTS-0.21.1:
  Successfully uninstalled TTS-0.21.1
Files removed: 48
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 4.1.0 requires transformers<5.0.0,>=4.41.0, which is not installed.
peft 0.16.0 requires transformers, which is not installed.[0m[31m
[0mCollecting TTS==0

In [2]:
#@title Enviar Voz
# Upload a reference voice recording and store it as a real WAV file under the
# fixed name that the interface cell reads as the speaker sample.
import os
from google.colab import files
uploaded = files.upload()
import shutil
from pydub import AudioSegment

voice_sample_path = "voz_clonada_sample.wav"
voice_sample_saved = False

for name in uploaded:
    lowered = name.lower()  # accept ".MP3" / ".WAV" as well
    if lowered.endswith(".wav"):
        shutil.move(name, voice_sample_path)
        voice_sample_saved = True
    elif lowered.endswith(".mp3"):
        # Renaming an MP3 to ".wav" leaves MP3 data behind a misleading extension;
        # decode it and write an actual WAV file instead (uses ffmpeg via pydub).
        AudioSegment.from_file(name, format="mp3").export(voice_sample_path, format="wav")
        os.remove(name)
        voice_sample_saved = True

if voice_sample_saved:
    print(f"Amostra de voz salva em {voice_sample_path}")
else:
    print("Nenhum arquivo .mp3 ou .wav foi enviado; a amostra de voz não foi atualizada.")

In [None]:
#@title Interface
import gradio as gr
from TTS.api import TTS
import fitz  # PyMuPDF
from ebooklib import epub
from bs4 import BeautifulSoup
import os
from pydub import AudioSegment
import torch # Import torch

# Reference voice recording used for cloning, and the folder that receives
# the generated audio files.
SPEAKER_WAV = "voz_clonada_sample.wav"
OUTPUT_DIR = "audios"

# Create the output folder from the constant so the path is defined in one place.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Accept the Coqui model terms automatically (avoids the interactive prompt).
os.environ["COQUI_TOS_AGREED"] = "1"

# Config classes that would have to be allowlisted for torch.load.
# NOTE(review): the allowlisting call below is intentionally left disabled; it looks
# like it is only relevant on newer PyTorch releases than the torch 2.1.2 pinned by
# the install cell — confirm before enabling.
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.models.xtts import XttsAudioConfig
# torch.serialization.add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs])


# Load the multilingual XTTS v2 model on CPU (the install cell sets up CPU-only Torch).
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True, gpu=False)

# Choices offered in the "Emoção" dropdown of the interface.
emotions = ["neutral", "happy", "sad", "angry", "excited", "sleepy", "whispering", "shouting"]

def extract_text(file):
    """Extract plain text from an uploaded PDF or EPUB file.

    Args:
        file: The value delivered by the ``gr.File`` input. It may be ``None``
            (nothing uploaded), an object exposing the path as ``.name``, or a
            plain path string.

    Returns:
        The extracted text with surrounding whitespace stripped, or an empty
        string when no file was given or the extension is not ``.pdf``/``.epub``.
    """
    if file is None:
        return ""

    # Work from the file path in both branches; this does not depend on the
    # upload object offering a readable stream.
    path = getattr(file, "name", file)
    ext = os.path.splitext(path)[-1].lower()

    if ext == ".pdf":
        # The context manager closes the document once all pages are read.
        with fitz.open(path) as doc:
            text = "".join(page.get_text() for page in doc)
    elif ext == ".epub":
        book = epub.read_epub(path)
        # get_type() returns an integer item-type code, so comparing it with the
        # EpubHtml class never matched; select document items by class instead.
        text = "".join(
            BeautifulSoup(item.get_content(), 'html.parser').get_text()
            for item in book.get_items()
            if isinstance(item, epub.EpubHtml)
        )
    else:
        text = ""

    return text.strip()

def narrar(texto, emocao):
    """Synthesize ``texto`` in Portuguese with the cloned voice and export WAV and MP3.

    Args:
        texto: Text to narrate. Empty, ``None`` or whitespace-only input
            produces no audio.
        emocao: Emotion label selected in the UI; forwarded to the TTS call.

    Returns:
        ``(wav_path, mp3_path)`` for the generated files, or ``(None, None)``
        when there is no text to narrate.

    Raises:
        gr.Error: If the speaker sample file has not been uploaded yet.
    """
    # Nothing to narrate: clear both outputs instead of calling the model.
    if not texto or not texto.strip():
        return None, None

    # Fail early with a readable message rather than an error deep inside TTS.
    if not os.path.isfile(SPEAKER_WAV):
        raise gr.Error(
            f"Amostra de voz não encontrada ({SPEAKER_WAV}). Execute a célula 'Enviar Voz' primeiro."
        )

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    wav_path = os.path.join(OUTPUT_DIR, "narracao.wav")
    mp3_path = os.path.join(OUTPUT_DIR, "narracao.mp3")

    # NOTE(review): confirm that the local XTTS model actually honours `emotion`;
    # it may be accepted by the API without affecting the generated speech.
    tts.tts_to_file(
        text=texto,
        speaker_wav=SPEAKER_WAV,
        language="pt",
        emotion=emocao,
        file_path=wav_path
    )

    # Re-encode the generated WAV as MP3 for the download component.
    audio = AudioSegment.from_wav(wav_path)
    audio.export(mp3_path, format="mp3")

    return wav_path, mp3_path

# Build the Gradio interface: upload a document, extract its text into an
# editable textbox, then narrate that text and offer the result as audio/MP3.
with gr.Blocks() as demo:
    gr.Markdown("## Narrador XTTS com sua voz preferida 🎤")

    # First row: document upload (restricted to PDF/EPUB) and emotion selector.
    with gr.Row():
        arquivo = gr.File(label="Carregar PDF ou EPUB", file_types=[".pdf", ".epub"])
        emocao = gr.Dropdown(emotions, label="Emoção", value="neutral")

    # Holds the extracted text; it is also the input of the narration step.
    texto_extraido = gr.Textbox(label="Texto extraído", lines=10)
    carregar_btn = gr.Button("Extrair texto")

    # Clicking the button runs extract_text on the uploaded file and fills the textbox.
    carregar_btn.click(fn=extract_text, inputs=arquivo, outputs=texto_extraido)

    # Second row: narration trigger plus the two outputs (player and MP3 download).
    with gr.Row():
        narrar_btn = gr.Button("Narrar")
        audio_saida = gr.Audio(label="Ouvir áudio")
        download_mp3 = gr.File(label="Baixar em MP3")

    # narrar returns (wav_path, mp3_path), mapped to the player and the download in that order.
    narrar_btn.click(fn=narrar, inputs=[texto_extraido, emocao], outputs=[audio_saida, download_mp3])

# share=True publishes a temporary public URL; debug=True keeps the cell running
# so errors and logs stay visible in the notebook output.
demo.launch(share=True, debug=True)

 > Downloading model to /root/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2


100%|█████████▉| 1.87G/1.87G [00:40<00:00, 36.1MiB/s]
100%|██████████| 1.87G/1.87G [00:41<00:00, 45.0MiB/s]
4.37kiB [00:00, 6.88kiB/s]
246kiB [00:00, 2.35MiB/s]
361kiB [00:00, 430kiB/s] 


 > Model's license - CPML
 > Check https://coqui.ai/cpml.txt for more info.
 > Using model: xtts
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7c94a81c03b5deee9a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


 > Text splitted to sentences.
['Capítulo 1 – Login no Paraíso de Aventuras', 'A luz azul do visor piscava com suavidade, refletindo no rosto tranquilo de Gleidson enquanto ele repousava na cápsula de Realidade Total.', 'Um som sutil ecoou pelo ambiente:', '“Sincronização completa. Bem-vindo de volta a Elaria.”', 'E então, num piscar de olhos, o mundo mudou.', 'O céu acima era de um azul vibrante, cortado por nuvens fofas que pareciam algodão.', 'Ao seu redor, colinas verdes ondulavam até o horizonte, e uma estrada de pedra se perdia entre vilarejos e torres mágicas.', 'Gleidson sorriu.', 'Vestia roupas leves de aventureiro iniciante, com um manto azul-acinzentado esvoaçando nas costas.', '— Mano, cê caiu de novo de cara no chão?', '— disse uma voz animada, e ao se virar, ele viu Lina, sua irmã mais nova na vida real, mas aqui transformada em uma espadachim de cabelo curto prateado, com olhos rubros e uma expressão travessa.', '— Eu só... tava apreciando a paisagem — ele respondeu, se 