Setup inicial con instalación inteligente de dependencias, las cuales solo se instalarán la primera vez que se ejecute el notebook.

In [None]:
# Standard-library imports only: these are always available, so they can be
# loaded before any package has been installed.
import importlib
import subprocess
import sys
import os
import zipfile
import urllib.request


def install_if_missing(package_name, pip_name=None):
    """Install a package with pip only when it cannot be imported.

    Args:
        package_name: Importable module name used to probe for the package.
        pip_name: Arguments handed to ``pip install``, separated by
            whitespace (may include options such as ``--index-url``).
            Defaults to ``package_name``.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
    """
    pip_name = pip_name or package_name
    try:
        importlib.import_module(package_name)
        print(f"‚úÖ {package_name} ya est√° instalado.")
    except ImportError:
        print(f"‚¨áÔ∏è Instalando {pip_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_name.split()])
        # Let the import system notice files created after the interpreter started.
        importlib.invalidate_caches()


# Packages installed from the package index (torch uses the CUDA 12.1 wheel index).
install_if_missing("torch", "torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu121")
install_if_missing("librosa")
install_if_missing("pydub")
install_if_missing("gradio")
install_if_missing("soundfile")


# Packages installed straight from their Git repositories, keyed by the
# module name used to probe whether they are already importable.
repos = {
    "openvoice": "git+https://github.com/myshell-ai/OpenVoice.git",
    "melo": "git+https://github.com/myshell-ai/MeloTTS.git"
}

for module_name, repo_url in repos.items():
    try:
        importlib.import_module(module_name)
        print(f"‚úÖ {module_name} ya est√° instalado.")
    except ImportError:
        print(f"‚¨áÔ∏è Instalando {module_name} desde {repo_url}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", repo_url])
        # Same reason as in install_if_missing: make the new package importable now.
        importlib.invalidate_caches()

# This cell previously ran `pip install ctranslate2` unconditionally, right
# before importing se_extractor. It is now installed only when missing.
# NOTE(review): presumably a runtime dependency of se_extractor - confirm.
install_if_missing("ctranslate2")

# ============================================
# If 'openvoice' still cannot be imported, fall back to a local checkout
# ============================================
try:
    import openvoice
except ModuleNotFoundError:
    print("‚öôÔ∏è Corrigiendo ruta de importaci√≥n de OpenVoice...")
    for possible_path in ["OpenVoice", "openvoice"]:
        if os.path.isdir(possible_path):
            sys.path.append(os.path.abspath(possible_path))
            print(f"üß© Ruta agregada: {os.path.abspath(possible_path)}")
            break
    # Raises again if neither directory existed; nothing more can be done here.
    import openvoice

# Third-party imports come last on purpose: on a fresh environment they only
# succeed after the installation steps above have run.
import torch
import IPython.display as ipd
from openvoice.api import ToneColorConverter
from openvoice import se_extractor


Descargar checkpoints solo si no existen

In [None]:
# Download and unpack the OpenVoice v2 checkpoints, skipping any step whose
# result is already present on disk.
url = "https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip"
zip_name = "checkpoints_v2_0417.zip"
extract_dir = "checkpoints_v2"
# Kept so that any code referring to this name keeps working; the archive is
# NOT extracted here - extractall() below unpacks into the working directory.
root_extract_dir = "TTS"

if os.path.exists(extract_dir):
    print(f"‚úÖ '{extract_dir}' ya existe, no se descarga nuevamente.")
else:
    if not os.path.exists(zip_name):
        print(f"‚¨áÔ∏è Descargando {zip_name}...")
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a complete archive.
        partial_name = zip_name + ".part"
        try:
            urllib.request.urlretrieve(url, partial_name)
        except BaseException:
            # Remove the incomplete file so the next run starts clean.
            if os.path.exists(partial_name):
                os.remove(partial_name)
            raise
        os.replace(partial_name, zip_name)
        print("‚úÖ Descarga completa.")
    else:
        print(f"üì¶ '{zip_name}' ya est√° descargado.")
    print("üìÇ Descomprimiendo...")
    with zipfile.ZipFile(zip_name, 'r') as zip_ref:
        zip_ref.extractall()
    # Report the directory the files were really written to.
    print(f"‚úÖ Archivos extra√≠dos en '{os.getcwd()}'.")

Carga del modelo

In [None]:
# Where the converter checkpoint lives and where generated audio is written.
ckpt_converter = 'checkpoints_v2/converter'
output_dir = 'outputs_v2'

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

os.makedirs(output_dir, exist_ok=True)

# Build the converter from its config file, then load the checkpoint into it.
converter_config = f'{ckpt_converter}/config.json'
converter_weights = f'{ckpt_converter}/checkpoint.pth'
tone_color_converter = ToneColorConverter(converter_config, device=device)
tone_color_converter.load_ckpt(converter_weights)

print("‚úÖ Modelo cargado correctamente.")

Inicializar OpenVoice con inglés y español

In [None]:
from melo.api import TTS

# Reference recording; the embedding extracted from it is passed as the
# conversion target (tgt_se) for every generated clip.
reference_speaker = 'resources/example_reference.mp3'
if not os.path.isfile(reference_speaker):
    # Fail early with the resolved path instead of an error raised from
    # inside the extractor.
    raise FileNotFoundError(
        f"Reference audio not found: {os.path.abspath(reference_speaker)}"
    )
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=True)

# Only Spanish and English
texts = {
    'EN': "Did you ever hear a folk tale about a giant turtle?",
    'ES': "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante."
}

# Intermediate TTS output; overwritten for every speaker before conversion.
src_path = f"{output_dir}/tmp.wav"
speed = 1.0  # Adjustable speed

# Only these languages are processed
supported_languages = {'EN', 'ES'}

# Message passed to the converter on every call; constant, so defined once.
encode_message = "@MyShell"

# Workaround for MPS (Mac) problems: when running on the CPU, make torch
# report MPS as unavailable. The check depends on neither the language nor
# the speaker, so it is applied once here instead of on every iteration.
if torch.backends.mps.is_available() and device == 'cpu':
    torch.backends.mps.is_available = lambda: False

for language, text in texts.items():
    if language not in supported_languages:
        print(f"‚ö†Ô∏è Language {language} not supported, skipping.")
        continue

    print(f"\nüéôÔ∏è Generating TTS for {language}...")

    # Initialise the TTS model for this language
    model = TTS(language=language, device=device)
    speaker_ids = model.hps.data.spk2id

    # Process every available speaker
    for speaker_key, speaker_id in speaker_ids.items():
        # Embedding files are looked up by lower-case key with '-' instead of '_'.
        speaker_key_clean = speaker_key.lower().replace('_', '-')

        se_path = f"checkpoints_v2/base_speakers/ses/{speaker_key_clean}.pth"
        if not os.path.exists(se_path):
            print(f"‚ö†Ô∏è Missing speaker embedding for {speaker_key_clean}, skipping.")
            continue

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # that come from a trusted source.
        source_se = torch.load(se_path, map_location=device)

        # Generate the TTS audio
        model.tts_to_file(text, speaker_id, src_path, speed=speed)
        save_path = f"{output_dir}/output_v2_{speaker_key_clean}_{language.lower()}.wav"

        # Convert the voice tone with the converter
        tone_color_converter.convert(
            audio_src_path=src_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
            message=encode_message
        )

        print(f"‚úÖ Saved: {save_path}")

Reproducción del audio

In [None]:
# Show a player for every converted clip found in output_dir.
# The previous version built a single path from an undefined name (`lang`)
# and a file pattern that the synthesis cell never writes; converted clips
# are saved as "output_v2_<speaker>_<language>.wav".
generated_clips = sorted(
    file_name
    for file_name in os.listdir(output_dir)
    if file_name.startswith("output_v2_") and file_name.endswith(".wav")
)

if not generated_clips:
    print(f"No se encontraron audios generados en '{output_dir}'.")

for file_name in generated_clips:
    output_path = f"{output_dir}/{file_name}"
    print(output_path)
    # display() is required inside a loop: only a cell's last bare expression
    # is rendered automatically.
    ipd.display(ipd.Audio(output_path))