<a href="https://colab.research.google.com/github/detektor777/colab_list_audio/blob/main/text_to_speech_edge_tts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title ##**Install** { display-mode: "form" }
%%capture
!pip install edge-tts --upgrade
!pip install nest_asyncio

In [None]:
# @title ##**Config** { display-mode: "form" }
import edge_tts
import asyncio
import ipywidgets as widgets
import nest_asyncio
from IPython.display import display, clear_output

nest_asyncio.apply()

async def get_voices():
    """Build a mapping from human-readable voice labels to voice short names.

    Each entry returned by ``edge_tts.list_voices()`` contributes one item whose
    key has the form ``"<ShortName> - <Locale> (<Gender>)"`` and whose value is
    the entry's ``ShortName``.

    Returns:
        dict: label -> ShortName.
    """
    label_to_short_name = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        label_to_short_name[label] = short_name
    return label_to_short_name

def get_languages(voices):
    """Return the sorted, de-duplicated locale codes found in the voice labels.

    Args:
        voices: Mapping whose keys are labels shaped like
            ``"<ShortName> - <Locale> (<Gender>)"``.

    Returns:
        list: Unique locale strings in ascending order.
    """
    # The locale sits between the first " - " and the following " (".
    locales = {label.split(" - ")[1].partition(" (")[0] for label in voices}
    return sorted(locales)

# Shared state passed between notebook cells: the Config cell writes the
# widget values, the Run cell reads them and records the generated file.
global_params = {
    "text": "",
    "voice": "",
    "rate": 0,
    "pitch": 0,
    # Initialised here so the Download cell can test it for None instead of
    # hitting a KeyError when no audio has been generated yet.
    "audio_path": None,
}

# Fetch the label -> ShortName mapping once and derive the list of locales
# from it. NOTE(review): asyncio.run() inside a notebook presumably relies on
# the nest_asyncio.apply() call made earlier in this cell - confirm.
voices = asyncio.run(get_voices())
languages = get_languages(voices)

# Free-form text to be synthesised; pre-filled with a sample sentence.
text_input = widgets.Textarea(
    value="Hello! This is a test text for speech synthesis.",
    placeholder="Enter text",
    description="Text:",
    layout={'width': '500px', 'height': '100px'}
)

# Locale selector; its value drives which voices are offered below.
# "en-US" is hard-coded as the initial selection, so it must be present in
# `languages`.
language_dropdown = widgets.Dropdown(
    options=languages,
    value="en-US",
    description="Language:"
)

# Voice selector; starts with only the empty "no voice chosen" entry and is
# filled in by update_voices().
voice_dropdown = widgets.Dropdown(
    options=[""],
    description="Voice:"
)

# Speaking-rate adjustment in percent (later formatted as e.g. "+29%").
rate_slider = widgets.IntSlider(
    value=29,
    min=-50,
    max=50,
    step=1,
    description="Rate (%):"
)

# Pitch adjustment in Hz (later formatted as e.g. "+0Hz").
pitch_slider = widgets.IntSlider(
    value=0,
    min=-20,
    max=20,
    step=1,
    description="Pitch (Hz):"
)

def update_voices(change):
    """Repopulate the voice dropdown for the language in ``change['new']``.

    Keeps only the labels from the module-level ``voices`` mapping whose
    locale part matches the selected language, prepends an empty entry and
    resets the current selection to that empty entry.
    """
    # Labels look like "<ShortName> - <Locale> (<Gender>)".
    locale_marker = f" - {change['new']} ("
    matching_labels = [label for label in voices if locale_marker in label]
    voice_dropdown.options = ["", *matching_labels]
    voice_dropdown.value = ""


# Refresh the voice list whenever another language is picked ...
language_dropdown.observe(update_voices, names='value')

# ... and once up front for the initially selected language.
update_voices({'new': language_dropdown.value})

def save_params(change):
    """Copy the current widget values into ``global_params``.

    Used as a widget observer; ``change`` is accepted to satisfy the observer
    signature and is not inspected.
    """
    global_params.update(
        text=text_input.value,
        voice=voice_dropdown.value,
        rate=rate_slider.value,
        pitch=pitch_slider.value,
    )


# Keep global_params in sync with every control that feeds the Run cell.
for _observed in (text_input, voice_dropdown, rate_slider, pitch_slider):
    _observed.observe(save_params, names='value')

# Render the form in a fixed top-to-bottom order.
for _shown in (text_input, language_dropdown, voice_dropdown, rate_slider, pitch_slider):
    display(_shown)

# Seed global_params with the initial widget values.
save_params(None)


In [None]:
# @title ##**Run** { display-mode: "form" }
import edge_tts
import asyncio
import tempfile
import nest_asyncio
from IPython.display import Audio, display

nest_asyncio.apply()

async def text_to_speech(text, voice, rate, pitch):
    """Synthesise ``text`` with edge-tts and save it to a temporary MP3 file.

    Args:
        text: Text to convert; ``None``, empty or whitespace-only is rejected.
        voice: Voice label (``"<ShortName> - ..."``) or a bare short name.
        rate: Integer speaking-rate change in percent.
        pitch: Integer pitch change in Hz.

    Returns:
        tuple: ``(path, None)`` on success, ``(None, message)`` on failure.
        Errors are reported through the message rather than raised.
    """
    import os  # local import keeps this block self-contained

    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    tmp_path = None
    try:
        # Everything before " - " in the dropdown label is the short name.
        voice_short_name = voice.split(" - ")[0]
        rate_str = f"{rate:+d}%"
        pitch_str = f"{pitch:+d}Hz"
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

        # Only reserve a unique file name here; the handle is closed before
        # the audio is written so the file is not held open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name

        await communicate.save(tmp_path)
        return tmp_path, None
    except Exception as e:
        # delete=False means nobody else removes the file; do not leave an
        # empty or partial MP3 behind when synthesis fails.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup; the original error is reported below
        return None, f"Error occurred: {str(e)}"

async def main():
    """Generate speech from the values stored in ``global_params`` and play it.

    Prints an error and stops when no voice is selected or synthesis fails.
    On success the file path is printed, stored under
    ``global_params["audio_path"]`` and an autoplaying audio player is shown.
    """
    text, voice, rate, pitch = (
        global_params[key] for key in ("text", "voice", "rate", "pitch")
    )

    if not voice:
        print("Error: Please select a voice.")
        return

    audio_path, error = await text_to_speech(text, voice, rate, pitch)

    if not error:
        print(f"Audio successfully generated: {audio_path}")
        # Remember the file so the Download cell can find it.
        global_params["audio_path"] = audio_path
        display(Audio(audio_path, autoplay=True))
        return

    print(error)

asyncio.run(main())


In [None]:
# @title ##**Download** { display-mode: "form" }
from google.colab import files

# Use .get(): "audio_path" is only stored in global_params after the Run cell
# has produced a file, so indexing it directly would raise KeyError instead
# of showing the hint below.
audio_path = global_params.get("audio_path")
if audio_path is None:
    print("Error: First, generate the audio by running the previous cell.")
else:
    files.download(audio_path)
    print(f"Downloading file: {audio_path}")