In [1]:
!pip install gradio openai-whisper gtts deep-translator

Collecting gradio
  Downloading gradio-5.0.2-py3-none-any.whl.metadata (15 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 800.5/800.5 kB 11.2 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting gtts
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.w

In [2]:
import gradio as gr
import whisper
from gtts import gTTS
from deep_translator import GoogleTranslator
import uuid
from pathlib import Path

In [8]:

# Cache of loaded Whisper models keyed by model name, so the weights are
# loaded once per kernel session instead of once per request.
_WHISPER_MODELS = {}


# Function to transcribe audio using Whisper
def transcribe_audio(audio_file, model_name="base"):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_file: Audio input handed straight to ``model.transcribe``
            (the Gradio audio input in this notebook supplies a file path).
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"base"``.

    Returns:
        The ``'text'`` entry of the Whisper transcription result.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        # Loading the model is the expensive step; do it once and reuse it.
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    result = model.transcribe(audio_file)
    return result['text']


# Function to translate text using GoogleTranslator
def translate_text(text: str, languages=None) -> list:
    """Translate ``text`` into each target language.

    Args:
        text: Source text. The source language is auto-detected
            (``source="auto"``).
        languages: Optional iterable of target language codes. When omitted,
            the default set ``ru, tr, zh-CN, de, es, ja`` is used.

    Returns:
        A list with one translation per target language, in the same order
        as the language codes.
    """
    if languages is None:
        languages = ["ru", "tr", "zh-CN", "de", "es", "ja"]

    return [
        GoogleTranslator(source="auto", target=lang).translate(text)
        for lang in languages
    ]


# Function to generate speech from text using gTTS
def text_to_speech(text, lang):
    """Synthesize ``text`` in language ``lang`` with gTTS and save it as MP3.

    The output file gets a freshly generated UUID4 name with an ``.mp3``
    extension; the name is relative, so no directory is prepended.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        The name of the MP3 file that was saved.
    """
    speech = gTTS(text=text, lang=lang)
    mp3_name = str(uuid.uuid4()) + ".mp3"
    speech.save(mp3_name)
    return mp3_name


# Main function to handle voice-to-voice translations
def voice_to_voice(audio_file):
    """Transcribe a recording, translate it, and voice every translation.

    Args:
        audio_file: The recording to process, as delivered by the Gradio
            audio input (a file path, or ``None`` when nothing was recorded).

    Returns:
        A flat tuple: the generated MP3 file names followed by the translated
        texts, both in the order ru, tr, zh-CN, de, es, ja (12 items for the
        six default languages).

    Raises:
        gr.Error: If no recording was provided, so the UI shows a readable
            message instead of a failure from inside Whisper.
    """
    if audio_file is None:
        raise gr.Error("No audio recorded. Please record your voice and try again.")

    # Transcribe speech using Whisper
    transcript = transcribe_audio(audio_file)

    # Translate text
    translations = translate_text(transcript)

    # Generate speech from translated text using gTTS. The codes must stay in
    # the same order as the translations returned by translate_text.
    target_langs = ["ru", "tr", "zh-CN", "de", "es", "ja"]
    speech_files = [
        text_to_speech(translation, lang)
        for translation, lang in zip(translations, target_langs)
    ]

    # Return audio files first, then the translated texts
    return (*speech_files, *translations)


# Gradio interface: one microphone input, and for each target language an
# audio player plus a text box showing the translation.
with gr.Blocks() as demo:
    # Heading (Russian): "Record your voice and get translations with playback."
    gr.Markdown("## Запишите свой голос и получайте переводы с воспроизведением.")

    with gr.Row():
        # type="filepath" hands the handler a path to the recorded file.
        audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")

    with gr.Row():
        submit = gr.Button("Submit")
        btn = gr.ClearButton(audio_input, "Clear")

    with gr.Row():
        with gr.Group():
            ru_audio = gr.Audio(label="Russian")
            ru_text = gr.Textbox(label="Russian Text")

        with gr.Group():
            tr_audio = gr.Audio(label="Turkish")
            tr_text = gr.Textbox(label="Turkish Text")

        with gr.Group():
            zh_CN_audio = gr.Audio(label="Chinese")
            zh_CN_text = gr.Textbox(label="Chinese Text")

        with gr.Group():
            de_audio = gr.Audio(label="German")
            de_text = gr.Textbox(label="German Text")

        with gr.Group():
            es_audio = gr.Audio(label="Spanish")
            es_text = gr.Textbox(label="Spanish Text")

        with gr.Group():
            ja_audio = gr.Audio(label="Japanese")
            ja_text = gr.Textbox(label="Japanese Text")

    # Order must match the tuple returned by voice_to_voice:
    # six audio outputs first, then the six text outputs.
    output_components = [ru_audio, tr_audio, zh_CN_audio, de_audio, es_audio, ja_audio,
                         ru_text, tr_text, zh_CN_text, de_text, es_text, ja_text]

    # The outputs are created after the Clear button, so register them here;
    # otherwise "Clear" resets only the recording and stale results remain.
    btn.add(output_components)

    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components)

# Run the Gradio app in Colab; share=True requests a public share link.
if __name__ == "__main__":
    demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://57c6441d0148a529eb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
