<a href="https://colab.research.google.com/github/GudisaSandeep/Multi-Language-Translator-with-Text-and-Voice-Outputs/blob/main/Video_Translator_Application.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install moviepy torchaudio transformers scipy langid gradio



In [None]:
from moviepy.editor import VideoFileClip, AudioFileClip
import torchaudio
from transformers import AutoProcessor, SeamlessM4Tv2Model
import scipy.io.wavfile
import numpy as np
import langid
import gradio as gr

In [None]:
def extract_audio(video_path):
    """Extract the audio track of a video into a WAV file.

    Args:
        video_path: Path to the input video file.

    Returns:
        Path of the written WAV file. The file name is fixed
        ("extracted_audio.wav", relative to the current working directory),
        so each call overwrites the previous result.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = "extracted_audio.wav"
    video = VideoFileClip(video_path)
    try:
        # moviepy sets ``audio`` to None for silent videos; fail with a clear
        # message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Always release the reader resources held by the clip.
        video.close()
    return audio_path

In [None]:
def detect_language(audio_path):
    """Return a language code for the audio file at ``audio_path``.

    NOTE: this is a placeholder. The audio is loaded but never transcribed;
    ``langid`` classifies a fixed sentence, so the returned code does not
    depend on the audio content.
    """
    # The waveform is not used yet, but loading still fails loudly on a
    # missing or unreadable file.
    _waveform, _sample_rate = torchaudio.load(audio_path)

    # Placeholder: Replace with actual speech-to-text for production use
    placeholder_transcript = "This is a placeholder for actual speech-to-text output."
    language_code, _score = langid.classify(placeholder_transcript)
    return language_code

In [None]:
_SEAMLESS_CHECKPOINT = "facebook/seamless-m4t-v2-large"
_SEAMLESS_INPUT_RATE = 16000  # sampling rate (Hz) the audio is resampled to before processing
_seamless_cache = {}


def _load_seamless():
    """Return the (processor, model) pair, loading the checkpoint only once."""
    if "pair" not in _seamless_cache:
        _seamless_cache["pair"] = (
            AutoProcessor.from_pretrained(_SEAMLESS_CHECKPOINT),
            SeamlessM4Tv2Model.from_pretrained(_SEAMLESS_CHECKPOINT),
        )
    return _seamless_cache["pair"]


def translate_audio(audio_path, tgt_lang):
    """Translate the speech in an audio file into speech in ``tgt_lang``.

    Args:
        audio_path: Path to the source audio file.
        tgt_lang: Target language code passed to the model's ``generate``
            (e.g. "spa", "fra").

    Returns:
        Path of the written WAV file. The file name is fixed
        ("translated_audio.wav", relative to the current working directory),
        so each call overwrites the previous result.
    """
    audio, orig_freq = torchaudio.load(audio_path)

    # torchaudio returns (channels, samples). The processor treats the first
    # dimension of a 2-D input as the batch, so a stereo file would be handled
    # as two separate utterances; mix down to a single channel first.
    if audio.dim() == 2 and audio.shape[0] > 1:
        audio = audio.mean(dim=0, keepdim=True)

    # Resample to 16 kHz if needed.
    if orig_freq != _SEAMLESS_INPUT_RATE:
        audio = torchaudio.functional.resample(
            audio, orig_freq=orig_freq, new_freq=_SEAMLESS_INPUT_RATE
        )

    # Reuse the cached processor/model instead of reloading the checkpoint
    # on every request.
    processor, model = _load_seamless()

    # Process audio and translate; element 0 of the generate() output is the
    # waveform tensor.
    audio_inputs = processor(
        audios=audio, sampling_rate=_SEAMLESS_INPUT_RATE, return_tensors="pt"
    )
    translated_audio = (
        model.generate(**audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
    )

    # Save translated audio at the model's own output sampling rate.
    translated_audio_path = "translated_audio.wav"
    scipy.io.wavfile.write(
        translated_audio_path,
        rate=model.config.sampling_rate,
        data=translated_audio,
    )
    return translated_audio_path

In [None]:
def merge_audio_video(video_path, audio_path):
    """Write a copy of a video whose audio track is replaced by another file.

    Args:
        video_path: Path to the source video file.
        audio_path: Path to the audio file to use as the new soundtrack.

    Returns:
        Path of the written video. The file name is fixed
        ("translated_video.mp4", relative to the current working directory),
        so each call overwrites the previous result.
    """
    output_video_path = "translated_video.mp4"
    video = VideoFileClip(video_path)
    try:
        audio = AudioFileClip(audio_path)
        try:
            video_with_new_audio = video.set_audio(audio)
            video_with_new_audio.write_videofile(output_video_path)
        finally:
            # Release the audio reader even if encoding fails.
            audio.close()
    finally:
        # Release the video reader even if opening the audio or encoding fails.
        video.close()
    return output_video_path

In [None]:
def translate_video(video, tgt_lang):
    """Run the full pipeline: extract audio, translate it, and re-mux the video.

    Args:
        video: Path to the input video file (as supplied by the Gradio
            video input).
        tgt_lang: Target language code chosen in the dropdown.

    Returns:
        Path of the translated video file.

    Raises:
        gr.Error: If no video was uploaded or no target language was selected.
    """
    # Gradio passes None for inputs the user left empty; report that in the UI
    # instead of failing later with an unrelated error.
    if not video:
        raise gr.Error("Please upload a video before translating.")
    if not tgt_lang:
        raise gr.Error("Please select a target language.")

    # Step 1: Extract audio
    audio_path = extract_audio(video)

    # Step 2: Detect source language (currently informational only)
    src_lang = detect_language(audio_path)
    print(f"Detected source language: {src_lang}")

    # Step 3: Translate audio
    translated_audio_path = translate_audio(audio_path, tgt_lang)

    # Step 4: Merge translated audio with video
    return merge_audio_video(video, translated_audio_path)

In [None]:
# Build the Gradio UI: a video upload and a target-language picker as inputs,
# the translated video as output.
interface = gr.Interface(
    title="Video Translator",
    description="Upload a video, select the target language, and get the translated video.",
    fn=translate_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(
            label="Target Language",
            choices=[
                "spa",  # Spanish
                "fra",  # French
                "deu",  # German
                "ita",  # Italian
                "hin",  # Hindi
                "tel",  # Telugu
                "eng",  # English
            ],
        ),
    ],
    outputs=gr.Video(label="Translated Video"),
)

# Start the web app.
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fbe49ca8bb9f8a8342.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


