In [80]:
# Install required libraries into the kernel's own environment (%pip, not !pip).
# moviepy is held below 2.0 because this notebook imports from `moviepy.editor`
# and uses `subclip` / `set_audio`, which belong to the MoviePy 1.x API.
%pip install "moviepy<2" torch torchaudio transformers sentencepiece gtts ipywidgets

import os
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
import torch
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
from gtts import gTTS
from IPython.display import display, HTML
import ipywidgets as widgets



In [81]:
# Streamlit serves the app.py written further down; %pip installs into the running kernel's environment.
%pip install streamlit



In [82]:
# streamlit-webrtc for in-browser recording, plus exactly one OpenCV wheel.
# opencv-python and opencv-python-headless both provide the `cv2` package and
# should not be installed side by side; the headless build is enough here
# because the app only uses cv2.VideoWriter (no GUI windows).
%pip install streamlit-webrtc opencv-python-headless



In [83]:
%%writefile app.py
import streamlit as st
import os
import tempfile
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
import torch
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer
from gtts import gTTS
from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
import av
import cv2

# Function to extract audio from video
def extract_audio(video_path, output_path='temp_audio.mp3'):
    """Write the audio track of a video file to ``output_path``.

    Args:
        video_path: Path of the video file to read.
        output_path: Destination audio file.

    Returns:
        ``output_path`` on success. ``None`` when the video has no audio
        track or extraction fails; an error is shown through Streamlit in
        both cases.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        audio = video.audio
        # Clips without an audio stream expose `audio` as None; report that
        # explicitly instead of surfacing an opaque AttributeError message.
        if audio is None:
            st.error("An error occurred during audio extraction: the video has no audio track.")
            return None
        audio.write_audiofile(output_path)
        st.success("Audio extracted successfully.")
        return output_path
    except Exception as e:
        st.error(f"An error occurred during audio extraction: {str(e)}")
        return None
    finally:
        # Release the underlying reader on every path, including failures.
        if video is not None:
            video.close()

# Function to transcribe audio
def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the ``openai/whisper-small`` model.

    The audio is down-mixed to mono, resampled to 16 kHz and transcribed in
    consecutive 30-second windows. Whisper's feature extractor pads or
    truncates each input to a single 30-second window, so feeding the whole
    waveform at once would silently drop everything after the first window.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcription as one string (window transcripts joined by a
        space), or an empty string when the file contains no samples.

    Note:
        Windows are cut at fixed offsets, so a word spanning a boundary may be
        transcribed imperfectly.
    """
    model_name = "openai/whisper-small"
    processor = WhisperProcessor.from_pretrained(model_name)
    model = WhisperForConditionalGeneration.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    target_rate = 16000
    waveform, sample_rate = torchaudio.load(audio_file_path)
    if waveform.shape[0] > 1:
        # Average the channels to get a mono signal.
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    # squeeze(0) drops only the channel axis, so a one-sample file stays 1-D.
    samples = waveform.squeeze(0).numpy()
    if samples.size == 0:
        return ""

    window = 30 * target_rate  # samples per 30-second window
    pieces = []
    for start in range(0, len(samples), window):
        chunk = samples[start:start + window]
        input_features = processor(
            chunk, sampling_rate=target_rate, return_tensors="pt"
        ).input_features.to(device)
        predicted_ids = model.generate(input_features)
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
        if text:
            pieces.append(text)
    return " ".join(pieces)

# Function to translate text to Urdu
def translate_to_urdu(text):
    """Translate ``text`` to Urdu with ``facebook/nllb-200-distilled-600M``.

    Input that fits in the 512-token limit is translated in one pass. Longer
    input is split on sentence boundaries and packed into chunks that each
    fit the limit, so the tail of a long text is no longer silently dropped
    by truncation. A single sentence that exceeds the limit by itself is
    still truncated.

    Args:
        text: Source text to translate.

    Returns:
        The Urdu translation; chunk translations are joined with a space.
    """
    import re  # local import: only used for sentence splitting below

    model_name = "facebook/nllb-200-distilled-600M"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    urdu_code = "urd_Arab"
    max_tokens = 512

    def token_count(piece):
        # Length including the special tokens the tokenizer adds.
        return len(tokenizer(piece).input_ids)

    if token_count(text) <= max_tokens:
        chunks = [text]
    else:
        # Greedily pack whole sentences into chunks that stay within the limit.
        chunks = []
        current = ""
        for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
            candidate = f"{current} {sentence}" if current else sentence
            if current and token_count(candidate) > max_tokens:
                chunks.append(current)
                current = sentence
            else:
                current = candidate
        if current:
            chunks.append(current)

    # Forcing the first generated token selects Urdu as the output language.
    urdu_token_id = tokenizer.convert_tokens_to_ids(urdu_code)
    translated_parts = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=max_tokens)
        translated = model.generate(
            **inputs,
            forced_bos_token_id=urdu_token_id,
            max_length=max_tokens
        )
        translated_parts.append(tokenizer.batch_decode(translated, skip_special_tokens=True)[0])
    return " ".join(translated_parts)

# Function to generate Urdu audio
def generate_urdu_audio(text, output_path='translated_audio.mp3'):
    """Synthesize ``text`` as Urdu speech with gTTS and save it.

    Args:
        text: Text to speak.
        output_path: File the audio is saved to.

    Returns:
        ``output_path``, unchanged.
    """
    gTTS(text, lang='ur').save(output_path)
    return output_path

# Function to process video
def process_video(video_path):
    """Dub a video with an Urdu translation of its spoken audio.

    Steps: extract the audio track, transcribe it, translate the transcript
    to Urdu, synthesize Urdu speech, and write a copy of the video whose audio
    is the synthesized speech (repeated or trimmed to the video's length).

    Args:
        video_path: Path of the source video.

    Returns:
        Path of the written ``*_translated.mp4`` file, or ``None`` when audio
        extraction fails.
    """
    # Local import: concatenate_audioclips comes from the same moviepy.editor
    # namespace the file already imports from.
    from moviepy.editor import concatenate_audioclips

    # Extract audio
    audio_path = extract_audio(video_path)
    if not audio_path:
        return

    urdu_audio_path = None
    video = None
    urdu_audio = None
    try:
        # Transcribe audio
        transcription = transcribe_audio(audio_path)
        st.write("Transcription:", transcription)

        # Translate to Urdu
        urdu_translation = translate_to_urdu(transcription)
        st.write("Urdu Translation:", urdu_translation)

        # Generate Urdu audio
        urdu_audio_path = generate_urdu_audio(urdu_translation)

        # Combine Urdu audio with original video
        video = VideoFileClip(video_path)
        urdu_audio = AudioFileClip(urdu_audio_path)

        # If the Urdu audio is shorter, loop it to match the video duration.
        # The copies must be concatenated: a CompositeAudioClip would start
        # every copy at t=0 and merely play them on top of each other.
        if urdu_audio.duration < video.duration:
            n_loops = int(video.duration / urdu_audio.duration) + 1
            dubbed_audio = concatenate_audioclips([urdu_audio] * n_loops).subclip(0, video.duration)
        else:
            dubbed_audio = urdu_audio.subclip(0, video.duration)

        final_video = video.set_audio(dubbed_audio)

        # Save the video file
        output_path = os.path.splitext(video_path)[0] + "_translated.mp4"
        final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
    finally:
        # Close the readers first so the temp files are no longer held open,
        # then clean up temporary files whether or not processing succeeded.
        for clip in (urdu_audio, video):
            if clip is not None:
                clip.close()
        for temp_path in (audio_path, urdu_audio_path):
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)

    st.success(f"Process completed. Translated video saved as {output_path}")
    return output_path

# Function to process text
def process_text(text):
    """Translate ``text`` to Urdu, display the translation, and synthesize it.

    Args:
        text: Source text to translate.

    Returns:
        Path of the generated Urdu audio file.
    """
    translated = translate_to_urdu(text)
    st.write("Urdu Translation:", translated)
    return generate_urdu_audio(translated)

# Function to process audio
def process_audio(audio_path):
    """Transcribe an audio file, then translate and synthesize it in Urdu.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        Path of the generated Urdu audio file.
    """
    spoken_text = transcribe_audio(audio_path)
    st.write("Transcription:", spoken_text)
    # Translating, displaying and synthesizing is exactly what process_text does.
    return process_text(spoken_text)

# Class that collects the video frames of a WebRTC stream
class VideoProcessor:
    """Keeps every frame passed to ``recv`` so the recording can be saved later."""

    def __init__(self):
        # Frames as BGR arrays, in arrival order; grows for as long as frames arrive.
        self.frames = list()

    def recv(self, frame):
        """Store the frame as a BGR array and hand an equivalent frame back."""
        bgr_image = frame.to_ndarray(format="bgr24")
        self.frames.append(bgr_image)
        return av.VideoFrame.from_ndarray(bgr_image, format="bgr24")

# Streamlit app
def _save_upload_to_temp(uploaded_file, default_suffix):
    """Write an uploaded file to a named temporary file and return its path.

    The upload's own extension is kept (``default_suffix`` is used only when
    it has none), so the temp file name matches the uploaded content type.
    The caller is responsible for deleting the file.
    """
    suffix = os.path.splitext(uploaded_file.name)[1] or default_suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.getvalue())
        return temp_file.name


def _remove_if_exists(*paths):
    """Delete each given path that is set and still exists."""
    for path in paths:
        if path and os.path.exists(path):
            os.remove(path)


def _translate_and_play_text(text):
    """Translate text to Urdu speech, play it, then delete the audio file."""
    if not text.strip():
        st.warning("Please enter some text to translate.")
        return
    urdu_audio_path = None
    try:
        with st.spinner("Translating..."):
            urdu_audio_path = process_text(text)
        st.audio(urdu_audio_path)
    finally:
        _remove_if_exists(urdu_audio_path)


def main():
    """Render the app: pick an input type, run the matching pipeline, show the result."""
    st.set_page_config(page_title="English to Urdu Translation App", layout="wide")

    st.title("English to Urdu Translation App")

    # Option to choose input type
    option = st.selectbox("Choose an option:",
                          ("Upload Video", "Record Video", "Enter Text", "Upload Text File", "Upload Audio"))

    if option == "Upload Video":
        st.subheader("Upload Your Video")
        uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
        if uploaded_file is not None:
            st.success("Video uploaded successfully.")

            if st.button("Translate Video"):
                # Write the temp copy only once processing is requested, so
                # plain reruns of the script do not pile up temp files.
                temp_video_path = _save_upload_to_temp(uploaded_file, ".mp4")
                try:
                    with st.spinner("Processing video..."):
                        translated_video_path = process_video(temp_video_path)
                    if translated_video_path:
                        st.video(translated_video_path)
                finally:
                    _remove_if_exists(temp_video_path)

    elif option == "Record Video":
        st.subheader("Record Your Video")
        ctx = webrtc_streamer(
            key="video-recorder",
            mode=WebRtcMode.SENDRECV,
            rtc_configuration=RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}),
            video_processor_factory=VideoProcessor,
            async_processing=True,
        )

        if st.button("Stop recording and translate"):
            # Snapshot the list: the processor may still be appending frames.
            frames = list(ctx.video_processor.frames) if ctx.video_processor else []
            if frames:
                # NamedTemporaryFile reserves a unique path atomically, unlike
                # the deprecated tempfile.mktemp.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
                    temp_video_path = temp_file.name
                try:
                    # Save frames as a video file
                    # NOTE(review): only video frames are written here, so the
                    # file has no audio track, while process_video begins by
                    # extracting audio. Confirm whether audio capture was
                    # intended for this mode.
                    height, width = frames[0].shape[:2]
                    out = cv2.VideoWriter(temp_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height))
                    for frame in frames:
                        out.write(frame)
                    out.release()

                    st.success("Video recorded successfully.")

                    with st.spinner("Processing video..."):
                        translated_video_path = process_video(temp_video_path)
                    if translated_video_path:
                        st.video(translated_video_path)
                finally:
                    _remove_if_exists(temp_video_path)
            else:
                st.error("No video recorded. Please record a video before translating.")

    elif option == "Enter Text":
        st.subheader("Enter Your Text")
        text = st.text_area("Enter English text:")
        if st.button("Translate Text"):
            _translate_and_play_text(text)

    elif option == "Upload Text File":
        st.subheader("Upload Your Text File")
        uploaded_file = st.file_uploader("Choose a text file", type=["txt"])
        if uploaded_file is not None:
            # Replace undecodable bytes rather than crashing on a non-UTF-8 file.
            text = uploaded_file.getvalue().decode("utf-8", errors="replace")
            st.text_area("File contents:", text)
            if st.button("Translate Text"):
                _translate_and_play_text(text)

    elif option == "Upload Audio":
        st.subheader("Upload Your Audio File")
        uploaded_file = st.file_uploader("Choose an audio file", type=["mp3", "wav"])
        if uploaded_file is not None:
            if st.button("Translate Audio"):
                temp_audio_path = _save_upload_to_temp(uploaded_file, ".mp3")
                urdu_audio_path = None
                try:
                    with st.spinner("Translating..."):
                        urdu_audio_path = process_audio(temp_audio_path)
                    st.audio(urdu_audio_path)
                finally:
                    _remove_if_exists(temp_audio_path, urdu_audio_path)

    # Footer: author contact details and acknowledgements
    st.markdown("---")
    st.markdown("""
    Created by Abbas Khan
    - Email: abhikhan2999@gmail.com
    - LinkedIn: [Abbas Khan](https://www.linkedin.com/in/abhi-khan-71467526b/)
    """)

    st.markdown("---")
    st.subheader("Special Tributes to My Mentors")
    st.markdown("""
    I would like to express my gratitude to the following mentors who have greatly influenced my journey:

    - [Andrew Ng](https://learn.deeplearning.ai/) - For his invaluable contributions to AI education
    - [Irfan Malik](https://www.youtube.com/channel/UCKc0J2A7znmiFwIjXUvmdvw) - For his insightful YouTube tutorials
    - [Ammar Tufail](https://www.youtube.com/@Codanics) - For his excellent coding tutorials on Codanics

    Their guidance and resources have been instrumental in my learning journey.
    """)

if __name__ == "__main__":
    main()

Overwriting app.py


In [84]:
import subprocess
import threading

# Function to run the Streamlit app
def run_streamlit():
    """Serve app.py with Streamlit and block until the server process exits.

    Streamlit is started through the kernel's own interpreter, so it runs in
    the same environment as the notebook. The port is fixed at 8501 because
    the tunnel cell forwards to localhost:8501; if that port is taken the
    launch fails visibly instead of moving to another port.
    """
    import sys  # local import: only needed to locate the kernel's interpreter

    subprocess.run([
        sys.executable, "-m", "streamlit", "run", "app.py",
        "--server.port", "8501",
        "--server.headless", "true",
    ])

# Run Streamlit app in a separate thread so the notebook stays usable;
# daemon=True keeps the blocked thread from holding the interpreter open at exit.
thread = threading.Thread(target=run_streamlit, daemon=True)
thread.start()

In [None]:
# Expose the local Streamlit server (port 8501) through a public serveo.net SSH tunnel; anyone with the printed URL can reach the app
!ssh -o StrictHostKeyChecking=no -R 80:localhost:8501 serveo.net

[32mForwarding HTTP traffic from https://8653df1a34e93313ce51baaf06722bc9.serveo.net
[0mHTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/static/media/SourceSansPro-Regular.0d69e5ff5e92ac64a0c9.woff2
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/static/media/SourceSansPro-SemiBold.abed79cd0df1827e18cf.woff2
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/static/media/SourceSansPro-Bold.118dea98980e20a81ced.woff2
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/static/js/main.d55f6a3c.js
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/static/css/main.29bca1b5.css
HTTP request from 103.137.24.159 to https://8653df1a34e93313ce51baaf06722bc9.serveo.net/_stcore/health
HTTP request from 103.137.24.159 to ht