Milestone 5 Group 11

Collaborators: Jill Shah & Dhruvik Patel

# Instructions
1. Run the cell in the library installation section
2. Restart the machine
3. Run all cells
4. Click the link from the output in website link section

# Library Installation

In [1]:
%pip install TTS
%pip install pytube
%pip install pyngrok
%pip install streamlit
%pip install sacremoses
%pip install transformers
%pip install sentencepiece
%pip install git+https://github.com/openai/whisper.git

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-3q93d6nz
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-3q93d6nz
  Resolved https://github.com/openai/whisper.git to commit e58f28804528831904c3b6f2c0e473f346223433
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Creating python file

In [2]:
%%writefile app.py
import os
import re
import site
import torch
import whisper
import streamlit as st
from pytube import YouTube
from moviepy.editor import *
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
from transformers import MarianMTModel, MarianTokenizer

def format_timestamp(seconds):
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"

st.title("Translate YouTube Video with Subtitles")
link = st.text_input("YouTube Video URL")

if st.button("Transcribe"):
    with st.spinner("Downloading Video..."):
        if len(link) == 0:
            st.write("The link is empty.")
            st.write("Using default link.")
            link = "https://www.youtube.com/watch?v=G3Eup4mfJdA"
        video = YouTube(link)
        if video.age_restricted:
            st.write("The video is age restricted.")
            st.write("Using default link.")
            link = "https://www.youtube.com/watch?v=G3Eup4mfJdA"
        video_stream = video.streams.get_highest_resolution()
        video_name = video_stream.default_filename
        video_stream.download(filename=video_name)
        video_name = os.path.splitext(video_name)[0]

    with st.spinner("Extracting Audio..."):
        video = VideoFileClip(video_name + ".mp4")
        audio = video.audio
        video = video.without_audio()
        video.write_videofile(video_name + "1.mp4")
        audio.write_audiofile(video_name + ".mp3")
        os.system("rm " + video_name.replace(" ", "\ ") + ".mp4")
        os.rename(video_name + "1.mp4", video_name + ".mp4")

    with st.spinner("Transcribing Audio..."):
        model = whisper.load_model("base")
        result = model.transcribe(video_name + ".mp3")

    with st.spinner("Extracting Subtitles..."):
        subtitle = []
        for i, segment in enumerate(result["segments"], start=1):
            subtitle.append(f"{i}\n")
            text = segment["text"].strip()
            start = format_timestamp(segment["start"])
            end = format_timestamp(segment["end"])
            subtitle.append(f"{start} --> {end}\n")
            subtitle.append(f"{text}\n\n")

    with st.spinner("Translating Subtitles..."):
        model_name = "Helsinki-NLP/opus-mt-en-es"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)

        text = ""
        i = 0

        for line in subtitle:
            line = line.strip()
            if re.match(r'\d{1}', line):
                i = i + 1
                continue
            elif re.match(r'\d{2}', line):
                i = i + 1
                continue
            elif re.match(r'\d{2}:\d{2}', line):
                i = i + 1
                continue
            else:
                inputs = tokenizer(line, return_tensors="pt", padding=True, truncation=True, max_length=512)
                translated = model.generate(**inputs)
                translation = tokenizer.decode(translated[0], skip_special_tokens=True)
                subtitle[i] = translation + "\n\n"
                text = text + " " + translation
                torch.cuda.empty_cache()
                i = i + 1

    with st.spinner("Converting Text to Speech..."):
        location = site.getsitepackages()[0]
        path = location+"/TTS/.models.json"
        model_manager = ModelManager(path)
        model_path, config_path, model_item = model_manager.download_model("tts_models/es/mai/tacotron2-DDC")
        voc_path, voc_config_path, _ = model_manager.download_model(model_item["default_vocoder"])
        synthesizer = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            vocoder_checkpoint=voc_path,
            vocoder_config=voc_config_path
        )

        output = synthesizer.tts(text)
        synthesizer.save_wav(output, f"{video_name}.wav")

    with st.spinner("Building Video..."):
        video_clip = VideoFileClip(video_name + ".mp4")
        audio_clip = AudioFileClip(video_name + ".wav")
        final_clip = video_clip.set_audio(audio_clip)
        final_clip.write_videofile(video_name + "1.mp4")
        os.system("rm " + video_name.replace(" ", "\ ") + ".mp4")
        os.rename(video_name + "1.mp4", video_name + ".mp4")
        with open(f"{video_name}.srt", "w") as file:
            for line in subtitle:
                file.write(line)
        if os.path.exists("output.mp4"):
            os.system("rm output.mp4")
        os.system("ffmpeg -i " + video_name.replace(" ", "\ ") + ".mp4 -vf subtitles=" + video_name.replace(" ", "\ ") + ".srt output.mp4")

    st.header(video_name)
    st.video("output.mp4")

Writing app.py


# ngrok

In [3]:
!ngrok authtoken 2HCdQXYcQU8iQXhKK0h5mOU4cPL_3PNrcxUMUJJVtobGMGHr7
!wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
!tar -xvzf ngrok-v3-stable-linux-amd64.tgz
get_ipython().system_raw('./ngrok http 8501 &')

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
--2023-12-11 02:48:42--  https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
Resolving bin.equinox.io (bin.equinox.io)... 52.202.168.65, 54.161.241.46, 18.205.222.128, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.202.168.65|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8812405 (8.4M) [application/octet-stream]
Saving to: ‘ngrok-v3-stable-linux-amd64.tgz’


2023-12-11 02:48:44 (4.29 MB/s) - ‘ngrok-v3-stable-linux-amd64.tgz’ saved [8812405/8812405]

ngrok


# Website Link

In [4]:
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/usr/lib/python3.10/json/__init__.py", line 293, in load
    return loads(fp.read(),
  File "/usr/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)


# Running the app

In [None]:
!streamlit run app.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.124.141.159:8501[0m
[0m
