# Subtitle Generator

<a target="_blank" href="https://colab.research.google.com/github/imkasen/video-tools/blob/main/subtitle_generator.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# @title ## 1. Install Requirements
# yt-dlp is the downloader CLI invoked in step 2; faster-whisper performs the
# transcription and openai-whisper supplies the subtitle writer used in step 4.
# NOTE(review): ctranslate2 is pinned to 4.4.0 — presumably to stay compatible
# with the CUDA/cuDNN libraries of the Colab runtime; confirm before changing the pin.
%pip install --quiet yt-dlp openai-whisper faster-whisper ctranslate2==4.4.0

In [None]:
# @title ## 2. Download Video
# @markdown For example, input a YouTube link: https://www.youtube.com/watch?v=lytxafTXg6c

import subprocess
import os
import re


def download_audio(video_url: str) -> str | None:
    """
    Use yt-dlp to download the audio from a given video URL.
    """

    try:
        command = [
            "yt-dlp",
            "-x",
            "--audio-format",
            "mp3",
            "-o",
            "%(title)s.%(ext)s",
            video_url
        ]
        process = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Download success!")

        # Extract file name
        # print(f"Output: {process.stdout}")
        match = re.search(r"\[ExtractAudio\] Destination: (.*\.mp3)", process.stdout)
        if match:
            return match.group(1)
        else:
            print("Can not get file name")
            return None
    except subprocess.CalledProcessError as e:
        print(e.stderr)
        return None
    except FileNotFoundError:
        print("Error: Can not find \"yt-dlp\".")
        return None


# Colab form field: the URL handed to `download_audio`.
video_link = ""  # @param {type: "string"}

if video_link:
    audio_filename = download_audio(video_link)
    if audio_filename:
        # Derive the extension-less name only after a successful download:
        # `download_audio` returns None on failure, and `os.path.splitext(None)`
        # would raise TypeError before the failure message could be printed.
        prefix_filename, _ = os.path.splitext(audio_filename)
        print(f"Audio file path: \"{audio_filename}\"")
    else:
        print("Fail to download.")
else:
    print("Please enter a video link.")

In [None]:
# @title ## 3. Upload Video/Audio
# @markdown Alternatively, upload your own video or audio file instead of downloading one in step 2.

from google.colab import files
import os
import mimetypes
import subprocess


# Ask the browser for a file; the returned dict is keyed by uploaded file name.
uploaded: dict = files.upload()

# Only a single upload is processed. With zero or several files the name stays
# empty, no MIME type is guessed, and the cell ends with "Unknown file type!".
uploaded_filename: str = ""
if len(uploaded) == 1:
    uploaded_filename = next(iter(uploaded))

# Replace spaces in the stored file name with underscores.
if " " in uploaded_filename:
    new_filename = uploaded_filename.replace(" ", "_")
    os.rename(uploaded_filename, new_filename)
    print(f"Rename file from '{uploaded_filename}' to '{new_filename}'")
    uploaded_filename = new_filename

# `prefix_filename` (name without extension) and `audio_filename` are read by
# the later transcription and download steps.
prefix_filename, _ = os.path.splitext(uploaded_filename)
mime_type, _ = mimetypes.guess_type(uploaded_filename)
audio_filename: str = ""

if mime_type and mime_type.startswith("video/"):
    # Strip the video stream (-vn) and encode the audio track as AAC,
    # overwriting any previous output (-y).
    audio_filename = f"{prefix_filename}.aac"
    ffmpeg_command = [
        "ffmpeg",
        "-hide_banner",
        "-v",
        "error",
        "-i",
        uploaded_filename,
        "-vn",
        "-c:a",
        "aac",
        audio_filename,
        "-y",
    ]
    try:
        # Pass an argument list instead of a shell string so that quotes, `$`
        # or backticks in the uploaded file name cannot break or inject into
        # the command. A failing ffmpeg run is detected by the file check below.
        subprocess.run(ffmpeg_command, check=False)
    except FileNotFoundError:
        print("Error: Can not find \"ffmpeg\".")
    if os.path.isfile(audio_filename):
        print(f'Audio file: "{audio_filename}" extracted!')
    else:
        print(f'Fail to extract audio file: "{audio_filename}"!')
elif mime_type and mime_type.startswith("audio/"):
    # Audio uploads are used as they are.
    audio_filename = uploaded_filename
    print(f'Use audio file: "{audio_filename}"')
else:
    print("Unknown file type!")

In [None]:
# @title ### (Optional) Download Audio
# @markdown Download the extracted audio file to your browser's default download path.

from google.colab import files


# `audio_filename` is set by step 2 or step 3 and is None/empty when that step
# did not produce an audio file; skip the browser download in that case instead
# of letting `files.download` fail on an invalid path.
if audio_filename:
    files.download(audio_filename)
else:
    print("No audio file to download. Run step 2 or step 3 first.")

In [None]:
# @title ## 4. Extract Subtitle

from faster_whisper import WhisperModel
from whisper.utils import get_writer
from dataclasses import asdict
import torch


# Run on the GPU when one is available; use half precision there and 8-bit
# integer quantisation on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
compute_type: str = "float16" if device.type == "cuda" else "int8"

print("Download whisper model.")
model = WhisperModel("large-v3", device=device.type, compute_type=compute_type)

print("Transcribe audio.")
# The prompt nudges the model towards Simplified Chinese characters; the VAD
# filter is enabled with a 500 ms minimum-silence setting.
segments, _ = model.transcribe(
    audio_filename,
    beam_size=5,
    initial_prompt="Please use Simplified Chinese to display Chinese character.",
    vad_filter=True,
    vad_parameters={"min_silence_duration_ms": 500},
)

# Convert faster-whisper segments into the plain-dict layout expected by the
# openai-whisper writer: {"text": ..., "segments": [...]}.
# NOTE(review): `segments` is presumably a lazy generator (see the faster-whisper
# docs), so it is consumed exactly once here.
segs_lst: list = []
for segment in segments:
    # NOTE(review): `asdict` requires dataclass segments; releases that expose
    # NamedTuple segments would presumably need `segment._asdict()` — confirm
    # against the installed faster-whisper version.
    segment_dict = asdict(segment)
    # Drop the per-word entry, presumably so the writer emits segment-level cues.
    segment_dict.pop("words")
    segs_lst.append(segment_dict)
text: str = "".join(seg["text"] for seg in segs_lst)
result: dict[str, str | list] = {"text": text, "segments": segs_lst}

print("Save subtitle.")
# Use the `writer` method in openai-whisper to save the subtitle file to the current runtime environment.
writer = get_writer("srt", ".")
# The writer strips the extension from the path it receives before appending
# ".srt". Passing the bare `prefix_filename` would truncate names that contain
# a dot (e.g. "Mr. Robot" -> "Mr.srt"), so the later download of
# f"{prefix_filename}.srt" would not find the file. Pass a name that already
# carries an extension so the stem survives intact.
writer(result, f"{prefix_filename}.srt")

In [None]:
# @title ## 5. Download Subtitle
# @markdown Download the subtitle file to your browser's default download path.

from google.colab import files


# Step 4 saves the subtitle into the current directory with the "srt" writer;
# rebuild that file name from `prefix_filename` and send it to the browser.
# NOTE(review): this assumes step 4 named the file exactly `<prefix_filename>.srt`
# — verify for names that contain dots.
files.download(f"{prefix_filename}.srt")