<a href="https://colab.research.google.com/github/detektor777/colab_list/blob/main/embed_subtitles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Go to Google Drive and upload the video file to the root folder of your drive ("My Drive"):

https://drive.google.com/drive/


In [None]:
#@title ##**Enter video file name** { display-mode: "form" }
from google.colab import files

# Ask for the name of the video that was uploaded to the root of Google Drive.
# Surrounding whitespace is dropped; an empty answer selects the default name.
input_name = input("Enter the file name and press enter (default: input.mp4) ").strip() or "input.mp4"
inptut = input_name  # original (misspelled) name, kept so existing references keep working
video_filename = f"/content/drive/MyDrive/{input_name}"

# Split the extension off at the last dot. A name without any dot used to
# crash the two-value unpacking of rsplit(); treat it as "no extension".
file_name, _dot, file_extension = input_name.rpartition('.')
if not _dot:
    file_name, file_extension = input_name, ""

In [None]:
#@title ##**Connecting to Google Drive** { display-mode: "form" }
from google.colab import drive

# Mount the user's Drive so the notebook can read the source video from it and
# write the result back. No copy step belongs here: output_filename is only
# defined later, in the Run cell, and that cell already writes the processed
# video directly into /content/drive/MyDrive/.
drive.mount('/content/drive')

In [None]:
#@title ##**Install** { display-mode: "form" }
# Install the ffmpeg command-line tool; all apt output (errors included) is discarded.
!apt-get install -y ffmpeg > /dev/null 2>&1
# Install chardet, which the Run cell imports to detect the subtitle file's encoding.
!pip install chardet

In [None]:
#@title ##**Upload subtitles** { display-mode: "form" }
# Imported here as well so this cell also works when it is run on its own.
from google.colab import files

print("Please upload the subtitle file for the video")
uploaded_subtitles = files.upload()

# Nothing to work with if the upload dialog was cancelled; fail with a clear
# message instead of a bare StopIteration from next() below.
if not uploaded_subtitles:
    raise RuntimeError("No subtitle file was uploaded. Run this cell again and choose a file.")

# Only the first uploaded file is used as the subtitle track.
subtitle_filename = next(iter(uploaded_subtitles))
print(f"Subtitle file '{subtitle_filename}' uploaded successfully")

In [None]:
#@title ##**Run** { display-mode: "form" }

import subprocess
import re
import os
import chardet

# Destination of the subtitled video: the Drive root, named after the source file
output_filename = "/content/drive/MyDrive/{}_output.mp4".format(file_name)

# Guess the text encoding of a file
def detect_encoding(file_path):
    """Return chardet's best guess for the encoding of ``file_path``.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    value of the ``'encoding'`` key of its result is returned as-is.
    """
    with open(file_path, 'rb') as handle:
        detection = chardet.detect(handle.read())
    return detection['encoding']

# Convert subtitles to UTF-8 if needed
def convert_subtitles_to_utf8(subtitle_filename):
    """Rewrite the subtitle file in place as UTF-8 when it uses another encoding.

    The encoding is guessed with ``detect_encoding``. Files reported as UTF-8
    or ASCII are left untouched; anything else (including UTF-8 with a BOM,
    which is reported under a different name) is decoded with the detected
    encoding and written back as plain UTF-8. If no encoding can be detected
    the file is left unchanged and a message is printed.

    Args:
        subtitle_filename: Path of the subtitle file to normalise.

    Returns:
        None. Progress is reported with ``print``.
    """
    encoding = detect_encoding(subtitle_filename)

    # The detector can yield None when it has no guess; calling .lower() on
    # that used to raise AttributeError.
    if encoding is None:
        print(f"Could not detect the encoding of subtitle file '{subtitle_filename}'; leaving it unchanged.")
        return

    # ASCII is a strict subset of UTF-8, so such files need no rewrite either.
    if encoding.lower() in ('utf-8', 'ascii'):
        print(f"Subtitle file '{subtitle_filename}' is already in UTF-8.")
        return

    print(f"Converting subtitles from {encoding} to UTF-8")
    with open(subtitle_filename, 'r', encoding=encoding) as f:
        subtitle_content = f.read()
    with open(subtitle_filename, 'w', encoding='utf-8') as f:
        f.write(subtitle_content)
    print(f"Subtitle file '{subtitle_filename}' successfully converted to UTF-8.")

# Get video duration using ffmpeg
def get_video_duration(video_file):
    """Return the duration of ``video_file`` in whole seconds, or None.

    Runs ``ffmpeg -i <file>`` and parses the ``Duration: HH:MM:SS.ff`` line
    from what ffmpeg prints on stderr. The fractional part is discarded.

    Args:
        video_file: Path of the video to inspect.

    Returns:
        The duration as an int number of seconds, or None when no
        ``Duration:`` timestamp is found in ffmpeg's output.
    """
    result = subprocess.run(
        ["ffmpeg", "-i", video_file],
        # Only stderr is parsed, so stdout does not need to be captured.
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
        # ffmpeg echoes file metadata, which is not guaranteed to be valid in
        # the default text encoding; replace bad bytes instead of raising.
        text=True, errors="replace",
    )
    # The dot before the fraction is escaped so it only matches a literal '.'.
    duration_match = re.search(r"Duration: (\d+):(\d+):(\d+)\.(\d+)", result.stderr)
    if duration_match is None:
        return None
    hours, minutes, seconds = (int(part) for part in duration_match.groups()[:3])
    return hours * 3600 + minutes * 60 + seconds

# Get video bitrate using ffmpeg
def get_video_bitrate(video_file):
    """Return the bitrate ffmpeg reports for ``video_file`` in kb/s, or None.

    Runs ``ffmpeg -i <file>`` and takes the first ``bitrate: N kb/s`` value
    found in what ffmpeg prints on stderr.

    Args:
        video_file: Path of the video to inspect.

    Returns:
        The bitrate as an int (kb/s), or None when no numeric bitrate is
        found in ffmpeg's output.
    """
    result = subprocess.run(
        ["ffmpeg", "-i", video_file],
        # Only stderr is parsed, so stdout does not need to be captured.
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
        # ffmpeg echoes file metadata, which is not guaranteed to be valid in
        # the default text encoding; replace bad bytes instead of raising.
        text=True, errors="replace",
    )
    bitrate_match = re.search(r"bitrate: (\d+) kb/s", result.stderr)
    if bitrate_match is None:
        return None
    return int(bitrate_match.group(1))

# Add subtitles with automatic encoding detection and show progress
def add_subtitles_with_progress(video_filename, subtitle_filename, output_filename):
    """Burn a subtitle file into a video with ffmpeg, printing progress.

    The subtitle file is first converted to UTF-8 if needed, then ffmpeg
    re-encodes the video (libx264 video, AAC audio) with the ``subtitles``
    filter applied. While ffmpeg runs, its ``time=`` status lines are turned
    into a percentage of the source duration. On failure the collected ffmpeg
    stderr output is printed.

    Args:
        video_filename: Path of the source video.
        subtitle_filename: Path of the subtitle file to render onto the video.
        output_filename: Path of the video to write; overwritten if present.

    Returns:
        None. Results and errors are reported with ``print``.
    """
    total_duration = get_video_duration(video_filename)
    if total_duration is None:
        print("Failed to get video duration.")
        return

    # Convert subtitles to UTF-8 if necessary
    convert_subtitles_to_utf8(subtitle_filename)

    # Get the original video bitrate
    original_bitrate = get_video_bitrate(video_filename)
    if original_bitrate is None:
        print("Failed to get video bitrate. Using default.")
    else:
        print(f"Original bitrate: {original_bitrate} kb/s")

    # NOTE: the subtitle path is placed into the filter string unescaped, so
    # names containing characters that are special in ffmpeg filter syntax
    # (such as ':' , ',' or quotes) may not be parsed as intended.
    ffmpeg_command = [
        "ffmpeg", "-y",  # Overwrite the output file without asking
        "-i", video_filename,
        "-vf", f"subtitles={subtitle_filename}",  # Subtitles filter
        "-c:v", "libx264",  # Use H.264 codec for video
        "-c:a", "aac",  # Use AAC codec for audio
    ]
    if original_bitrate is not None:
        # Only request a bitrate when one was detected; otherwise the literal
        # "Nonek" would be passed and ffmpeg would reject the command.
        ffmpeg_command += ["-b:v", f"{original_bitrate}k"]
    ffmpeg_command.append(output_filename)

    process = subprocess.Popen(
        ffmpeg_command,
        # stdout is never read here, so it must not be a pipe that could fill up.
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
        # Replace undecodable bytes in ffmpeg's log instead of raising mid-run.
        text=True, errors="replace",
    )

    time_pattern = re.compile(r'time=(\d+):(\d+):(\d+)\.(\d+)')

    # Collect detailed stderr output
    stderr_output = []

    for line in iter(process.stderr.readline, ""):
        stderr_output.append(line)
        match = time_pattern.search(line)
        # A duration of 0 (sub-second clip, fraction discarded) gives no
        # meaningful percentage and would divide by zero.
        if match is None or total_duration <= 0:
            continue
        hours, minutes, seconds = (int(part) for part in match.groups()[:3])
        current_time = hours * 3600 + minutes * 60 + seconds
        progress = min((current_time / total_duration) * 100, 100.0)
        # Use '\r' to overwrite the progress line
        print(f"\rProgress: {progress:.2f}%", end="")

    process.wait()

    # Print detailed error information if the process fails
    if process.returncode != 0:
        print("\nError during processing:")
        for error_line in stderr_output:
            print(error_line.strip())
    else:
        print("\nProcessing complete.")
        print(f"The processed video is saved as '{output_filename}'.")


# Start the subtitle burn-in with the names gathered by the earlier cells
add_subtitles_with_progress(
    video_filename=video_filename,
    subtitle_filename=subtitle_filename,
    output_filename=output_filename,
)



In [None]:
#@title ##**Download video** { display-mode: "form" }
from google.colab import files

# Offer the processed video (the path built in the Run cell) as a browser download.
files.download(output_filename)