🎬 1. Extract Audio from Video

In [1]:
%pip install moviepy speechrecognition pydub

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from pathlib import Path

from moviepy.editor import VideoFileClip

# Load the source clip. `video` is intentionally left open: a later cell
# composites the subtitle clips on top of it.
video = VideoFileClip("Subtitle-Video/video/Demo-Original.mp4")

# A clip without an audio track exposes `audio` as None; fail with a clear
# message instead of an AttributeError on `None.write_audiofile`.
if video.audio is None:
    raise ValueError("Subtitle-Video/video/Demo-Original.mp4 has no audio track to transcribe")

# Create the scratch directory if needed so the cell also runs on a fresh checkout.
audio_path = Path("Subtitle-Video/temp_audio/temp_audio.wav")
audio_path.parent.mkdir(parents=True, exist_ok=True)

# Write the audio track as WAV for the speech-recognition step.
# `as_posix()` keeps the forward-slash path the original passed on every platform.
video.audio.write_audiofile(audio_path.as_posix())

MoviePy - Writing audio in Subtitle-Video/temp_audio/temp_audio.wav


                                                        

MoviePy - Done.




🧠 2. Transcribe Audio to Text (Speech Recognition)

In [3]:
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

In [4]:
# Load the WAV produced by the extraction step as a pydub segment, and create
# the recognizer instance that the transcription loop reuses for every chunk.
sound = AudioSegment.from_wav("Subtitle-Video/temp_audio/temp_audio.wav")
recognizer = sr.Recognizer()

In [5]:
# Cut the recording into spoken chunks: a split happens wherever the level
# stays below (overall dBFS of the recording - 14) for at least 700 ms.
chunks = split_on_silence(
    sound,
    min_silence_len=700,
    silence_thresh=sound.dBFS - 14,
)

In [6]:
# Accumulators for the transcription loop: `subtitles` collects
# (start_seconds, end_seconds, text) tuples; `start_time` is the running clock.
subtitles, start_time = [], 0

In [7]:
# Transcribe each chunk and record it as a (start, end, text) subtitle entry.
#
# NOTE(review): timestamps are estimated by summing padded chunk lengths, not
# read from the chunk's real offset in the recording — confirm the drift is
# acceptable for longer videos.

# Half a second of silence on each side of a chunk; loop-invariant, so built once.
padding = AudioSegment.silent(duration=500)

for i, chunk in enumerate(chunks):
    audio_chunk = padding + chunk + padding
    chunk_filename = f"Subtitle-Video/temp_audio/chunk{i}.wav"
    audio_chunk.export(chunk_filename, format="wav")

    # The time slot this chunk occupies is known before recognition is attempted.
    end_time = start_time + len(audio_chunk) / 1000.0

    with sr.AudioFile(chunk_filename) as source:
        audio = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # Unintelligible chunk: no subtitle is emitted for it.
        pass
    else:
        subtitles.append((start_time, end_time, text))

    # Always advance the clock, even for unrecognised chunks. Skipping this
    # (as the previous `continue` did) shifts every later subtitle earlier.
    start_time = end_time

💬 3. Burn Subtitles into Video

In [8]:
from moviepy.editor import TextClip, CompositeVideoClip

In [9]:
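# Earlier attempt, kept for reference only: it pointed MoviePy at `convert.exe`.
# The next cell configures `magick.exe` instead.
# import moviepy.config as mpy_config
# mpy_config.change_settings({"IMAGEMAGICK_BINARY": r"C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\convert.exe"})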

In [10]:
import os

from moviepy.config import change_settings

# Tell MoviePy where the ImageMagick executable lives (TextClip needs it).
# An IMAGEMAGICK_BINARY environment variable takes precedence, so the notebook
# need not be edited on other machines; otherwise the default Windows install
# location is used.
# The fallback is a raw string, so each backslash is written once: doubling
# them inside r"..." would put literal double separators into the path.
imagemagick_binary = (
    os.environ.get("IMAGEMAGICK_BINARY")
    or r"C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
)
change_settings({"IMAGEMAGICK_BINARY": imagemagick_binary})

In [11]:
from moviepy.editor import TextClip

# Smoke test: render a single text frame to confirm MoviePy can find ImageMagick.
# If ImageMagick is not detected on Windows, see:
# https://stackoverflow.com/questions/51928807/moviepy-cant-detect-imagemagick-binary-on-windows
clip = TextClip(
    "Hello, world!",
    fontsize=70,
    color='white',
    bg_color='black',
)
clip.save_frame("test_output.png")

In [12]:
# Build one caption clip per transcribed segment: white-on-black text anchored
# at the bottom centre, shown from the segment's start for its full duration.
subtitle_clips = [
    TextClip(caption, fontsize=24, color='white', bg_color='black', method="caption")
    .set_position(("center", "bottom"))
    .set_start(begin)
    .set_duration(finish - begin)
    for begin, finish, caption in subtitles
]

In [13]:
from pathlib import Path

# Create the output directory if needed so rendering also works on a fresh checkout.
output_path = Path("Subtitle-Video/output/Output-Demo.mp4")
output_path.parent.mkdir(parents=True, exist_ok=True)

# Overlay the subtitle clips on the original video and encode the result as H.264.
# `as_posix()` keeps the forward-slash path the original passed on every platform.
final_video = CompositeVideoClip([video] + subtitle_clips)
final_video.write_videofile(output_path.as_posix(), codec="libx264")

Moviepy - Building video Subtitle-Video/output/Output-Demo.mp4.
MoviePy - Writing audio in Output-DemoTEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video Subtitle-Video/output/Output-Demo.mp4



                                                               

Moviepy - Done !
Moviepy - video ready Subtitle-Video/output/Output-Demo.mp4
