# 🎥 Tạo Video Giảng Bài từ Slide PDF + Âm Thanh + Phụ Đề

Notebook này giúp bạn tự động:
- Chia slide từ PDF
- Căn thời gian dựa vào `slide_marks.txt`
- Chèn âm thanh nền `.wav`
- Chèn phụ đề `.srt`
- Xuất ra file `.mp4` hoàn chỉnh

In [44]:
!pip install moviepy pysrt pdf2image PyMuPDF
!apt-get install -y poppler-utils

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.8).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [35]:
from google.colab import files
uploaded = files.upload()

In [46]:
from pdf2image import convert_from_path
import os
os.makedirs("slides", exist_ok=True)
slides = convert_from_path("slide.pdf", dpi=150)
for i, slide in enumerate(slides):
    slide.save(f"slides/slide_{i+1}.png", 'PNG')

In [49]:
def parse_time(t):
    h, m, s_ms = t.split(":")
    s, ms = map(int, s_ms.replace(",", ".").split("."))
    return int(h) * 3600 + int(m) * 60 + s + ms / 1000

# Đọc file mốc thời gian chuyển slide
with open("slide_transitions.txt", "r", encoding="utf-8") as f:
    slide_times = []
    for line in f:
        if "-->" in line:
            # Tách lấy phần sau dấu '-->'
            time_str = line.strip().split(" --> ")[1]
            slide_times.append(time_str)

# Chuyển đổi thời gian sang giây (float)
slide_seconds = [parse_time(t) for t in slide_times]
print(slide_seconds)


[0.0, 62.039, 113.56, 165.6, 203.0, 253.879, 290.199]


In [50]:
from moviepy.editor import *
import pysrt

# Tải âm thanh trước để lấy độ dài
audio = AudioFileClip("audio.wav")

clips = []
for i in range(len(slide_seconds)):
    start = slide_seconds[i]
    # Nếu là slide cuối, đặt end = độ dài audio
    end = slide_seconds[i+1] if i+1 < len(slide_seconds) else audio.duration
    dur = end - start
    img = ImageClip(f"slides/slide_{i+1}.png").set_duration(dur).resize(height=720)
    clips.append(img)

video = concatenate_videoclips(clips).set_audio(audio)


In [51]:
from moviepy.editor import *
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pysrt
import textwrap
import matplotlib.font_manager as fm

# Load phụ đề
subs = pysrt.open("script.srt")
subclips = []

# Cấu hình font ổn định
font_path = fm.findfont("DejaVu Sans")
font = ImageFont.truetype(font_path, 32)

W, H = 1280, 720  # Kích thước video

def make_subtitle_img(text, duration):
    lines = textwrap.wrap(text, width=50)
    img = Image.new("RGB", (W, 100), color="black")
    draw = ImageDraw.Draw(img)
    y_text = 10
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        draw.text(((W - w) / 2, y_text), line, font=font, fill="white")
        y_text += h + 5
    return ImageClip(np.array(img)).set_duration(duration).set_position(("center", "bottom"))

# Tạo từng subtitle clip
for sub in subs:
    start = sub.start.ordinal / 1000
    end = sub.end.ordinal / 1000
    duration = end - start
    subclip = make_subtitle_img(sub.text, duration).set_start(start).set_end(end)
    subclips.append(subclip)


In [52]:
final = CompositeVideoClip([video] + subclips)
final.write_videofile("video_final.mp4", fps=24)


Moviepy - Building video video_final.mp4.
MoviePy - Writing audio in video_finalTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video video_final.mp4





Moviepy - Done !
Moviepy - video ready video_final.mp4


In [None]:
from google.colab import files
files.download("cheo_hoa_ma_tran_final.mp4")

FileNotFoundError: Cannot find file: cheo_hoa_ma_tran_final.mp4