# A5 小書產生器（拼音＋影片：gTTS／字幕／淡入淡出／BGM｜Voila 下載可用）
本 Notebook 以 **gTTS** 產生旁白、支援 **拼音**、**字幕燒入**、**淡入淡出**、**BGM**，
且在 **Voila／Binder** 下可直接下載 **DOCX／MP4**。封面與封底固定不加頁碼，內頁可勾選是否加頁碼（預設開）。

In [None]:
import io, os, re, base64, urllib.request, time, math, tempfile
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# --- 拼音：安全匯入 ---
try:
    from pypinyin import pinyin as pinyin_fn, Style
    HAVE_PINYIN = True
except Exception:
    HAVE_PINYIN = False
    pinyin_fn = None
    class _S: pass
    Style = _S()

# --- 文件處理 ---
from docx import Document
from docx.shared import Mm, Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# --- 影像與字型 ---
from PIL import Image, ImageDraw, ImageFont, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# --- 視訊（moviepy＋ffmpeg）：安全匯入 ---
try:
    from moviepy.editor import ImageClip, AudioFileClip, CompositeAudioClip, concatenate_videoclips
    import numpy as np
    MOVIEPY_OK = True
    MOVIEPY_ERR = ''
except Exception as e:
    MOVIEPY_OK = False
    MOVIEPY_ERR = str(e)

# --- gTTS ---
try:
    from gtts import gTTS
    HAVE_GTTS = True
except Exception:
    HAVE_GTTS = False

# --- 下載按鈕：安全匯入 ---
try:
    from ipywidgets import FileDownload
    HAVE_FD = True
except Exception:
    HAVE_FD = False

FWP='％'  # full-width percent sign; accepted next to ASCII '%' inside image tags

In [None]:
# ---------- 小工具 ----------
def ensure_rgb_jpeg(img_bytes, quality=92):
    """Re-encode arbitrary image bytes as JPEG bytes.

    Args:
        img_bytes: Encoded image data in any format Pillow can open.
        quality: JPEG quality passed to Pillow's encoder.

    Returns:
        The JPEG-encoded image as ``bytes``. Images that are not already
        ``RGB`` or ``L`` are converted to ``RGB`` first.
    """
    # Local import: the file-level PIL import list does not include ImageOps.
    from PIL import ImageOps

    im = Image.open(io.BytesIO(img_bytes))
    # Saving below does not carry EXIF over, so a photo that relies on the
    # EXIF orientation tag would come out rotated. Bake the rotation into the
    # pixels before re-encoding.
    im = ImageOps.exif_transpose(im)
    if im.mode not in ('RGB', 'L'):
        im = im.convert('RGB')
    out = io.BytesIO()
    im.save(out, format='JPEG', quality=quality)
    return out.getvalue()

def parse_and_strip_size_anywhere(text, default_pt=12):
    """Extract the first ``[NNpt]`` font-size tag from *text*.

    Returns:
        ``(size, text_without_tag)``. When no tag is present the result is
        ``(default_pt, text)`` with the text untouched. Only the first tag
        is consumed; later ones stay in the text.
    """
    found = re.search(r"\[(\d+)pt\]", text)
    if found is None:
        return default_pt, text
    before = text[:found.start()]
    after = text[found.end():]
    return int(found.group(1)), before + after

def has_skip_image_tag(text):
    """Return True when *text* ends with a ``[無圖]`` tag (trailing whitespace allowed)."""
    trailing_tag = re.search(r"\[\s*無圖\s*\]\s*$", text)
    return trailing_tag is not None

def strip_skip_image_tag(text):
    """Return *text* with a trailing ``[無圖]`` tag (and whitespace after it) removed."""
    trailing_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_tag.sub('', text)

def tokenize_line(line):
    """Split *line* into text and image-placeholder tokens.

    Recognised placeholders (the number and the percent sign are optional;
    the percent sign may be ASCII ``%`` or the full-width form in ``FWP``):

    * ``[圖片NN%]``            -> ``('single', NN or None)``
    * ``[左圖NN%][右圖MM%]``   -> ``('double', NN or None, MM or None)``

    Everything between placeholders is returned as ``('text', str)``. A
    trailing text token is always appended, even when it is empty.

    Returns:
        A list of token tuples in source order.
    """
    pct_sign = "[%" + FWP + "]?"
    # Named groups make the matched alternative explicit. Testing the
    # percentage groups instead would misclassify "[左圖][右圖]" (no numbers)
    # as a single-image tag, because both percentage groups are None.
    pattern = re.compile(
        r"(?P<double>\[\s*左圖\s*(?P<lp>\d+)?\s*" + pct_sign + r"\s*\]"
        r"\s*\[\s*右圖\s*(?P<rp>\d+)?\s*" + pct_sign + r"\s*\])"
        r"|\[\s*圖片\s*(?P<sp>\d+)?\s*" + pct_sign + r"\s*\]"
    )
    tokens = []
    pos = 0
    for m in pattern.finditer(line):
        if m.start() > pos:
            tokens.append(('text', line[pos:m.start()]))
        if m.group('double') is not None:
            lp, rp = m.group('lp'), m.group('rp')
            tokens.append(('double', int(lp) if lp else None, int(rp) if rp else None))
        else:
            sp = m.group('sp')
            tokens.append(('single', int(sp) if sp else None))
        pos = m.end()
    tokens.append(('text', line[pos:]))
    return tokens

def set_section_to_a5(section):
    """Set *section* to a 148 x 210 mm page with 15 mm margins on all sides."""
    section.page_width = Mm(148)
    section.page_height = Mm(210)
    margin_mm = 15
    section.left_margin = Mm(margin_mm)
    section.right_margin = Mm(margin_mm)
    section.top_margin = Mm(margin_mm)
    section.bottom_margin = Mm(margin_mm)

def set_page_number_start(section, start_at=1):
    """Make page numbering in *section* restart at *start_at*.

    Any existing ``w:pgNumType`` child of the section properties is replaced
    by a fresh one that carries only the ``w:start`` attribute.

    Args:
        section: A python-docx section; its ``_sectPr`` element is edited.
        start_at: First page number of the section (coerced with ``int``).
    """
    sectPr = section._sectPr
    pgNumType = OxmlElement('w:pgNumType')
    pgNumType.set(qn('w:start'), str(int(start_at)))
    for el in list(sectPr):
        if el.tag == pgNumType.tag:
            sectPr.remove(el)
    # w:sectPr children form an ordered sequence in the WordprocessingML
    # schema. w:pgNumType has to come before the siblings listed here, so
    # insert it ahead of the first one present instead of appending at the end.
    later_siblings = {qn(tag) for tag in (
        'w:cols', 'w:formProt', 'w:vAlign', 'w:noEndnote', 'w:titlePg',
        'w:textDirection', 'w:bidi', 'w:rtlGutter', 'w:docGrid',
        'w:printerSettings', 'w:sectPrChange',
    )}
    for el in sectPr:
        if el.tag in later_siblings:
            el.addprevious(pgNumType)
            break
    else:
        sectPr.append(pgNumType)

def add_text_paragraph(doc, text, size_pt, center=False):
    """Append *text* as a paragraph and return it.

    The text is stripped first; when nothing is left no paragraph is added
    and ``None`` is returned. The paragraph is centered when *center* is
    true and justified otherwise, and every run gets font size *size_pt*.
    """
    stripped = text.strip()
    if stripped == '':
        return None
    para = doc.add_paragraph(stripped)
    if center:
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    for run in para.runs:
        run.font.size = Pt(size_pt)
    return para

def add_single_image_paragraph(doc, img_bytes, width_pct=None, center=True):
    """Append a paragraph holding one picture.

    The usable width is the A5 page width minus both 15 mm margins. With
    ``width_pct=None`` the picture fills that width; otherwise it is scaled
    to the percentage and clamped to between 1 inch and the usable width.
    """
    usable_in = (148 - 15 - 15) / 25.4
    if width_pct is None:
        width_in = usable_in
    else:
        scaled = usable_in * (width_pct / 100.0)
        width_in = max(1.0, min(usable_in, scaled))
    jpeg = ensure_rgb_jpeg(img_bytes)
    para = doc.add_paragraph()
    if center:
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    picture_run = para.add_run()
    picture_run.add_picture(io.BytesIO(jpeg), width=Inches(width_in))

def add_double_image_table(doc, left_bytes, right_bytes, l_pct, r_pct):
    """Append a one-row, two-column table with a picture in each cell.

    Each column's base width is half of the usable page width after a
    0.2 inch allowance. A percentage scales that base width; the result is
    clamped to between 0.8 inch and the base width. ``None`` means the full
    base width.
    """
    usable_in = (148 - 15 - 15) / 25.4
    allowance_in = 0.2
    column_in = (usable_in - allowance_in) / 2.0

    def column_width(pct):
        wanted = column_in if pct is None else column_in * (pct / 100.0)
        return max(0.8, min(column_in, wanted))

    widths = (column_width(l_pct), column_width(r_pct))
    table = doc.add_table(rows=1, cols=2)
    table.autofit = True
    for col, (data, width_in) in enumerate(zip((left_bytes, right_bytes), widths)):
        cell_para = table.rows[0].cells[col].paragraphs[0]
        cell_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        jpeg = ensure_rgb_jpeg(data)
        cell_para.add_run().add_picture(io.BytesIO(jpeg), width=Inches(width_in))

def add_page_number_footer(section):
    """Write a centered ``-<PAGE field>-`` line into the footer of *section*.

    The first footer paragraph is reused when one exists; otherwise a new
    paragraph is added.
    """
    footer = section.footer
    existing = footer.paragraphs
    para = existing[0] if existing else footer.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def fld_char(kind):
        # Build a <w:fldChar w:fldCharType="kind"/> element.
        element = OxmlElement('w:fldChar')
        element.set(qn('w:fldCharType'), kind)
        return element

    para.add_run('-')
    instruction = OxmlElement('w:instrText')
    instruction.text = ' PAGE '
    field_run = OxmlElement('w:r')
    for child in (fld_char('begin'), instruction, fld_char('end')):
        field_run.append(child)
    para._p.append(field_run)
    para.add_run('-')

def unlink_and_clear_header(section):
    """Unlink the header of *section* from the previous section and blank its runs.

    Best effort: any exception raised along the way is swallowed.
    """
    try:
        section.header.is_linked_to_previous = False
        runs = [run for para in section.header.paragraphs for run in para.runs]
        for run in runs:
            run.text = ''
    except Exception:
        pass

def unlink_and_clear_footer(section):
    """Unlink the footer of *section* from the previous section and blank its runs.

    Best effort: any exception raised along the way is swallowed.
    """
    try:
        section.footer.is_linked_to_previous = False
        runs = [run for para in section.footer.paragraphs for run in para.runs]
        for run in runs:
            run.text = ''
    except Exception:
        pass


In [None]:
# ---------- 拼音 ----------
def charwise_pinyin(text: str) -> str:
    """Annotate each character in U+4E00..U+9FFF with its pinyin, as ``字(zì)``.

    Other characters are copied unchanged. When pypinyin is unavailable the
    text is returned as is.
    """
    if not HAVE_PINYIN:
        return text

    def annotate(ch):
        if not ('\u4e00' <= ch <= '\u9fff'):
            return ch
        reading = pinyin_fn(ch, style=Style.TONE, strict=False)
        first = reading[0][0] if reading and reading[0] else ''
        return f"{ch}({first})"

    return ''.join(annotate(ch) for ch in text)

def sentencewise_pinyin(text: str) -> str:
    """Append the pinyin of all U+4E00..U+9FFF characters after *text*.

    The result is ``text（pin yin …）``. The text is returned unchanged when
    pypinyin is unavailable or the text contains no such characters.
    """
    if not HAVE_PINYIN:
        return text
    hanzi = ''.join(ch for ch in text if '\u4e00' <= ch <= '\u9fff')
    if not hanzi:
        return text
    readings = pinyin_fn(hanzi, style=Style.TONE, strict=False)
    syllables = [item[0] for item in readings if item and item[0]]
    joined = ' '.join(syllables)
    return f"{text}（{joined}）"

def apply_pinyin_mode(s: str) -> str:
    """Apply the pinyin mode currently selected in ``dropdown_pinyin`` to *s*."""
    selected = dropdown_pinyin.value
    if selected == '字後拼音':
        converter = charwise_pinyin
    elif selected == '整句拼音':
        converter = sentencewise_pinyin
    else:
        return s
    return converter(s)


In [None]:
# ---------- UI ----------
def image_manager(title: str):
    """Build a widget for collecting and ordering images.

    Images can be added through a file-upload button or by pasting lines
    into a text box (one image per line: an http(s) URL, a ``data:image``
    URI, or bare base64). The list can be reordered, trimmed and cleared.

    Args:
        title: Text appended to the upload button caption.

    Returns:
        ``(ui, images)``: the composed ``VBox`` and the live list of
        ``{'name': str, 'bytes': bytes}`` dicts in display order. The widget
        callbacks mutate this list in place.
    """
    uploader = widgets.FileUpload(accept='image/*', multiple=True, description=f'上傳{title}')
    lst = widgets.Select(options=[], rows=6, description='順序')
    btn_up = widgets.Button(description='上移')
    btn_down = widgets.Button(description='下移')
    btn_del = widgets.Button(description='刪除')
    btn_clear = widgets.Button(description='清空')
    add_box = widgets.Textarea(
        placeholder='貼入圖片 URL 或 data:image/...;base64,... 或純 base64（每行一張）',
        layout=widgets.Layout(height='90px', width='100%'),
    )
    auto_add = widgets.Checkbox(value=True, description='自動解析貼上（每行一張）')
    add_btn = widgets.Button(description='新增到清單', button_style='info', icon='plus')
    status = widgets.HTML('')
    images = []

    def refresh():
        # Rebuild the visible list and select the first entry, if any.
        lst.options = [
            f"{i+1:02d}. {it['name']} ({len(it['bytes'])//1024}KB)"
            for i, it in enumerate(images)
        ]
        lst.index = 0 if images else None

    def on_upload(change):
        value = uploader.value
        # FileUpload.value is a dict keyed by file name in ipywidgets 7 and a
        # tuple of per-file dicts in ipywidgets 8; accept both shapes.
        if isinstance(value, dict):
            entries = list(value.items())
        else:
            entries = [(None, meta) for meta in value]
        for key, meta in entries:
            content = meta.get('content', b'')
            name = meta.get('name') or key or 'image'
            if content:
                # The content may arrive as a memoryview; store plain bytes.
                images.append({'name': name, 'bytes': bytes(content)})
        refresh()
        status.value = f"<span style='color:green'>已加入 {len(images)} 張</span>"

    uploader.observe(on_upload, names='value')

    def move(delta):
        if not images or lst.index is None:
            return
        i = lst.index
        j = i + delta
        if 0 <= j < len(images):
            images[i], images[j] = images[j], images[i]
            refresh()
            lst.index = j

    def on_delete(_):
        if images and lst.index is not None:
            images.pop(lst.index)
            refresh()

    def on_clear(_):
        images.clear()
        refresh()

    btn_up.on_click(lambda _: move(-1))
    btn_down.on_click(lambda _: move(1))
    btn_del.on_click(on_delete)
    btn_clear.on_click(on_clear)

    def _try_parse_line(ln):
        # Return the image bytes described by one pasted line, or None on failure.
        try:
            if ln.startswith('data:image'):
                return base64.b64decode(ln.split(',', 1)[1])
            if ln.startswith('http://') or ln.startswith('https://'):
                # Bounded wait so an unresponsive host cannot hang the UI callback.
                with urllib.request.urlopen(ln, timeout=15) as resp:
                    return resp.read()
            return base64.b64decode(ln, validate=True)
        except Exception:
            return None

    def do_add_from_text(_=None):
        lines = [ln.strip() for ln in add_box.value.splitlines() if ln.strip()]
        added = 0
        failed = 0
        for ln in lines:
            b = _try_parse_line(ln)
            if b:
                images.append({'name': f'added_{len(images)+1}.jpg', 'bytes': b})
                added += 1
            else:
                failed += 1
        if added:
            refresh()
        # Always consume what was processed. Leaving the lines in the box made
        # the auto-parse observer add (and re-download) them again on every
        # later edit of the text area.
        if add_box.value:
            add_box.value = ''
        if added or failed:
            msg = f"成功 {added} 張" + (f"；失敗 {failed} 行" if failed else '')
            if added and not failed:
                color = 'green'
            elif added:
                color = '#a60'
            else:
                color = '#a00'
            status.value = f"<span style='color:{color}'>{msg}</span>"

    add_btn.on_click(lambda _: do_add_from_text())

    def on_box_change(change):
        # Auto mode fires as soon as the box contains a line break. The
        # clearing in do_add_from_text re-enters here with an empty value,
        # which has no line break and is therefore ignored.
        if auto_add.value and '\n' in (change['new'] or ''):
            do_add_from_text()

    add_box.observe(on_box_change, names='value')

    ui = widgets.VBox([
        uploader,
        widgets.HBox([lst, widgets.VBox([btn_up, btn_down, btn_del, btn_clear])]),
        widgets.VBox([add_box, widgets.HBox([add_btn, auto_add])]),
        status,
    ])
    return ui, images

# Collect one warning per optional dependency that failed to import and show
# them together in an HTML banner (empty when everything is available).
warns=[]
if not MOVIEPY_OK: warns.append(f"⚠️ moviepy 不可用：{MOVIEPY_ERR}（requirements 需含 moviepy 與 imageio-ffmpeg）")
if not HAVE_PINYIN: warns.append("⚠️ pypinyin 未安裝，拼音功能停用。")
if not HAVE_GTTS: warns.append("⚠️ gTTS 未安裝，旁白功能停用。")
banner = widgets.HTML('<br>'.join(warns)) if warns else widgets.HTML('')

# Text inputs for the three parts of the booklet (cover, body, back cover).
txt_cover = widgets.Textarea(value='神奇的迴力鏢[18pt]\n[圖片50%]\n種是希望', description='封面文字', layout=widgets.Layout(width='100%', height='150px'))
txt_body  = widgets.Textarea(value='第一段文字。\n\n第二段文字。\n\n第三段文字。', description='內頁文字', layout=widgets.Layout(width='100%', height='200px'))
txt_back  = widgets.Textarea(value='封底簡介……\n[左圖60%][右圖60%]', description='封底文字', layout=widgets.Layout(width='100%', height='150px'))

# Default image width (percent) per part, used when an image tag gives no number.
cover_default_pct = widgets.BoundedIntText(value=100, min=10, max=100, step=1, description='封面預設%')
body_default_pct  = widgets.BoundedIntText(value=100, min=10, max=100, step=1, description='內頁預設%')
back_default_pct  = widgets.BoundedIntText(value=100, min=10, max=100, step=1, description='封底預設%')

# Document options: body page numbers, page break after each body line, pinyin mode.
chk_body_pagenum = widgets.Checkbox(value=True, description='內頁加頁碼 (-1-)')
chk_pagebreak = widgets.Checkbox(value=False, description='每段文字自動換頁')
dropdown_pinyin = widgets.Dropdown(options=['不加拼音','字後拼音','整句拼音'], value='不加拼音', description='拼音模式')

# Video options: enable switch, which parts to include, frame size, FPS, timing.
chk_make_video = widgets.Checkbox(value=False, description='是否產生影片 (mp4)')
chk_video_cover = widgets.Checkbox(value=True, description='將封面加入影片')
chk_video_back  = widgets.Checkbox(value=True, description='將封底加入影片')
vid_w = widgets.BoundedIntText(value=1280, min=640, max=3840, step=10, description='寬(px)')
vid_h = widgets.BoundedIntText(value=720,  min=360, max=2160, step=10, description='高(px)')
vid_fps = widgets.BoundedIntText(value=24, min=10, max=60, step=1, description='FPS')
sec_per_char = widgets.FloatText(value=0.12, description='每字秒數(估)')
fade_dur = widgets.FloatText(value=0.5, description='淡入/淡出秒數')

# Audio options: background-music upload and volume, narration volume, gTTS language and domain.
bgm_upload = widgets.FileUpload(accept='audio/*', multiple=False, description='上傳背景音樂')
bgm_vol = widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05, description='BGM音量')
voice_vol = widgets.FloatSlider(value=1.0, min=0.0, max=1.5, step=0.05, description='人聲音量')
voice_lang = widgets.Dropdown(options=['zh-TW','zh-CN','en'], value='zh-TW', description='語音語系')
voice_tld = widgets.Dropdown(options=['com','com.hk','com.tw'], value='com.tw', description='語音地區')

# One image manager per part; imgs_* are the live lists the builders read from.
ui_cover, imgs_cover = image_manager('封面圖片（可多張）')
ui_body,  imgs_body  = image_manager('內頁圖片（可多張）')
ui_back,  imgs_back  = image_manager('封底圖片（可多張）')

# Action buttons and the output area the click handlers write into.
btn_doc  = widgets.Button(description='生成 A5 小書（docx）', button_style='success')
btn_vid  = widgets.Button(description='生成影片（mp4）', button_style='primary')
out = widgets.Output()

# Lay out the whole form top to bottom: warnings, cover, body, back cover,
# video/audio settings, action buttons, then the output area.
display(widgets.VBox([
    banner,
    widgets.HBox([widgets.VBox([txt_cover, cover_default_pct]), widgets.VBox([dropdown_pinyin, chk_pagebreak])]),
    ui_cover,
    widgets.Label('—— 內頁 ——'),
    txt_body,
    widgets.HBox([body_default_pct, chk_body_pagenum]),
    ui_body,
    widgets.Label('—— 封底 ——'),
    txt_back,
    back_default_pct,
    ui_back,
    widgets.Label('—— 影片 ——'),
    widgets.HBox([chk_make_video, chk_video_cover, chk_video_back]),
    widgets.HBox([vid_w, vid_h, vid_fps, sec_per_char, fade_dur]),
    widgets.HBox([bgm_upload, bgm_vol]),
    widgets.HBox([voice_lang, voice_tld, voice_vol]),
    widgets.HBox([btn_doc, btn_vid]),
    out
]))

In [None]:
# ---------- DOCX 產生 ----------
def emit_stream(doc, text, size_pt, imgs, default_pct, center=False):
    """Write *text* into *doc*, turning image placeholders into pictures.

    Text tokens become paragraphs (after the selected pinyin mode is
    applied). Image placeholders consume images from the front of *imgs*,
    which is mutated. A placeholder is skipped silently when too few images
    remain or when the text ends with a ``[無圖]`` tag.
    """
    suppress_images = has_skip_image_tag(text)
    body = strip_skip_image_tag(text)
    for token in tokenize_line(body):
        kind = token[0]
        if kind == 'text':
            add_text_paragraph(doc, apply_pinyin_mode(token[1]), size_pt, center=center)
            continue
        if suppress_images:
            continue
        if kind == 'single' and imgs:
            width = token[1] if token[1] is not None else (default_pct or 100)
            add_single_image_paragraph(doc, imgs.pop(0), width_pct=width, center=True)
        elif kind == 'double' and len(imgs) >= 2:
            left_img = imgs.pop(0)
            right_img = imgs.pop(0)
            add_double_image_table(doc, left_img, right_img, token[1], token[2])

def build_docx_bytes():
    """Build the A5 booklet from the current widget values.

    The document has three sections: cover, body and back cover. Only the
    body section may carry page numbers (controlled by ``chk_body_pagenum``).
    In every part, images still unused after the explicit placeholders are
    placed automatically (one after the cover, one after each untagged body
    line, one after the back cover) unless the text ends with ``[無圖]``.

    Returns:
        The ``.docx`` file content as ``bytes``.

    Raises:
        Exception: whatever python-docx or Pillow raises while building,
            saving or re-opening the document.
    """
    cover_text = txt_cover.value
    body_text = txt_body.value
    back_text = txt_back.value
    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]
    cover_pct = int(cover_default_pct.value or 100)
    body_pct = int(body_default_pct.value or 100)
    back_pct = int(back_default_pct.value or 100)

    # Cover (no page number)
    doc = Document()
    set_section_to_a5(doc.sections[0])
    cover_size, cover_text2 = parse_and_strip_size_anywhere(cover_text, default_pt=18)
    emit_stream(doc, cover_text2, cover_size, cover_imgs, cover_pct, center=True)
    if cover_imgs and not has_skip_image_tag(cover_text2):
        add_single_image_paragraph(doc, cover_imgs.pop(0), width_pct=cover_pct, center=True)

    # Body (optional page numbers)
    doc.add_section()
    body_section = doc.sections[-1]
    set_section_to_a5(body_section)
    set_page_number_start(body_section, 1)
    if chk_body_pagenum.value:
        # A new section's footer is linked to the previous section, and
        # python-docx resolves writes to a linked footer onto the earlier
        # section. Without unlinking, the page number would be created on the
        # cover and merely inherited by the body.
        body_section.footer.is_linked_to_previous = False
        add_page_number_footer(body_section)
    raw_lines = body_text.split('\n')
    total = sum(1 for ln in raw_lines if ln.strip() != '')
    done = 0
    for raw in raw_lines:
        line = raw.strip()
        if line == '':
            doc.add_paragraph('')
            continue
        size_pt, text = parse_and_strip_size_anywhere(line, default_pt=12)
        has_tag = any(tk[0] != 'text' for tk in tokenize_line(text))
        # [無圖] must also suppress the automatic image that follows an
        # untagged line, not only explicit placeholders on the same line.
        no_image = has_skip_image_tag(text)
        emit_stream(doc, text, size_pt, body_imgs, body_pct, center=False)
        if not has_tag and not no_image and body_imgs:
            add_single_image_paragraph(doc, body_imgs.pop(0), width_pct=body_pct, center=True)
        done += 1
        if chk_pagebreak.value and done < total:
            doc.add_page_break()

    # Back cover (no page number)
    doc.add_section()
    back_section = doc.sections[-1]
    set_section_to_a5(back_section)
    unlink_and_clear_header(back_section)
    unlink_and_clear_footer(back_section)
    back_size, back_text2 = parse_and_strip_size_anywhere(back_text, default_pt=12)
    emit_stream(doc, back_text2, back_size, back_imgs, back_pct, center=True)
    if back_imgs and not has_skip_image_tag(back_text2):
        add_single_image_paragraph(doc, back_imgs.pop(0), width_pct=back_pct, center=True)

    buf = io.BytesIO()
    doc.save(buf)
    data = buf.getvalue()
    # Sanity check: re-open the result so a broken package fails here rather
    # than after the user has downloaded it.
    _ = Document(io.BytesIO(data))
    return data


In [None]:
# ---------- 字幕（textbbox） ----------
def pick_cjk_font(size_px):
    """Return the first loadable CJK font from a fixed list of paths.

    Falls back to Pillow's default font when none of the paths exists or
    can be loaded.
    """
    search_paths = (
        '/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc',
        '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',
        '/usr/share/fonts/truetype/noto/NotoSansCJK.ttc',
        '/usr/share/fonts/truetype/arphic/ukai.ttc',
        '/usr/share/fonts/truetype/arphic/uming.ttc',
    )
    for font_path in search_paths:
        if not os.path.exists(font_path):
            continue
        try:
            return ImageFont.truetype(font_path, size=size_px)
        except Exception:
            # Unreadable font file: try the next candidate.
            continue
    return ImageFont.load_default()

def text_size(draw, s, font):
    """Return ``(width, height)`` of *s* as measured by ``draw.textbbox`` at the origin."""
    left, top, right, bottom = draw.textbbox((0, 0), s, font=font)
    width = right - left
    height = bottom - top
    return width, height

def wrap_text_by_width(draw, text, font, max_width):
    """Break *text* into lines no wider than *max_width* pixels.

    Wrapping is per character. Line breaks already present in *text* are
    honoured as forced breaks; blank lines are dropped.

    Returns:
        A list of non-empty line strings (empty when *text* is empty).
    """
    lines = []
    # Split on existing newlines first. Measuring a string that contains a
    # newline yields a multi-line bounding box, so such text used to come
    # back as one "line" and was positioned as a single row.
    for paragraph in text.splitlines():
        cur = ''
        for ch in paragraph:
            w, _ = text_size(draw, cur + ch, font)
            if w <= max_width:
                cur += ch
            else:
                if cur:
                    lines.append(cur)
                cur = ch
        if cur:
            lines.append(cur)
    return lines

def burn_subtitle_onto_image(img_rgba, text):
    """Return a copy of *img_rgba* with *text* drawn as a centered subtitle.

    The font size scales with image height (28 px at 720 px, at least
    14 px). Text is wrapped to 88 % of the width and placed so that the
    block ends 8 % of the height above the bottom edge. Each line is drawn
    twice: a translucent black copy offset by 2 px, then white on top.
    """
    width, height = img_rgba.size
    font_px = max(14, int(28 * height / 720.0))
    font = pick_cjk_font(font_px)
    layer = Image.new('RGBA', (width, height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(layer)
    side_margin = int(width * 0.06)
    wrapped = wrap_text_by_width(pen, text, font, width - 2 * side_margin)
    glyph_h = text_size(pen, '測', font)[1]
    row_h = int(glyph_h * 1.3)
    top = height - row_h * len(wrapped) - int(height * 0.08)
    for row, content in enumerate(wrapped):
        row_w = text_size(pen, content, font)[0]
        left = (width - row_w) // 2
        row_y = top + row * row_h
        pen.text((left + 2, row_y + 2), content, font=font, fill=(0, 0, 0, 100))
        pen.text((left, row_y), content, font=font, fill=(255, 255, 255, 255))
    return Image.alpha_composite(img_rgba, layer)


In [None]:
# ---------- 影片（gTTS 旁白、淡入淡出、BGM） ----------
def estimate_duration_by_text(text: str, sec_per_char: float = 0.12, min_sec=1.0, max_sec=12.0):
    """Estimate a display time in seconds from the number of non-space characters.

    At least one character is counted, the per-character time is at least
    0.02 s, and the result is clamped to ``[min_sec, max_sec]``.
    """
    visible = sum(1 for c in text if not c.isspace())
    count = visible if visible >= 1 else 1
    raw = count * max(0.02, sec_per_char)
    capped = min(max_sec, raw)
    return float(max(min_sec, capped))

def tts_to_mp3(text: str, dst_path: str, lang='zh-TW', tld='com.tw'):
    """Synthesize *text* with gTTS into *dst_path*.

    Returns:
        True when a non-empty file exists afterwards; False when gTTS is
        unavailable or anything goes wrong (errors are swallowed).
    """
    if HAVE_GTTS:
        try:
            speech = gTTS(text=text, lang=lang, tld=tld)
            speech.save(dst_path)
            return os.path.exists(dst_path) and os.path.getsize(dst_path) > 0
        except Exception:
            pass
    return False

def compose_frame_image(img_bytes, width, height, text_to_burn=None):
    """Render one video frame as JPEG bytes.

    Args:
        img_bytes: Encoded source image, or ``None`` for a plain white frame.
        width: Frame width in pixels.
        height: Frame height in pixels.
        text_to_burn: Optional subtitle text drawn onto the frame.

    Returns:
        JPEG-encoded frame of exactly ``width`` x ``height`` pixels. The
        source image is scaled to fit inside the frame with its aspect ratio
        kept and centered on a white background.
    """
    # Local import: the file-level PIL import list does not include ImageOps.
    from PIL import ImageOps

    base = Image.new('RGBA', (width, height), (255, 255, 255, 255))
    if img_bytes is not None:
        src = Image.open(io.BytesIO(img_bytes))
        # Honour the EXIF orientation so rotated photos are shown upright.
        src = ImageOps.exif_transpose(src).convert('RGBA')
        src_w, src_h = src.size
        # Fit inside the frame instead of stretching to it: a plain resize to
        # (width, height) distorts every image whose aspect ratio differs.
        scale = min(width / src_w, height / src_h)
        fit_w = max(1, int(round(src_w * scale)))
        fit_h = max(1, int(round(src_h * scale)))
        src = src.resize((fit_w, fit_h))
        # Use the image's own alpha as the mask so transparent areas show white.
        base.paste(src, ((width - fit_w) // 2, (height - fit_h) // 2), src)
    if text_to_burn:
        base = burn_subtitle_onto_image(base, text_to_burn)
    buf = io.BytesIO()
    base.convert('RGB').save(buf, format='JPEG', quality=95)
    return buf.getvalue()

def make_clip_from_bytes(img_bytes, width, height, duration, fade_seconds=0.5):
    """Turn encoded image bytes into a still clip lasting *duration* seconds.

    A fade-in and fade-out of *fade_seconds* are applied when that value is
    positive. *width* and *height* are accepted but not used here.
    """
    frame = Image.open(io.BytesIO(img_bytes)).convert('RGB')
    still = ImageClip(np.array(frame)).set_duration(duration)
    if fade_seconds > 0:
        still = still.fadein(fade_seconds)
        still = still.fadeout(fade_seconds)
    return still

def build_video_mp4(save_path: str, include_cover=True, include_back=True, sec_per_char_val=0.12, width=1280, height=720, fps=24,
                    fade_seconds=0.5, burn_subtitles=True, bgm_bytes=None, bgm_volume=0.2, voice_volume=1.0, lang='zh-TW', tld='com.tw'):
    """Render the booklet as an MP4 slideshow with narration and optional BGM.

    One segment is produced for the cover (optional), one per non-empty body
    line, and one for the back cover (optional). Body line *i* is paired with
    body image *i* when it exists. Layout tags (``[NNpt]``, ``[無圖]``, image
    placeholders) are removed before the text is narrated or shown.

    Args:
        save_path: Output file path.
        include_cover: Add a segment for the cover.
        include_back: Add a segment for the back cover.
        sec_per_char_val: Seconds per character for the duration estimate.
        width: Frame width in pixels.
        height: Frame height in pixels.
        fps: Frames per second of the output.
        fade_seconds: Fade-in/out length per segment; ``<= 0`` disables it.
        burn_subtitles: Draw the segment text onto the frame.
        bgm_bytes: Encoded background music, or ``None``.
        bgm_volume: Background-music gain (negative values act as 0).
        voice_volume: Narration gain (negative values act as 0).
        lang: gTTS language code.
        tld: gTTS top-level domain.

    Returns:
        ``save_path``.

    Raises:
        RuntimeError: moviepy is unavailable, or there is nothing to render.
    """
    if not MOVIEPY_OK:
        raise RuntimeError(f'moviepy 無法使用：{MOVIEPY_ERR}')

    cover_text = txt_cover.value.strip()
    body_text = txt_body.value
    back_text = txt_back.value.strip()
    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]

    def plain_text(raw):
        # Remove layout markup so tags such as "[圖片50%]" or "[18pt]" are
        # neither read aloud nor burned into the frame; collapse whitespace.
        _, rest = parse_and_strip_size_anywhere(raw)
        rest = strip_skip_image_tag(rest)
        parts = [' '.join(tk[1].split()) for tk in tokenize_line(rest) if tk[0] == 'text']
        return ' '.join(p for p in parts if p)

    def make_segment(raw, img):
        spoken = plain_text(raw)
        # Narration uses the clean text; only the subtitle carries pinyin.
        subtitle = apply_pinyin_mode(spoken) if spoken else ''
        return spoken, subtitle, img

    segments = []
    if include_cover and (cover_text or cover_imgs):
        segments.append(make_segment(cover_text, cover_imgs[0] if cover_imgs else None))
    lines = [ln.strip() for ln in body_text.split('\n') if ln.strip() != '']
    for i, line in enumerate(lines):
        segments.append(make_segment(line, body_imgs[i] if i < len(body_imgs) else None))
    if include_back and (back_text or back_imgs):
        segments.append(make_segment(back_text, back_imgs[0] if back_imgs else None))
    if not segments:
        raise RuntimeError('沒有可用的文字或圖片，無法製作影片。')

    clips = []
    audio_sources = []  # opened audio readers, closed in the finally block
    tmp_files = []
    final = None
    try:
        for spoken, subtitle, img in segments:
            duration = estimate_duration_by_text(spoken, sec_per_char=sec_per_char_val)
            voice = None
            if spoken:
                fd, mp3_path = tempfile.mkstemp(prefix='voice_', suffix='.mp3')
                os.close(fd)
                # Register before use so the file is removed even if loading fails.
                tmp_files.append(mp3_path)
                if tts_to_mp3(spoken, mp3_path, lang=lang, tld=tld):
                    try:
                        source = AudioFileClip(mp3_path)
                        audio_sources.append(source)
                        # Let the narration finish: the frame stays at least as
                        # long as the audio, and the audio is never stretched
                        # past its own length.
                        duration = max(duration, float(source.duration))
                        voice = source.volumex(max(0.0, voice_volume))
                    except Exception as exc:
                        # Best effort: the segment is rendered silent.
                        print(f'⚠️ 旁白載入失敗，此段無聲：{exc}')
                        voice = None

            frame_bytes = compose_frame_image(
                img, width, height,
                text_to_burn=(subtitle if burn_subtitles and subtitle else None),
            )
            clip = make_clip_from_bytes(frame_bytes, width, height, duration, fade_seconds)
            if voice is not None:
                clip = clip.set_audio(voice)
            clips.append(clip)

        final = concatenate_videoclips(clips, method='chain')

        if bgm_bytes is not None and len(bgm_bytes) > 0:
            try:
                fd, bgm_path = tempfile.mkstemp(prefix='bgm_', suffix='.mp3')
                os.close(fd)
                tmp_files.append(bgm_path)
                with open(bgm_path, 'wb') as f:
                    f.write(bgm_bytes)
                bgm_source = AudioFileClip(bgm_path)
                audio_sources.append(bgm_source)
                # Trim to the video length, but never extend beyond the music
                # itself: reading past the end of the file fails while writing.
                bgm_len = min(float(bgm_source.duration), float(final.duration))
                bgm = bgm_source.volumex(max(0.0, bgm_volume)).set_duration(bgm_len)
                if final.audio is not None:
                    final = final.set_audio(CompositeAudioClip([final.audio, bgm]))
                else:
                    final = final.set_audio(bgm)
            except Exception as exc:
                # Best effort: the video is still produced without BGM.
                print(f'⚠️ 背景音樂無法套用，已略過：{exc}')

        final.write_videofile(save_path, fps=fps, codec='libx264', audio_codec='aac')
    finally:
        # Release readers before deleting their files.
        for resource in [final, *clips, *audio_sources]:
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                pass
        for p in tmp_files:
            try:
                os.remove(p)
            except OSError:
                pass
    return save_path


In [None]:
# ---------- Event handlers (downloads enabled) ----------
# NOTE(review): bare expression with no effect; presumably kept as a reminder that
# both buttons must already exist from the UI cell — confirm before removing.
btn_doc, btn_vid

def on_click_doc(_):
    """Build the DOCX and show a download link in the output area.

    The file is embedded in the link as a base64 data URI. Any exception is
    reported in the output area instead of being raised.
    """
    import html  # local import: only needed to escape the error text

    with out:
        clear_output()
        try:
            data = build_docx_bytes()
            fname = f'a5_book_{int(time.time())}.docx'
            b64 = base64.b64encode(data).decode()
            display(HTML('<div style="color:green;">DOCX 完成！</div>'))
            display(HTML(f'<a download="{fname}" href="data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,{b64}">📥 點此下載 {fname}</a>'))
        except Exception as e:
            # Exception text can contain '<', '>' or '&' (e.g. "'<' not
            # supported between ..."); escape it so it is shown verbatim.
            display(HTML(f'<div style=\'color:red;\'>失敗：{html.escape(str(e))}</div>'))

def on_click_video(_):
    """Render the MP4 from the current widget values and show download links.

    Does nothing except print a notice when the "make video" checkbox is
    off. Any exception is reported in the output area instead of being raised.
    """
    import html  # local import: only needed to escape the error text

    def as_float(value, default):
        # Fall back only when the widget holds no value. ``value or default``
        # would turn a deliberate 0 (muted BGM or voice, no fade) into the default.
        return float(default if value is None else value)

    with out:
        clear_output()
        if not chk_make_video.value:
            display(HTML('<div style="color:#a60;">未勾選「是否產生影片」，已略過。</div>'))
            return
        try:
            if not MOVIEPY_OK:
                raise RuntimeError('moviepy 尚不可用，請在 requirements.txt 加入 moviepy 與 imageio-ffmpeg。')
            os.makedirs('output', exist_ok=True)
            mp4_name = f'output/a5_book_{int(time.time())}.mp4'
            bgm_bytes = None
            try:
                uploaded = bgm_upload.value
                # FileUpload.value is a dict keyed by file name in ipywidgets 7
                # and a tuple of per-file dicts in ipywidgets 8.
                items = list(uploaded.values()) if isinstance(uploaded, dict) else list(uploaded)
                if items:
                    bgm_bytes = items[0].get('content', None)
            except Exception:
                # Best effort: an unreadable upload just means no BGM.
                bgm_bytes = None
            save = build_video_mp4(
                save_path=mp4_name,
                include_cover=chk_video_cover.value,
                include_back=chk_video_back.value,
                # A per-character time of 0 is not meaningful, so it still
                # falls back to the default here.
                sec_per_char_val=float(sec_per_char.value or 0.12),
                width=int(vid_w.value), height=int(vid_h.value), fps=int(vid_fps.value),
                fade_seconds=as_float(fade_dur.value, 0.5), burn_subtitles=True,
                bgm_bytes=bgm_bytes,
                bgm_volume=as_float(bgm_vol.value, 0.2),
                voice_volume=as_float(voice_vol.value, 1.0),
                lang=voice_lang.value, tld=voice_tld.value
            )
            display(HTML('<div style="color:green;">影片完成（含字幕／淡入淡出）！</div>'))
            files_href = f'files/{save}'
            if 'FileDownload' in globals() and HAVE_FD:
                with open(save, 'rb') as f:
                    btn = FileDownload(data=f.read(), filename=os.path.basename(save), description='下載影片 (MP4)')
                display(btn)
            display(HTML(f'<a href="{files_href}" download>或點此下載 (MP4)</a>'))
        except Exception as e:
            # Escape so messages containing '<', '>' or '&' are shown verbatim.
            display(HTML(f'<div style=\'color:red;\'>製作影片失敗：{html.escape(str(e))}</div>'))

# Wire the buttons to their handlers.
btn_doc.on_click(on_click_doc)
btn_vid.on_click(on_click_video)
