# A5 小書產生器（拼音模式＋選配影片）
— 以文字與影像編織一本可列印的小書（DOCX），並可選擇輸出對應的影片（MP4）。
— 拼音模式：不加／字後／整句。影片段落長度依文字自動估算；封面、封底可各別納入影片。


In [None]:
import io, os, re, base64, urllib.request, time, math, tempfile
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from pypinyin import pinyin as pinyin_fn, Style
from docx import Document
from docx.shared import Mm, Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from PIL import Image

# Full-width percent sign; tokenize_line accepts it next to ASCII '%' inside image tags.
FWP='％'


In [None]:
# Video components (MoviePy). The import is optional: on any failure the error
# text is kept in MOVIEPY_ERR and MOVIEPY_OK stays False so callers can report it.
MOVIEPY_OK = False
try:
    from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
    import numpy as np
except Exception as import_error:
    MOVIEPY_ERR = str(import_error)
else:
    MOVIEPY_OK = True


In [None]:
# ---- 文字／圖片工具 ----
def ensure_rgb_jpeg(img_bytes, quality=90):
    """Re-encode image bytes as JPEG and return the encoded bytes.

    Images whose mode is neither 'RGB' nor 'L' are converted to 'RGB' first.

    Args:
        img_bytes: Encoded image data readable by PIL.
        quality: JPEG quality passed to the encoder.

    Returns:
        The JPEG-encoded image as bytes.
    """
    from PIL import Image, ImageFile

    # Module-wide PIL switch: decode truncated files instead of raising.
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    picture = Image.open(io.BytesIO(img_bytes))
    if picture.mode != 'RGB' and picture.mode != 'L':
        picture = picture.convert('RGB')

    encoded = io.BytesIO()
    picture.save(encoded, format='JPEG', quality=quality)
    return encoded.getvalue()

def parse_and_strip_size_anywhere(text, default_pt=12):
    """Extract the first ``[<n>pt]`` size tag from *text*.

    Args:
        text: Source text that may contain a size tag anywhere.
        default_pt: Size returned when no tag is present.

    Returns:
        ``(size, remaining_text)``. Only the first tag is removed; when no
        tag exists the text is returned unchanged with *default_pt*.
    """
    match = re.search(r"\[(\d+)pt\]", text)
    if match is None:
        return default_pt, text
    remainder = text[:match.start()] + text[match.end():]
    return int(match.group(1)), remainder

def has_skip_image_tag(text):
    """Return True when *text* ends with a ``[無圖]`` tag (inner/trailing whitespace allowed)."""
    return re.search(r"\[\s*無圖\s*\]\s*$", text) is not None

def strip_skip_image_tag(text):
    """Return *text* with a trailing ``[無圖]`` tag (and the whitespace after it) removed."""
    trailing_skip_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_skip_tag.sub('', text)

def tokenize_line(line):
    """Split *line* into text and image-placeholder tokens.

    Recognised placeholders (a percentage is optional and may be followed by
    ASCII '%' or the full-width percent sign):

    * ``[左圖N%][右圖M%]`` -> ``('double', N_or_None, M_or_None)``
    * ``[圖片N%]``          -> ``('single', N_or_None)``

    Everything between placeholders becomes ``('text', str)``. The text after
    the last placeholder is always appended, even when it is empty.

    Args:
        line: Raw text, possibly containing placeholders.

    Returns:
        A list of token tuples in source order.
    """
    pct = r"\s*(\d+)?\s*[%" + FWP + r"]?\s*"
    # The left/right pair is wrapped in its own group (group 1) so that a pair
    # written without percentages is still recognised as 'double'. Checking the
    # percentage groups alone would misreport "[左圖][右圖]" as a single image.
    pattern = re.compile(
        r"(\[\s*左圖" + pct + r"\]\s*\[\s*右圖" + pct + r"\])"
        r"|\[\s*圖片" + pct + r"\]"
    )

    tokens = []
    cursor = 0
    for m in pattern.finditer(line):
        if m.start() > cursor:
            tokens.append(('text', line[cursor:m.start()]))
        if m.group(1) is not None:
            left, right = m.group(2), m.group(3)
            tokens.append((
                'double',
                int(left) if left else None,
                int(right) if right else None,
            ))
        else:
            width = m.group(4)
            tokens.append(('single', int(width) if width else None))
        cursor = m.end()
    tokens.append(('text', line[cursor:]))
    return tokens

def set_section_to_a5(section):
    """Set *section* to a 148 x 210 mm page with 15 mm margins on all four sides."""
    section.page_width, section.page_height = Mm(148), Mm(210)
    for side in ('left_margin', 'right_margin', 'top_margin', 'bottom_margin'):
        setattr(section, side, Mm(15))

def add_text_paragraph(doc, text, size_pt, center=False):
    """Append *text* as one paragraph and return it.

    Args:
        doc: Document (or compatible object) providing ``add_paragraph``.
        text: Paragraph text; surrounding whitespace is stripped.
        size_pt: Font size in points applied to every run of the paragraph.
        center: Centre the paragraph when True, otherwise justify it.

    Returns:
        The new paragraph, or None when the stripped text is empty (nothing
        is added in that case).
    """
    stripped = text.strip()
    if stripped == '':
        return None

    paragraph = doc.add_paragraph(stripped)
    if center:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    for run in paragraph.runs:
        run.font.size = Pt(size_pt)
    return paragraph

def add_single_image_paragraph(doc, img_bytes, width_pct=None, center=True):
    """Append a paragraph containing one picture.

    Args:
        doc: Document receiving the paragraph.
        img_bytes: Encoded image data; re-encoded to JPEG before insertion.
        width_pct: Picture width as a percentage of the usable page width
            (148 mm minus two 15 mm margins). None means full width. The
            result is clamped to between 1.0 inch and the usable width.
        center: Centre the paragraph when True, otherwise justify it.
    """
    usable_in = (148 - 15 - 15) / 25.4
    if width_pct is None:
        target_in = usable_in
    else:
        target_in = max(1.0, min(usable_in, usable_in * (width_pct / 100.0)))

    jpeg = ensure_rgb_jpeg(img_bytes)
    paragraph = doc.add_paragraph()
    if center:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    paragraph.add_run().add_picture(io.BytesIO(jpeg), width=Inches(target_in))

def add_double_image_table(doc, left_bytes, right_bytes, l_pct, r_pct):
    """Append a one-row, two-column table holding two pictures side by side.

    Args:
        doc: Document receiving the table.
        left_bytes: Encoded image data for the left cell.
        right_bytes: Encoded image data for the right cell.
        l_pct: Left picture width as a percentage of half the usable page
            width (after a 0.2 inch allowance); None means 100.
        r_pct: Same as *l_pct*, for the right picture.

    Each width is clamped to between 0.8 inch and the half-page width.
    """
    usable_in = (148 - 15 - 15) / 25.4
    gutter_in = 0.2
    half_in = (usable_in - gutter_in) / 2.0

    def column_width(pct):
        wanted = half_in if pct is None else half_in * (pct / 100.0)
        return max(0.8, min(half_in, wanted))

    widths = (column_width(l_pct), column_width(r_pct))

    table = doc.add_table(rows=1, cols=2)
    table.autofit = True
    for col, (payload, width_in) in enumerate(zip((left_bytes, right_bytes), widths)):
        cell_para = table.rows[0].cells[col].paragraphs[0]
        cell_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture_stream = io.BytesIO(ensure_rgb_jpeg(payload))
        cell_para.add_run().add_picture(picture_stream, width=Inches(width_in))

def add_page_number_footer(section):
    """Write a centred ``-<PAGE>-`` page-number line into the footer of *section*.

    The first footer paragraph is reused when one exists; otherwise a new one
    is added. The page number is a PAGE field built from raw XML elements
    (begin marker, instruction text, end marker) inside a single run.
    """
    footer = section.footer
    paragraph = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def field_char(kind):
        marker = OxmlElement('w:fldChar')
        marker.set(qn('w:fldCharType'), kind)
        return marker

    paragraph.add_run('-')

    instruction = OxmlElement('w:instrText')
    instruction.text = ' PAGE '

    field_run = OxmlElement('w:r')
    for child in (field_char('begin'), instruction, field_char('end')):
        field_run.append(child)
    paragraph._p.append(field_run)

    paragraph.add_run('-')


In [None]:
# ---- 拼音模式 ----
def charwise_pinyin(text: str) -> str:
    """Append a tone-marked pinyin reading in parentheses after every Chinese character.

    Only characters in U+4E00..U+9FFF are annotated; all other characters are
    copied unchanged. A character with no reading gets empty parentheses.
    """
    def annotate(ch):
        if not ('\u4e00' <= ch <= '\u9fff'):
            return ch
        readings = pinyin_fn(ch, style=Style.TONE, strict=False)
        first = readings[0][0] if readings and readings[0] else ''
        return f"{ch}({first})"

    return ''.join(annotate(ch) for ch in text)

def sentencewise_pinyin(text: str) -> str:
    """Append the pinyin of all Chinese characters in *text* after it, in full-width parentheses.

    Text with no character in U+4E00..U+9FFF is returned unchanged. Readings
    are tone-marked and joined with single spaces.
    """
    han = ''.join(ch for ch in text if '\u4e00' <= ch <= '\u9fff')
    if not han:
        return text
    syllables = pinyin_fn(han, style=Style.TONE, strict=False)
    joined = ' '.join(entry[0] for entry in syllables if entry and entry[0])
    return f"{text}（{joined}）"


In [None]:
# ---- 介面元件 ----
def image_manager(title: str):
    """Build a small UI for collecting, ordering and removing images.

    Images can be added through a file-upload widget or by pasting URLs,
    ``data:image/...`` URIs or bare base64 strings (one per line).

    Args:
        title: Text appended to the upload button's description.

    Returns:
        ``(ui, images)``: the composed widget box and the live list of
        ``{'name': str, 'bytes': bytes}`` dicts that the UI mutates in place.
    """
    uploader = widgets.FileUpload(accept='image/*', multiple=True, description=f'上傳{title}')
    lst = widgets.Select(options=[], rows=6, description='順序')
    btn_up = widgets.Button(description='上移')
    btn_down = widgets.Button(description='下移')
    btn_del = widgets.Button(description='刪除')
    btn_clear = widgets.Button(description='清空')
    add_box = widgets.Textarea(
        placeholder='貼入圖片 URL 或 data:image/...;base64,... 或純 base64（每行一張）',
        layout=widgets.Layout(height='70px', width='100%'),
    )
    add_btn = widgets.Button(description='新增到清單', button_style='info')
    status = widgets.HTML('')
    images = []

    def refresh():
        """Rebuild the list box from ``images`` and select the first entry."""
        lst.options = [
            f"{i+1:02d}. {it['name']} ({len(it['bytes'])//1024}KB)"
            for i, it in enumerate(images)
        ]
        lst.index = 0 if images else None

    def on_upload(change):
        value = uploader.value
        if not value:
            # Also reached when the reset below re-fires this observer.
            return
        if isinstance(value, dict):
            # NOTE(review): presumably the ipywidgets 7 layout
            # ({filename: {'content': ...}}) - confirm if 7.x must be supported.
            entries = [(name, meta.get('content', b'')) for name, meta in value.items()]
        else:
            entries = [(meta.get('name', 'image'), meta.get('content', b'')) for meta in value]
        for name, content in entries:
            if content:
                # Copy into bytes so the stored data does not depend on the
                # widget's buffer object.
                images.append({'name': name, 'bytes': bytes(content)})
        # Empty the widget so the same files are not appended again on the
        # next upload. A tuple value has no .clear(), so it is reassigned.
        if isinstance(value, dict):
            value.clear()
        else:
            uploader.value = ()
        refresh()
        status.value = f"<span style='color:green'>已加入 {len(images)} 張</span>"

    uploader.observe(on_upload, names='value')

    def move(delta):
        """Swap the selected image with its neighbour at offset *delta*."""
        if not images or lst.index is None:
            return
        i = lst.index
        j = i + delta
        if 0 <= j < len(images):
            images[i], images[j] = images[j], images[i]
            refresh()
            lst.index = j

    def delete_selected(_):
        if images and lst.index is not None:
            images.pop(lst.index)
            refresh()

    def clear_all(_):
        images.clear()
        refresh()

    btn_up.on_click(lambda _: move(-1))
    btn_down.on_click(lambda _: move(1))
    btn_del.on_click(delete_selected)
    btn_clear.on_click(clear_all)

    def do_add_from_text(_):
        lines = [ln.strip() for ln in add_box.value.splitlines() if ln.strip()]
        added = 0
        for ln in lines:
            try:
                if ln.startswith('data:image'):
                    b = base64.b64decode(ln.split(',', 1)[1])
                elif ln.startswith(('http://', 'https://')):
                    # Timeout keeps an unresponsive host from hanging the UI callback.
                    with urllib.request.urlopen(ln, timeout=30) as resp:
                        b = resp.read()
                else:
                    b = base64.b64decode(ln, validate=True)
            except Exception:
                # Best effort per line: an unusable entry is skipped and simply
                # not counted in the status message below.
                continue
            images.append({'name': f'added_{len(images)+1}.jpg', 'bytes': b})
            added += 1
        add_box.value = ''
        refresh()
        if added:
            status.value = f"<span style='color:green'>新增 {added} 張</span>"
        else:
            status.value = "<span style='color:#a00'>沒有成功新增</span>"

    add_btn.on_click(do_add_from_text)

    ui = widgets.VBox([
        uploader,
        widgets.HBox([lst, widgets.VBox([btn_up, btn_down, btn_del, btn_clear])]),
        widgets.HBox([add_box, widgets.VBox([add_btn])]),
        status,
    ])
    return ui, images

# ---- Module-level widgets; the build functions and click handlers read these ----
def _full_width_textarea(value, description, height):
    """Create a Textarea that spans the full container width."""
    return widgets.Textarea(
        value=value,
        description=description,
        layout=widgets.Layout(width='100%', height=height),
    )


def _percent_input(description):
    """Create an integer box limited to 10..100 (default 100) for a default image width."""
    return widgets.BoundedIntText(value=100, min=10, max=100, step=1, description=description)


# Text for the three parts of the booklet.
txt_cover = _full_width_textarea('書名或口號[18pt]\n[圖片50%]\n種是希望', '封面文字', '150px')
txt_body = _full_width_textarea('第一段文字。\n\n第二段文字。\n\n第三段文字。', '內頁文字', '200px')
txt_back = _full_width_textarea('封底簡介……\n[左圖60%][右圖60%]', '封底文字', '150px')

# Default picture width (percent) per part.
cover_default_pct = _percent_input('封面預設%')
body_default_pct = _percent_input('內頁預設%')
back_default_pct = _percent_input('封底預設%')

# Document options.
chk_pagenum = widgets.Checkbox(value=True, description='內頁加頁碼 (-1-)')
chk_pagebreak = widgets.Checkbox(value=False, description='每段文字自動換頁')
dropdown_pinyin = widgets.Dropdown(
    options=['不加拼音', '字後拼音', '整句拼音'],
    value='不加拼音',
    description='拼音模式',
)

# Video options.
chk_make_video = widgets.Checkbox(value=False, description='是否產生影片 (mp4)')
chk_video_cover = widgets.Checkbox(value=True, description='將封面加入影片')
chk_video_back = widgets.Checkbox(value=True, description='將封底加入影片')
vid_w = widgets.BoundedIntText(value=1280, min=640, max=3840, step=10, description='寬(px)')
vid_h = widgets.BoundedIntText(value=720, min=360, max=2160, step=10, description='高(px)')
vid_fps = widgets.BoundedIntText(value=24, min=10, max=60, step=1, description='FPS')
sec_per_char = widgets.FloatText(value=0.12, description='每字秒數(估)')

# One image manager per part; each call returns its UI box and its live image list.
ui_cover, imgs_cover = image_manager('封面圖片（可多張）')
ui_body, imgs_body = image_manager('內頁圖片（可多張）')
ui_back, imgs_back = image_manager('封底圖片（可多張）')

btn_doc = widgets.Button(description='生成 A5 小書（docx）', button_style='success')
btn_vid = widgets.Button(description='生成影片（mp4）', button_style='primary')
out = widgets.Output()

_cover_row = widgets.HBox([
    widgets.VBox([txt_cover, cover_default_pct]),
    widgets.VBox([chk_pagenum, chk_pagebreak, dropdown_pinyin]),
])
_video_toggles = widgets.HBox([chk_make_video, chk_video_cover, chk_video_back])
_video_numbers = widgets.HBox([vid_w, vid_h, vid_fps, sec_per_char])
_action_row = widgets.HBox([btn_doc, btn_vid])

display(widgets.VBox([
    _cover_row,
    ui_cover,
    widgets.Label('—— 內頁 ——'),
    txt_body,
    body_default_pct,
    ui_body,
    widgets.Label('—— 封底 ——'),
    txt_back,
    back_default_pct,
    ui_back,
    widgets.Label('—— 影片 ——'),
    _video_toggles,
    _video_numbers,
    _action_row,
    out,
]))


In [None]:
# ---- 拼音模式套用 ----
def apply_pinyin_mode(s: str) -> str:
    """Annotate *s* according to the mode currently selected in ``dropdown_pinyin``.

    '字後拼音' annotates each character, '整句拼音' appends the pinyin of the
    whole string, and any other selection returns *s* unchanged.
    """
    converters = {
        '字後拼音': charwise_pinyin,
        '整句拼音': sentencewise_pinyin,
    }
    convert = converters.get(dropdown_pinyin.value)
    return s if convert is None else convert(s)


In [None]:
# ---- DOCX 產出 ----
def emit_stream(doc, text, size_pt, imgs, default_pct, center=False):
    """Write *text* into *doc*, replacing image placeholders with queued images.

    Args:
        doc: Target document.
        text: Text that may contain image placeholders and a trailing
            ``[無圖]`` tag. The tag is removed and suppresses every image
            placeholder in this text.
        size_pt: Font size for the text paragraphs.
        imgs: List of image bytes; consumed from the front as placeholders
            are filled (the list is mutated).
        default_pct: Width percentage for single placeholders without their
            own percentage; a falsy value means 100.
        center: Centre text paragraphs when True.

    A placeholder is silently dropped when not enough images remain
    (one for a single image, two for a pair).
    """
    images_suppressed = has_skip_image_tag(text)
    content = strip_skip_image_tag(text)

    for token in tokenize_line(content):
        kind = token[0]
        if kind == 'text':
            add_text_paragraph(doc, apply_pinyin_mode(token[1]), size_pt, center=center)
            continue
        if images_suppressed:
            continue
        if kind == 'single' and imgs:
            width = token[1] if token[1] is not None else (default_pct or 100)
            add_single_image_paragraph(doc, imgs.pop(0), width_pct=width, center=True)
        elif kind == 'double' and len(imgs) >= 2:
            left_pct, right_pct = token[1], token[2]
            left_img = imgs.pop(0)
            right_img = imgs.pop(0)
            add_double_image_table(doc, left_img, right_img, left_pct, right_pct)

def build_docx_bytes():
    """Assemble the A5 booklet from the current widget state and return it as DOCX bytes.

    The document has three sections: cover, body and back cover. For each
    part, image placeholders in the text consume images from that part's
    list in order. One leftover image is appended after the cover, after the
    back cover, and after every body line that has no placeholder, unless
    the text ends with ``[無圖]``.

    Returns:
        The serialized .docx file as bytes.
    """
    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]
    cover_pct = int(cover_default_pct.value or 100)
    body_pct = int(body_default_pct.value or 100)
    back_pct = int(back_default_pct.value or 100)
    with_page_numbers = bool(chk_pagenum.value)

    doc = Document()
    set_section_to_a5(doc.sections[0])

    # ---- cover ----
    cover_size, cover_body = parse_and_strip_size_anywhere(txt_cover.value, default_pt=18)
    emit_stream(doc, cover_body, cover_size, cover_imgs, cover_pct, center=True)
    if cover_imgs and not has_skip_image_tag(cover_body):
        add_single_image_paragraph(doc, cover_imgs.pop(0), width_pct=cover_pct, center=True)

    # ---- body ----
    doc.add_section()
    body_section = doc.sections[-1]
    set_section_to_a5(body_section)
    if with_page_numbers:
        # A new section's footer is linked to the previous section by default,
        # so writing to it would edit the footer shared with the cover. Give
        # the body its own footer before adding the page number.
        body_section.footer.is_linked_to_previous = False
        add_page_number_footer(body_section)

    body_lines = txt_body.value.split('\n')
    remaining = sum(1 for ln in body_lines if ln.strip())
    for raw in body_lines:
        line = raw.strip()
        if not line:
            doc.add_paragraph('')
            continue
        size_pt, text = parse_and_strip_size_anywhere(line, default_pt=12)
        has_tag = any(tk[0] != 'text' for tk in tokenize_line(text))
        wants_no_image = has_skip_image_tag(text)
        emit_stream(doc, text, size_pt, body_imgs, body_pct, center=False)
        # A line without placeholders gets the next image, unless the line
        # explicitly asked for none with a trailing [無圖].
        if not has_tag and not wants_no_image and body_imgs:
            add_single_image_paragraph(doc, body_imgs.pop(0), width_pct=body_pct, center=True)
        remaining -= 1
        if chk_pagebreak.value and remaining > 0:
            doc.add_page_break()

    # ---- back cover ----
    doc.add_section()
    back_section = doc.sections[-1]
    set_section_to_a5(back_section)
    if with_page_numbers:
        # Unlink so the back cover does not inherit the body's page-number footer.
        back_section.footer.is_linked_to_previous = False
    back_size, back_body = parse_and_strip_size_anywhere(txt_back.value, default_pt=12)
    emit_stream(doc, back_body, back_size, back_imgs, back_pct, center=True)
    if back_imgs and not has_skip_image_tag(back_body):
        add_single_image_paragraph(doc, back_imgs.pop(0), width_pct=back_pct, center=True)

    buf = io.BytesIO()
    doc.save(buf)
    data = buf.getvalue()
    # Re-open the result so a file that cannot be read back raises here
    # rather than after the user has downloaded it.
    Document(io.BytesIO(data))
    return data


In [None]:
# ---- 影片：時長估算、TTS（若不可用則靜音）、組裝 MP4 ----
def estimate_duration_by_text(text: str, sec_per_char: float = 0.12, min_sec=1.0, max_sec=12.0):
    """Estimate how long *text* should stay on screen, in seconds.

    Args:
        text: Segment text; whitespace characters are not counted.
        sec_per_char: Seconds per counted character (values below 0.02 are
            raised to 0.02).
        min_sec: Lower bound of the result.
        max_sec: Upper bound applied before the lower bound.

    Returns:
        The clamped duration as a float. Empty text counts as one character.
    """
    char_count = max(1, sum(not ch.isspace() for ch in text))
    raw_seconds = char_count * max(0.02, sec_per_char)
    capped = min(max_sec, raw_seconds)
    return float(max(min_sec, capped))

def tts_to_wav(text: str, dst_path: str):
    """Try to synthesize *text* to a file at *dst_path* using pyttsx3.

    Returns:
        True when the file exists and is non-empty afterwards; False when it
        does not, or when anything fails (including pyttsx3 being unavailable).
    """
    try:
        import pyttsx3

        engine = pyttsx3.init()
        engine.setProperty('rate', 180)
        engine.save_to_file(text, dst_path)
        engine.runAndWait()
        if not os.path.exists(dst_path):
            return False
        return os.path.getsize(dst_path) > 0
    except Exception:
        # Narration is optional; callers fall back to a silent segment.
        return False

def make_image_clip(img_bytes, width, height, duration):
    """Create a still-image clip of *duration* seconds from encoded image bytes.

    The image is converted to RGB and resized to exactly (width, height).
    """
    # NOTE(review): resize() is given the frame size directly, so an image with
    # a different aspect ratio is stretched - confirm that this is intended.
    frame_size = (width, height)
    picture = Image.open(io.BytesIO(img_bytes))
    picture = picture.convert('RGB')
    picture = picture.resize(frame_size)
    frame = np.array(picture)
    still = ImageClip(frame)
    return still.set_duration(duration)

def text_only_clip(width, height, duration):
    """Create a plain white clip of *duration* seconds for segments without an image."""
    white_frame = np.empty((height, width, 3), dtype=np.uint8)
    white_frame.fill(255)
    blank = ImageClip(white_frame)
    return blank.set_duration(duration)

def build_video_mp4(save_path: str, include_cover=True, include_back=True, sec_per_char_val=0.12, width=1280, height=720, fps=24):
    """Render the booklet as an MP4 slideshow with optional narration.

    One segment is produced for the cover (optional), for every non-empty
    body line, and for the back cover (optional). A segment shows the part's
    image (the i-th body image for the i-th body line) or a white frame, and
    lasts as long as the text-based estimate, or as long as the narration if
    that is longer.

    Args:
        save_path: Output file path.
        include_cover: Add a segment for the cover.
        include_back: Add a segment for the back cover.
        sec_per_char_val: Seconds per character for the duration estimate.
        width: Frame width in pixels.
        height: Frame height in pixels.
        fps: Frames per second of the output file.

    Returns:
        *save_path*.

    Raises:
        RuntimeError: MoviePy could not be imported.
        ValueError: There is nothing to render.
    """
    if not MOVIEPY_OK:
        raise RuntimeError(f'moviepy 無法使用：{MOVIEPY_ERR}')

    def spoken_text(raw):
        # Layout markup ([18pt], [圖片50%], [無圖] ...) is for the document only;
        # remove it so it is neither read aloud nor counted toward duration.
        _, without_size = parse_and_strip_size_anywhere(raw)
        without_skip = strip_skip_image_tag(without_size)
        plain = ''.join(tk[1] for tk in tokenize_line(without_skip) if tk[0] == 'text')
        return apply_pinyin_mode(plain.strip())

    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]

    segments = []  # (text, img_bytes_or_None)
    if include_cover:
        segments.append((spoken_text(txt_cover.value), cover_imgs[0] if cover_imgs else None))
    body_lines = [ln.strip() for ln in txt_body.value.split('\n') if ln.strip()]
    for i, line in enumerate(body_lines):
        segments.append((spoken_text(line), body_imgs[i] if i < len(body_imgs) else None))
    if include_back:
        segments.append((spoken_text(txt_back.value), back_imgs[0] if back_imgs else None))

    if not segments:
        # Fail with a clear message rather than an error from inside MoviePy.
        raise ValueError('沒有可輸出的影片段落')

    clips = []
    audio_clips = []
    tmp_wavs = []
    final = None
    stamp = int(time.time() * 1000)
    try:
        for idx, (tx, img) in enumerate(segments):
            duration = estimate_duration_by_text(tx, sec_per_char=sec_per_char_val)

            # Narration is optional: on any failure the segment stays silent.
            narration = None
            if tx.strip():
                wav_path = os.path.join(tempfile.gettempdir(), f'voice_{stamp}_{idx}.wav')
                # Register before synthesis so a partial file is also removed.
                tmp_wavs.append(wav_path)
                if tts_to_wav(tx, wav_path):
                    try:
                        narration = AudioFileClip(wav_path)
                        audio_clips.append(narration)
                        # Keep the audio at its real length and stretch the
                        # picture instead. Forcing the audio to the estimate
                        # would cut long narration, or ask for samples past
                        # the end of a short file.
                        duration = max(duration, narration.duration)
                    except Exception:
                        narration = None

            if img:
                clip = make_image_clip(img, width, height, duration)
            else:
                clip = text_only_clip(width, height, duration)
            if narration is not None:
                clip = clip.set_audio(narration)
            clips.append(clip)

        final = concatenate_videoclips(clips, method='chain')
        final.write_videofile(save_path, fps=fps, codec='libx264', audio_codec='aac')
    finally:
        # Close readers first; an open audio reader can keep the wav file
        # locked and make the removal below fail on some platforms.
        closables = audio_clips + clips
        if final is not None:
            closables.append(final)
        for item in closables:
            try:
                item.close()
            except Exception:
                pass
        for p in tmp_wavs:
            try:
                os.remove(p)
            except OSError:
                pass
    return save_path


In [None]:
# ---- 事件 ----
# Bare reference to both buttons: the tuple is built and discarded. It has no
# effect beyond raising NameError here if either button is not defined yet.
btn_doc, btn_vid

def on_click_doc(_):
    """Click handler: build the DOCX and show a download link in the output area.

    The file is embedded in the link as a base64 ``data:`` URI. Any failure
    is shown as a red message instead of being raised.
    """
    with out:
        clear_output()
        try:
            data = build_docx_bytes()
            fname = f'a5_book_{int(time.time())}.docx'
            b64 = base64.b64encode(data).decode()
            display(HTML('<div style="color:green;">DOCX 完成！</div>'))
            # The href attribute must be closed with a double quote: a single
            # quote there ends the Python string literal and breaks parsing.
            display(HTML(
                f'<a download="{fname}" '
                f'href="data:application/vnd.openxmlformats-officedocument.wordprocessingml.document;base64,{b64}">'
                f'📥 點此下載 {fname}</a>'
            ))
        except Exception as e:
            display(HTML(f'<div style="color:red;">失敗：{e}</div>'))

def on_click_video(_):
    """Click handler: render the MP4 when enabled and show a link to it.

    Nothing is rendered unless the "make video" checkbox is ticked. Any
    failure is shown as a red message instead of being raised.
    """
    with out:
        clear_output()
        if not chk_make_video.value:
            display(HTML('<div style="color:#a60;">未勾選「是否產生影片」，已略過。</div>'))
            return
        try:
            if not MOVIEPY_OK:
                raise RuntimeError('moviepy 尚不可用，請檢查 requirements 是否包含 moviepy 與 imageio-ffmpeg。')
            options = {
                'save_path': f'a5_book_{int(time.time())}.mp4',
                'include_cover': chk_video_cover.value,
                'include_back': chk_video_back.value,
                # An empty or zero field falls back to the default rate.
                'sec_per_char_val': float(sec_per_char.value or 0.12),
                'width': int(vid_w.value),
                'height': int(vid_h.value),
                'fps': int(vid_fps.value),
            }
            written = build_video_mp4(**options)
            display(HTML('<div style="color:green;">影片完成！</div>'))
            # NOTE(review): the "sandbox:/" scheme may not resolve in a regular
            # Jupyter front end - confirm the link works where this runs.
            display(HTML(f'<a href="sandbox:/{written}">🎬 下載影片 {written}</a>'))
        except Exception as err:
            display(HTML(f'<div style="color:red;">製作影片失敗：{err}</div>'))

# Attach each click handler to its button.
for _button, _handler in ((btn_doc, on_click_doc), (btn_vid, on_click_video)):
    _button.on_click(_handler)
