# A5 小書產生器（v13）— 順序即所見、預設 100% 置中、內頁一段一頁
**更新重點**
- 解析採 **逐段逐標記串流**：封面/封底的文字與 `[圖片]`（含百分比）會按照你輸入的**順序**輸出；不再出現文字跑到圖片前或後的錯位。
- **預設圖片寬度 100%**、**置中**（未指定百分比時）。
- **內頁自動插圖**：無標記時，每段落自動插入 1 張圖（100% 置中）。
- **分頁原則**：內頁以「**一段文字 + 1 張圖片 = 一頁**」為原則，每個段落區塊後自動插入分頁（最後一段除外）。
- 保留：多圖清單（排序/刪除/清空/URL 或 Base64 新增）、`[左圖%][右圖%]`、`[無圖]`、行尾/任意位置的 `[14pt]` 字級。


In [None]:
# Imports
import io, re, base64, urllib.request, time
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

from docx import Document
from docx.shared import Mm, Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.section import WD_SECTION
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

from PIL import Image


In [None]:
# Helpers

FWP = '％'  # Full-width percent sign (U+FF05)

def ensure_rgb_jpeg(img_bytes, quality=90):
    """Re-encode raw image bytes as JPEG and return the encoded bytes.

    Images whose mode is neither "RGB" nor "L" are converted to "RGB"
    before saving; `quality` is passed straight to the JPEG encoder.
    """
    picture = Image.open(io.BytesIO(img_bytes))
    jpeg_ready = picture if picture.mode in ("RGB", "L") else picture.convert("RGB")
    buffer = io.BytesIO()
    jpeg_ready.save(buffer, format="JPEG", quality=quality)
    return buffer.getvalue()

def parse_and_strip_size_anywhere(text, default_pt=12):
    """Extract the first `[<N>pt]` font-size tag from `text`.

    Returns a `(size, remaining_text)` tuple. When a tag is found, `size` is
    its integer value and that single occurrence is cut out of the text (any
    later tags are left untouched). Without a tag, `default_pt` and the
    unmodified text are returned.
    """
    found = re.search(r"\[(\d+)pt\]", text)
    if found is None:
        return default_pt, text
    before, after = text[:found.start()], text[found.end():]
    return int(found.group(1)), before + after

def has_skip_image_tag(text):
    """Return True when `text` ends with a `[無圖]` tag (trailing whitespace allowed)."""
    trailing_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_tag.search(text) is not None

def strip_skip_image_tag(text):
    """Return `text` with a trailing `[無圖]` tag (and the whitespace after it) removed."""
    trailing_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_tag.sub("", text)

# Image tags recognised inside a line. Either alternative accepts an optional
# integer percentage followed by an optional ASCII or full-width percent sign.
# The left/right pair is wrapped in its own named group so that a pair written
# without any percentages can still be told apart from a single-image tag.
_IMAGE_TAG_RE = re.compile(
    r"(?P<double>\[\s*左圖\s*(?P<lp>\d+)?\s*[%％]?\s*\]\s*\[\s*右圖\s*(?P<rp>\d+)?\s*[%％]?\s*\])"
    r"|\[\s*圖片\s*(?P<sp>\d+)?\s*[%％]?\s*\]"
)


def tokenize_line(line):
    """
    Split `line` into tokens, preserving input order.

    Returns a list of tuples:
      ('text', text)                            -- literal text between tags
      ('single', pct_or_None)                   -- a `[圖片]` tag
      ('double', l_pct_or_None, r_pct_or_None)  -- a `[左圖][右圖]` pair

    Percentages are ints, or None when the tag carries no number. The text
    after the last tag is always appended as a final 'text' token, even when
    it is empty.
    """
    def as_pct(digits):
        return int(digits) if digits else None

    tokens = []
    cursor = 0
    for m in _IMAGE_TAG_RE.finditer(line):
        if m.start() > cursor:
            tokens.append(('text', line[cursor:m.start()]))
        if m.group('double') is not None:
            # Decide by which alternative matched, not by whether a percentage
            # was captured: `[左圖][右圖]` has both percentages missing.
            tokens.append(('double', as_pct(m.group('lp')), as_pct(m.group('rp'))))
        else:
            tokens.append(('single', as_pct(m.group('sp'))))
        cursor = m.end()
    tokens.append(('text', line[cursor:]))
    return tokens

def set_section_to_a5(section):
    """Set `section` to a 148 mm x 210 mm page with 15 mm margins on all four sides."""
    section.page_width, section.page_height = Mm(148), Mm(210)
    for side in ("left", "right", "top", "bottom"):
        setattr(section, f"{side}_margin", Mm(15))

def add_text_paragraph(doc, text, size_pt, center=False):
    """Append `text` (stripped) to `doc` as one paragraph and return it.

    Whitespace-only text adds nothing and returns None. The paragraph is
    centered when `center` is true, left-aligned otherwise, and every run
    gets a font size of `size_pt` points.
    """
    content = text.strip()
    if not content:
        return None
    paragraph = doc.add_paragraph(content)
    if center:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    for piece in paragraph.runs:
        piece.font.size = Pt(size_pt)
    return paragraph

def add_single_image_paragraph(doc, img_bytes, width_pct=None, center=True):
    """Append a paragraph holding one picture to `doc`.

    The picture width is `width_pct` percent of the usable page width
    (148 mm page minus two 15 mm margins), clamped to between 1 inch and the
    full usable width. With `width_pct=None` the full usable width is used.
    The image is re-encoded through `ensure_rgb_jpeg` before insertion.
    """
    usable_in = (148 - 15 - 15) / 25.4
    if width_pct is not None:
        requested_in = usable_in * (width_pct / 100.0)
        picture_in = max(1.0, min(usable_in, requested_in))
    else:
        picture_in = usable_in * 1.0

    jpeg_bytes = ensure_rgb_jpeg(img_bytes)
    holder = doc.add_paragraph()
    holder.alignment = WD_ALIGN_PARAGRAPH.CENTER if center else WD_ALIGN_PARAGRAPH.LEFT
    holder.add_run().add_picture(io.BytesIO(jpeg_bytes), width=Inches(picture_in))

def add_double_image_table(doc, left_bytes, right_bytes, l_pct, r_pct):
    """Append a one-row, two-column table to `doc` with one picture per cell.

    Each column's base width is half of the usable page width after
    subtracting 0.2 inch of padding. `l_pct` / `r_pct` scale that base width
    (None keeps the base width); the result is clamped to between 0.8 inch
    and the base width. Both pictures are centered in their cells.
    """
    usable_in = (148 - 15 - 15) / 25.4
    padding = 0.2
    column_in = (usable_in - padding) / 2.0

    def column_width(pct):
        wanted = column_in if pct is None else column_in * (pct / 100.0)
        return max(0.8, min(column_in, wanted))

    widths = (column_width(l_pct), column_width(r_pct))

    table = doc.add_table(rows=1, cols=2)
    table.autofit = True
    for col, (payload, width_in) in enumerate(zip((left_bytes, right_bytes), widths)):
        cell_para = table.rows[0].cells[col].paragraphs[0]
        cell_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        cell_para.add_run().add_picture(
            io.BytesIO(ensure_rgb_jpeg(payload)), width=Inches(width_in)
        )

def add_page_number_footer(section):
    """Add centered footer with page number styled as -1- (no spaces)."""
    footer = section.footer
    existing = footer.paragraphs
    para = existing[0] if existing else footer.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def field_char(kind):
        # Build a <w:fldChar> element marking one boundary of the field.
        marker = OxmlElement('w:fldChar')
        marker.set(qn('w:fldCharType'), kind)
        return marker

    instruction = OxmlElement('w:instrText')
    instruction.text = ' PAGE '

    # Leading hyphen, then one raw run carrying begin / instruction / end,
    # then the trailing hyphen.
    para.add_run('-')
    field_run = OxmlElement('w:r')
    for child in (field_char('begin'), instruction, field_char('end')):
        field_run.append(child)
    para._p.append(field_run)
    para.add_run('-')

def load_image_from_line(line: str, timeout: float = 30.0) -> "bytes | None":
    """Load image bytes described by one line of user input.

    Accepted forms, checked in this order after stripping whitespace:
      * empty line                      -> returns None
      * ``data:image...,<base64>`` URI  -> decoded payload after the first comma
      * ``http://`` / ``https://`` URL  -> downloaded response body
      * anything else                   -> treated as strict plain base64

    Args:
        line: The raw input line.
        timeout: Seconds to wait for the HTTP download before giving up.

    Returns:
        The raw bytes, or None for an empty line.

    Raises:
        ValueError: The input is not a usable data URI or valid base64. The
            underlying decode error, when there is one, is chained as the cause.
        OSError: The download failed (``urllib`` network errors derive from it).
    """
    unrecognized = "無法辨識的圖片輸入：請提供圖片 URL、data:base64 或純 base64。"
    line = line.strip()
    if not line:
        return None
    if line.startswith('data:image'):
        _head, comma, payload = line.partition(',')
        if not comma:
            # A data URI with no payload separator used to surface as an
            # opaque tuple-unpacking error.
            raise ValueError(unrecognized)
        try:
            return base64.b64decode(payload)
        except ValueError as err:  # binascii.Error is a ValueError subclass
            raise ValueError(unrecognized) from err
    if line.startswith(('http://', 'https://')):
        # Bounded wait so an unresponsive server cannot hang the notebook UI.
        with urllib.request.urlopen(line, timeout=timeout) as resp:
            return resp.read()
    try:
        return base64.b64decode(line, validate=True)
    except ValueError as err:
        raise ValueError(unrecognized) from err

def make_data_uri_download(data: bytes, filename: str) -> HTML:
    """Return an HTML download link that embeds `data` as a base64 data URI.

    The link uses the .docx MIME type and `filename` as both the download
    name and part of the visible label.
    """
    mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    payload = base64.b64encode(data).decode()
    href = f"data:{mime};base64,{payload}"
    return HTML(f'<a download="{filename}" href="{href}">📥 點此下載 {filename}</a>')


In [None]:
# 圖片清單管理

def image_manager(title: str):
    """Build an ordered image-list widget group.

    The group offers file upload, a selectable list with move-up / move-down /
    delete / clear buttons, and a text box for adding images by URL, data URI
    or plain base64 (one per line).

    Args:
        title: Label fragment appended to the upload button description.

    Returns:
        ``(ui, images)`` where ``ui`` is the composed VBox and ``images`` is
        the live list of ``{'name': str, 'bytes': bytes}`` dicts that the
        widget callbacks mutate in place.
    """
    uploader = widgets.FileUpload(accept='image/*', multiple=True, description=f'上傳{title}')
    lst = widgets.Select(options=[], rows=6, description='順序')
    btn_up = widgets.Button(description='上移')
    btn_down = widgets.Button(description='下移')
    btn_del = widgets.Button(description='刪除')
    btn_clear = widgets.Button(description='清空')
    add_box = widgets.Textarea(placeholder='貼入圖片 URL 或 data:image/...;base64,... 或純 base64（每行一張）', layout=widgets.Layout(height='70px', width='100%'))
    add_btn = widgets.Button(description='新增到清單', button_style='info')
    status = widgets.HTML('')

    images = []

    def refresh():
        # Rebuild the visible list and select the first entry, if any.
        lst.options = [f"{i+1:02d}. {it['name']} ({len(it['bytes'])//1024}KB)" for i, it in enumerate(images)]
        lst.index = 0 if images else None

    def reset_uploader():
        # ipywidgets 7.x exposes `value` as a dict that can only be emptied in
        # place; 8.x exposes a tuple, which has no `.clear()` and must be
        # replaced instead.
        if isinstance(uploader.value, dict):
            uploader.value.clear()
        else:
            uploader.value = ()

    def on_upload(change):
        uploaded = change['new']
        if not uploaded:
            # Also fires when reset_uploader() empties the widget.
            return
        if isinstance(uploaded, dict):
            # ipywidgets 7.x shape: {filename: {'content': bytes, ...}}
            entries = [(name, info.get('content', b'')) for name, info in uploaded.items()]
        else:
            # ipywidgets 8.x shape: sequence of dicts with 'name' and 'content'
            entries = [(meta.get('name', 'image'), meta.get('content', b'')) for meta in uploaded]
        for name, content in entries:
            if content:
                # `content` may be a memoryview; store real bytes.
                images.append({'name': name, 'bytes': bytes(content)})
        reset_uploader()
        refresh()
        status.value = f"<span style='color:green'>已加入 {len(images)} 張</span>"

    uploader.observe(on_upload, names='value')

    def move(delta):
        if not images or lst.index is None:
            return
        i = lst.index
        j = i + delta
        if 0 <= j < len(images):
            images[i], images[j] = images[j], images[i]
            refresh()
            lst.index = j

    def delete_selected(_):
        if images and lst.index is not None:
            images.pop(lst.index)
            refresh()

    def clear_all(_):
        images.clear()
        refresh()

    btn_up.on_click(lambda _: move(-1))
    btn_down.on_click(lambda _: move(1))
    btn_del.on_click(delete_selected)
    btn_clear.on_click(clear_all)

    def do_add_from_text(_):
        lines = [ln.strip() for ln in add_box.value.splitlines() if ln.strip()]
        added = 0
        failed = 0
        for ln in lines:
            try:
                b = load_image_from_line(ln)
            except Exception:
                # Best effort per line (bad input or network failure), but
                # count the failure so the user is told about it.
                failed += 1
                continue
            if b:
                images.append({'name': f'added_{len(images)+1}.jpg', 'bytes': b})
                added += 1
        add_box.value = ''
        refresh()
        if added:
            message = f"<span style='color:green'>新增 {added} 張</span>"
        else:
            message = "<span style='color:#a00'>沒有成功新增</span>"
        if failed:
            message += f"<span style='color:#a00'>（{failed} 筆無法載入）</span>"
        status.value = message

    add_btn.on_click(do_add_from_text)

    ui = widgets.VBox([
        uploader,
        widgets.HBox([lst, widgets.VBox([btn_up, btn_down, btn_del, btn_clear])]),
        widgets.HBox([add_box, widgets.VBox([add_btn])]),
        status
    ])
    return ui, images


In [None]:
# User interface: text inputs, per-part image managers, and the generate button.
txt_cover = widgets.Textarea(
    value="神奇的迴力鏢[18pt]\n[圖片 50%]\n[圖片50%]\n種是希望",
    description='封面文字',
    layout=widgets.Layout(width='100%', height='150px'),
)
txt_body = widgets.Textarea(
    value="第一段文字。\n\n第二段文字。\n\n第三段文字。",
    description='內頁文字',
    layout=widgets.Layout(width='100%', height='200px'),
)
txt_back = widgets.Textarea(
    value="封底簡介……\n[圖片60%]\n[左圖60%][右圖60%]",
    description='封底文字',
    layout=widgets.Layout(width='100%', height='150px'),
)
chk_pagenum = widgets.Checkbox(value=True, description='內頁加頁碼')

# One image manager per part of the book; each returns its UI and live image list.
ui_cover, imgs_cover = image_manager("封面圖片（可多張）")
ui_body, imgs_body = image_manager("內頁圖片（可多張）")
ui_back, imgs_back = image_manager("封底圖片（可多張）")

btn = widgets.Button(description="生成 A5 小書（docx）", button_style='success')
out = widgets.Output()

# Layout, top to bottom: cover, body, back cover, then the button and its output.
display(
    widgets.VBox(
        [
            widgets.HBox([widgets.VBox([txt_cover]), widgets.VBox([chk_pagenum])]),
            ui_cover,
            widgets.Label('—— 內頁 ——'),
            txt_body,
            ui_body,
            widgets.Label('—— 封底 ——'),
            txt_back,
            ui_back,
            btn,
            out,
        ]
    )
)


In [None]:
# 組裝 DOCX（v13）

def emit_stream(doc, text, size_pt, imgs, center=False):
    """Write `text` to `doc` following the order of its inline tags.

    Text tokens become paragraphs; a single-image tag consumes one entry from
    the front of `imgs`, a left/right tag consumes two. An image tag is
    silently skipped when `imgs` does not hold enough entries, or when `text`
    ends with the skip-image tag. `imgs` is mutated in place.
    """
    images_allowed = not has_skip_image_tag(text)
    for token in tokenize_line(strip_skip_image_tag(text)):
        kind = token[0]
        if kind == 'text':
            add_text_paragraph(doc, token[1], size_pt, center=center)
            continue
        if not images_allowed:
            continue
        if kind == 'single':
            if imgs:
                picture = imgs.pop(0)
                add_single_image_paragraph(doc, picture, width_pct=token[1], center=True)
        elif kind == 'double':
            if len(imgs) >= 2:
                left = imgs.pop(0)
                right = imgs.pop(0)
                add_double_image_table(doc, left, right, token[1], token[2])

def _has_image_marker(text):
    """Return True when `text` contains at least one image tag."""
    return any(tk[0] != 'text' for tk in tokenize_line(text))


def _emit_outer_page(doc, raw_text, default_pt, imgs):
    """Emit a cover-style page: centered stream output plus an optional fallback image."""
    size_pt, text = parse_and_strip_size_anywhere(raw_text, default_pt=default_pt)
    emit_stream(doc, text, size_pt, imgs, center=True)
    # Fallback image only when the text carries no image marker at all and
    # does not opt out with the skip-image tag (same rule as the body pages).
    if imgs and not _has_image_marker(text) and not has_skip_image_tag(text):
        add_single_image_paragraph(doc, imgs.pop(0), width_pct=100, center=True)


def build_docx_bytes():
    """Assemble the A5 booklet from the current widget state.

    Reads the cover / body / back-cover text areas, the three image lists and
    the page-number checkbox, builds a three-section document (cover, body,
    back cover) and returns the saved .docx as bytes. The saved bytes are
    re-opened once as a sanity check before being returned.
    """
    # Work on copies so generating a document does not drain the UI lists.
    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]

    doc = Document()
    set_section_to_a5(doc.sections[0])

    # ---- Cover (stream output, order as typed) ----
    _emit_outer_page(doc, txt_cover.value, 18, cover_imgs)

    # ---- Body ----
    body_section = doc.add_section()
    set_section_to_a5(body_section)
    show_page_numbers = chk_pagenum.value
    if show_page_numbers:
        # A new section's footer is linked to the previous section by default,
        # so writing to it would put the page number on the cover as well.
        # Unlinking gives the body section its own footer definition.
        body_section.footer.is_linked_to_previous = False
        add_page_number_footer(body_section)

    raw_lines = txt_body.value.split('\n')
    total_non_empty = sum(1 for ln in raw_lines if ln.strip())
    count_emitted = 0

    for raw in raw_lines:
        line = raw.strip()
        if not line:
            doc.add_paragraph('')
            continue
        size_pt, text = parse_and_strip_size_anywhere(line, default_pt=12)

        # Stream output: image tags are replaced by images in place.
        emit_stream(doc, text, size_pt, body_imgs, center=False)

        # No image tag and no opt-out: auto-insert one full-width image.
        if body_imgs and not _has_image_marker(text) and not has_skip_image_tag(text):
            add_single_image_paragraph(doc, body_imgs.pop(0), width_pct=100, center=True)

        # Page break after every paragraph block except the last one.
        count_emitted += 1
        if count_emitted < total_non_empty:
            doc.add_page_break()

    # ---- Back cover (stream output, order as typed) ----
    back_section = doc.add_section()
    set_section_to_a5(back_section)
    if show_page_numbers:
        # Otherwise the back cover would inherit the body's page-number footer.
        back_section.footer.is_linked_to_previous = False
    _emit_outer_page(doc, txt_back.value, 12, back_imgs)

    buf = io.BytesIO()
    doc.save(buf)
    data = buf.getvalue()
    Document(io.BytesIO(data))  # verify the output can be re-opened
    return data

def on_click(_):
    """Button handler: build the document and show a download link or the error.

    All output goes to the `out` widget, which is cleared first. Any exception
    raised while building is reported in red instead of propagating, since
    this is the top-level UI boundary.
    """
    from html import escape  # local import keeps this block self-contained

    with out:
        clear_output()
        try:
            data = build_docx_bytes()
            fname = f"a5_book_{int(time.time())}.docx"
            display(HTML('<div style="color:green;">完成！</div>'))
            display(make_data_uri_download(data, fname))
        except Exception as e:
            # Escape the message: text such as "<_io.BytesIO object at ...>"
            # would otherwise be parsed as an HTML tag and disappear.
            display(HTML(f'<div style="color:red;">失敗：{escape(str(e))}</div>'))

btn.on_click(on_click)
