# A5 小書產生器（v12）— 更穩定的標記解析與版面
- 標記更彈性：接受空白與全形％，如 `[圖片 60％]`、`[左圖 60%][右圖60％]`
- **封面/封底**：同時出現單圖與雙圖標記時，不殘留標記文字；會依序插入 **單圖→雙圖**（按清單順序取圖）
- **內頁**：若段落沒有標記且仍有待用圖片，會自動在段落後插入一張（預設 60%）
- `[Npt]` 字級標記（N 為數字，例如 `[18pt]`）在**任意位置**都可作用（標記會從文字中移除）
- 雙圖表格預留邊界，避免圖片被裁切
- 下載仍用 **Base64** 方式，避免 403/424


In [None]:
# Imports
import io, re, base64, urllib.request, time
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

from docx import Document
from docx.shared import Mm, Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

from PIL import Image


In [None]:
# Helpers (robust)

FWP = '％'  # Full-width percent sign (U+FF05); the tag regexes accept it alongside ASCII '%'

def ensure_rgb_jpeg(img_bytes, quality=90):
    """Re-encode image bytes as JPEG and return the encoded bytes.

    Images whose mode is neither "RGB" nor "L" are converted to "RGB"
    before saving; "RGB" and "L" images are saved as they are.

    Args:
        img_bytes: Raw bytes of an image in any format Pillow can open.
        quality: JPEG quality passed to Pillow's encoder.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    picture = Image.open(io.BytesIO(img_bytes))
    needs_conversion = picture.mode not in ("RGB", "L")
    if needs_conversion:
        picture = picture.convert("RGB")

    encoded = io.BytesIO()
    picture.save(encoded, format="JPEG", quality=quality)
    return encoded.getvalue()

def parse_and_strip_size_anywhere(text, default_pt=12):
    """Read the font-size tag from ``text`` and strip every size tag.

    A size tag looks like ``[18pt]``; optional whitespace inside the
    brackets (``[ 18 pt ]``) is tolerated, matching the leniency of the
    image tags. The first tag found decides the size. All size tags are
    removed so that a stray second tag never leaks into the rendered text.

    Args:
        text: Text that may contain size tags at any position.
        default_pt: Size returned when no tag is present.

    Returns:
        ``(size_pt, text_without_size_tags)``. When no tag is present the
        text is returned unchanged together with ``default_pt``.
    """
    size_tag = re.compile(r"\[\s*(\d+)\s*pt\s*\]")
    first = size_tag.search(text)
    if first is None:
        return default_pt, text
    # The first tag wins; any additional tags are only removed.
    return int(first.group(1)), size_tag.sub("", text)

def has_skip_image_tag(text):
    """Return True when ``text`` ends with the "no image" tag ``[無圖]``.

    Whitespace inside the brackets and after the tag is allowed; the tag
    must be the last non-whitespace thing in the text.
    """
    trailing_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_tag.search(text) is not None

def strip_skip_image_tag(text):
    """Remove a trailing "no image" tag ``[無圖]`` from ``text``.

    The tag and any whitespace following it are dropped; text without a
    trailing tag is returned unchanged.
    """
    trailing_tag = re.compile(r"\[\s*無圖\s*\]\s*$")
    return trailing_tag.sub("", text)

def find_single_tags_all(text):
    """Locate every single-image tag (``[圖片]``, ``[圖片 60%]``, ...) in ``text``.

    The number is optional, and so is the percent sign, which may be the
    ASCII ``%`` or the full-width form held in ``FWP``.

    Returns:
        A list of ``(span_start, span_end, pct)`` tuples in order of
        appearance, where ``pct`` is the integer written in the tag or
        ``None`` when the tag carries no number.
    """
    tag_re = re.compile(r"\[\s*圖片\s*(\d+)?\s*[%" + FWP + r"]?\s*\]")
    found = []
    for match in tag_re.finditer(text):
        digits = match.group(1)
        pct = int(digits) if digits else None
        found.append((match.start(), match.end(), pct))
    return found

def pop_first_single_tag(text):
    """Remove the first single-image tag from ``text``.

    Returns:
        ``(pct, remaining_text)``. When there is no tag the result is
        ``(None, text)`` with the text untouched. Note that ``pct`` is also
        ``None`` for a tag written without a number, so ``pct`` alone does
        not tell the two cases apart.
    """
    first = next(iter(find_single_tags_all(text)), None)
    if first is None:
        return None, text
    start, end, pct = first
    return pct, text[:start] + text[end:]

def has_double_tags(text):
    """Find and strip the first ``[左圖 ..][右圖 ..]`` tag pair in ``text``.

    The two tags must be adjacent (only whitespace between them). Each may
    carry an optional number and an optional ASCII or full-width percent
    sign.

    Returns:
        ``((left_pct, right_pct), text_without_pair)`` when a pair is found,
        with each percentage an ``int`` or ``None`` if omitted; otherwise
        ``(None, text)`` with the text unchanged.
    """
    pair_re = re.compile(
        r"\[\s*左圖\s*(\d+)?\s*[%" + FWP + r"]?\s*\]\s*\[\s*右圖\s*(\d+)?\s*[%" + FWP + r"]?\s*\]"
    )
    hit = pair_re.search(text)
    if hit is None:
        return None, text

    left_pct, right_pct = (int(g) if g else None for g in hit.groups())
    # Cut the matched pair out of the text.
    remainder = text[:hit.start()] + text[hit.end():]
    return (left_pct, right_pct), remainder

def set_section_to_a5(section):
    """Give ``section`` an A5 portrait page (148 x 210 mm) with 15 mm margins."""
    section.page_width, section.page_height = Mm(148), Mm(210)
    for side in ("left", "right", "top", "bottom"):
        setattr(section, f"{side}_margin", Mm(15))

def add_paragraph_with_size(doc, text, size_pt, align_center=False):
    """Append a paragraph to ``doc`` and set the font size of its runs.

    Args:
        doc: Document to append to.
        text: Paragraph text.
        size_pt: Font size in points applied to every run of the paragraph.
        align_center: Center the paragraph when true, left-align otherwise.

    Returns:
        The newly added paragraph.
    """
    paragraph = doc.add_paragraph(text)
    if align_center:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    for piece in paragraph.runs:
        piece.font.size = Pt(size_pt)
    return paragraph

def add_single_image_paragraph(doc, img_bytes, width_pct=None):
    """Append one picture to ``doc``, sized relative to the text width.

    The text width is the A5 page width minus both 15 mm margins. Without
    ``width_pct`` the picture takes 90% of it; with ``width_pct`` it takes
    that percentage, clamped to at least 1 inch and at most the full text
    width. The image is re-encoded as JPEG before insertion.
    """
    usable_in = (148 - 15 - 15) / 25.4
    if width_pct is not None:
        target_in = usable_in * (width_pct / 100.0)
        target_in = min(usable_in, target_in)
        target_in = max(1.0, target_in)
    else:
        target_in = usable_in * 0.9

    jpeg = ensure_rgb_jpeg(img_bytes)
    doc.add_picture(io.BytesIO(jpeg), width=Inches(target_in))

def add_double_image_table(doc, left_bytes, right_bytes, l_pct, r_pct):
    """Append a one-row, two-column table holding two pictures side by side.

    Each column's base width is half of the text width after reserving
    0.2 inch for table padding, so the pictures are not cropped. A
    percentage scales the base width; ``None`` means the full base width.
    The result is clamped to between 0.8 inch and the base width.
    """
    usable_in = (148 - 15 - 15) / 25.4
    padding = 0.2  # inches reserved in total for table paddings/margins
    half_in = (usable_in - padding) / 2.0

    def cell_width(pct):
        wanted = half_in if pct is None else half_in * (pct / 100.0)
        return max(0.8, min(half_in, wanted))

    widths = (cell_width(l_pct), cell_width(r_pct))

    table = doc.add_table(rows=1, cols=2)
    table.autofit = True
    sources = (left_bytes, right_bytes)
    for col, (raw, width_in) in enumerate(zip(sources, widths)):
        cell = table.rows[0].cells[col]
        run = cell.paragraphs[0].add_run()
        run.add_picture(io.BytesIO(ensure_rgb_jpeg(raw)), width=Inches(width_in))

def add_page_number_footer(section):
    """Add a centered PAGE field to the footer of ``section``.

    The footer is first detached from the previous section. python-docx
    applies edits on a linked footer to the footer of an earlier section,
    which would put the page number on pages that precede ``section``
    (for example a cover). Detaching gives ``section`` its own footer
    definition.

    Later sections whose footers stay linked to this one still inherit the
    page number; callers that want them blank must unlink those footers.
    """
    footer = section.footer
    # Own definition for this section; a no-op when it already has one.
    footer.is_linked_to_previous = False
    p = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Build the complex field  <begin> PAGE <end>  inside a single run.
    fld_begin = OxmlElement('w:fldChar')
    fld_begin.set(qn('w:fldCharType'), 'begin')
    instr = OxmlElement('w:instrText')
    # Keep the surrounding spaces of the field instruction intact.
    instr.set(qn('xml:space'), 'preserve')
    instr.text = ' PAGE '
    fld_end = OxmlElement('w:fldChar')
    fld_end.set(qn('w:fldCharType'), 'end')

    r = OxmlElement('w:r')
    for node in (fld_begin, instr, fld_end):
        r.append(node)
    p._p.append(r)

def load_image_from_line(line: str, timeout: float = 15.0) -> "bytes | None":
    """Turn one line of user input into raw image bytes.

    Accepted forms, checked in this order:

    1. ``data:image...;base64,<payload>`` data URI
    2. ``http://`` or ``https://`` URL (downloaded)
    3. bare base64 text

    Args:
        line: One line of input; surrounding whitespace is ignored.
        timeout: Seconds to wait for a URL download, so that an
            unresponsive server cannot hang the notebook UI indefinitely.

    Returns:
        The decoded or downloaded bytes, or ``None`` for a blank line.

    Raises:
        ValueError: The line is none of the accepted forms, or a data URI
            has no ``,`` separating header and payload.
        OSError: The URL could not be fetched (``urllib`` errors derive
            from ``OSError``).
    """
    unrecognized = "無法辨識的圖片輸入：請提供圖片 URL、data:base64 或純 base64。"

    line = line.strip()
    if not line:
        return None

    if line.startswith('data:image'):
        _header, sep, payload = line.partition(',')
        if not sep:
            # A data URI without a payload separator is malformed.
            raise ValueError(unrecognized)
        return base64.b64decode(payload)

    if line.startswith(('http://', 'https://')):
        with urllib.request.urlopen(line, timeout=timeout) as resp:
            return resp.read()

    try:
        return base64.b64decode(line, validate=True)
    except ValueError as err:  # binascii.Error is a ValueError subclass
        raise ValueError(unrecognized) from err

def make_data_uri_download(data: bytes, filename: str) -> HTML:
    """Build an HTML download link that embeds ``data`` as a base64 data URI.

    The link uses the .docx MIME type and offers the file under
    ``filename`` through the anchor's ``download`` attribute.
    """
    mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    encoded = base64.b64encode(data).decode()
    link = f'<a download="{filename}" href="data:{mime};base64,{encoded}">📥 點此下載 {filename}</a>'
    return HTML(link)


In [None]:
# 圖片清單（可見、排序、刪除、清空 + URL/Base64 新增）

def image_manager(title: str):
    """Build an image-list widget: upload, reorder, delete, clear, add by text.

    Args:
        title: Label fragment shown on the upload button.

    Returns:
        ``(ui, images)`` where ``ui`` is the composed widget and ``images``
        is the live list of ``{'name': str, 'bytes': bytes}`` dicts in
        display order. The same list object is mutated in place by every
        action, so callers can keep a reference to it.
    """
    uploader = widgets.FileUpload(accept='image/*', multiple=True, description=f'上傳{title}')
    lst = widgets.Select(options=[], rows=6, description='順序')
    btn_up = widgets.Button(description='上移')
    btn_down = widgets.Button(description='下移')
    btn_del = widgets.Button(description='刪除')
    btn_clear = widgets.Button(description='清空')
    add_box = widgets.Textarea(placeholder='貼入圖片 URL 或 data:image/...;base64,... 或純 base64（每行一張）', layout=widgets.Layout(height='70px', width='100%'))
    add_btn = widgets.Button(description='新增到清單', button_style='info')
    status = widgets.HTML('')

    images = []  # list of dict {'name','bytes'}; never rebound, only mutated

    def refresh():
        """Re-render the list box and keep the selection within range."""
        lst.options = [f"{i+1:02d}. {it['name']} ({len(it['bytes'])//1024}KB)" for i, it in enumerate(images)]
        if images:
            lst.index = 0 if lst.index is None else min(lst.index, len(images)-1)
        else:
            lst.index = None

    def on_upload(change):
        """Move freshly uploaded files from the uploader into ``images``."""
        value = uploader.value
        if not value:
            # Nothing to take over. This also absorbs the notification
            # triggered by resetting the uploader below.
            return
        # ipywidgets 7.x: {filename: {'metadata': {...}, 'content': bytes}}
        # ipywidgets 8.x: ({'name': ..., 'content': memoryview, ...}, ...)
        entries = value.values() if isinstance(value, dict) else value
        for meta in entries:
            content = meta.get('content', b'')
            name = meta.get('name') or meta.get('metadata', {}).get('name') or 'image'
            if content:
                # bytes() normalizes the memoryview delivered by 8.x.
                images.append({'name': name, 'bytes': bytes(content)})
        # Empty the uploader so the same files are not taken over twice.
        if isinstance(value, dict):
            value.clear()
        else:
            uploader.value = ()
        refresh()
        status.value = f"<span style='color:green'>已加入 {len(images)} 張</span>"

    uploader.observe(on_upload, names='value')

    def move(delta):
        """Swap the selected entry with its neighbour ``delta`` steps away."""
        if not images or lst.index is None:
            return
        i = lst.index
        j = i + delta
        if 0 <= j < len(images):
            images[i], images[j] = images[j], images[i]
            refresh()
            lst.index = j

    btn_up.on_click(lambda _: move(-1))
    btn_down.on_click(lambda _: move(1))

    def do_delete(_):
        """Remove the selected entry."""
        if not images or lst.index is None:
            return
        images.pop(lst.index)
        refresh()
    btn_del.on_click(do_delete)

    def do_clear(_):
        """Remove every entry."""
        images.clear()
        refresh()
    btn_clear.on_click(do_clear)

    def do_add_from_text(_):
        """Add one image per non-blank line of the text box."""
        added = 0
        failed = 0
        for ln in (raw.strip() for raw in add_box.value.splitlines()):
            if not ln:
                continue
            try:
                data = load_image_from_line(ln)
            except Exception:
                # Best effort at the UI boundary: a bad line (unparsable
                # text, failed download) must not abort the remaining
                # lines, but it is counted and reported rather than hidden.
                failed += 1
                continue
            if data:
                images.append({'name': f'added_{len(images)+1}.jpg', 'bytes': data})
                added += 1
        add_box.value = ''
        refresh()
        if added:
            message = f"<span style='color:green'>新增 {added} 張</span>"
            if failed:
                message += f" <span style='color:#a00'>（{failed} 行無法讀取）</span>"
        else:
            message = "<span style='color:#a00'>沒有成功新增</span>"
        status.value = message

    add_btn.on_click(do_add_from_text)

    ui = widgets.VBox([
        uploader,
        widgets.HBox([lst, widgets.VBox([btn_up, btn_down, btn_del, btn_clear])]),
        widgets.HBox([add_box, widgets.VBox([add_btn])]),
        status
    ])
    return ui, images


In [None]:
# UI: each text box is immediately followed by its own image list
# Text inputs for the three parts of the booklet (cover, inner pages, back cover),
# pre-filled with sample text that demonstrates the size and image tags.
txt_cover = widgets.Textarea(value="神奇的迴力鏢[18pt]\n[圖片 50%]\n[圖片50%]", description='封面文字', layout=widgets.Layout(width='100%', height='120px'))
txt_body  = widgets.Textarea(value="第一段文字。\n\n第二段文字。\n\n第三段文字。", description='內頁文字', layout=widgets.Layout(width='100%', height='200px'))
txt_back  = widgets.Textarea(value="封底簡介……\n[圖片60%]\n[左圖 60%][右圖60％]", description='封底文字', layout=widgets.Layout(width='100%', height='140px'))
# Read by build_docx_bytes to decide whether the inner-page section gets a page-number footer.
chk_pagenum = widgets.Checkbox(value=True, description='內頁加頁碼')

# One image manager per part; each returns its widget and the live image list.
ui_cover, imgs_cover = image_manager("封面圖片（可多張）")
ui_body,  imgs_body  = image_manager("內頁圖片（可多張）")
ui_back,  imgs_back  = image_manager("封底圖片（可多張）")

# Generate button and the output area that shows the result / download link.
btn = widgets.Button(description="生成 A5 小書（docx）", button_style='success')
out = widgets.Output()

# Lay everything out top to bottom: cover, inner pages, back cover, button, output.
display(widgets.VBox([
    widgets.HBox([widgets.VBox([txt_cover]), widgets.VBox([chk_pagenum])]),
    ui_cover,
    widgets.Label('—— 內頁 ——'),
    txt_body,
    ui_body,
    widgets.Label('—— 封底 ——'),
    txt_back,
    ui_back,
    btn,
    out
]))


In [None]:
# 組裝 DOCX（v12 邏輯）

def _extract_single_tags(text):
    """Strip every single-image tag; return ``(percentages, stripped_text)``.

    ``percentages`` has one entry per tag in order of appearance; an entry
    is ``None`` for a tag written without a number (e.g. ``[圖片]``), which
    still counts as a request for an image.
    """
    tags = find_single_tags_all(text)
    # Remove back to front so earlier spans stay valid while cutting.
    for start, end, _pct in reversed(tags):
        text = text[:start] + text[end:]
    return [pct for _start, _end, pct in tags], text


def _add_cover_like_page(doc, raw_text, images, default_pt):
    """Render a cover-style page: centered text, single images, then one pair.

    Images are consumed from ``images`` in list order. When the text has no
    image tag at all and an image is available, the first image is placed
    at 60% width.
    """
    size_pt, text = parse_and_strip_size_anywhere(raw_text, default_pt=default_pt)
    dbl, text = has_double_tags(text)
    singles, text = _extract_single_tags(text)
    text = text.strip()

    if text:
        add_paragraph_with_size(doc, text, size_pt, align_center=True)

    img_idx = 0
    for pct in singles:
        if img_idx >= len(images):
            break
        add_single_image_paragraph(doc, images[img_idx], width_pct=pct)
        img_idx += 1
    if dbl is not None and img_idx + 1 < len(images):
        l_pct, r_pct = dbl
        add_double_image_table(doc, images[img_idx], images[img_idx + 1], l_pct, r_pct)
        img_idx += 2
    if not singles and dbl is None and img_idx < len(images):
        add_single_image_paragraph(doc, images[img_idx], width_pct=60)


def build_docx_bytes():
    """Assemble the A5 booklet from the current widget state.

    Layout: cover section, inner-page section (optionally with page
    numbers), back-cover section. Each part takes images from its own list
    in list order.

    Inner pages are processed line by line:
      * a blank line becomes an empty paragraph;
      * a ``[左圖][右圖]`` pair inserts a two-picture table when two images
        remain;
      * each ``[圖片 ..]`` tag inserts one picture;
      * a line without any image tag gets one picture at 60% width while
        images remain;
      * a trailing ``[無圖]`` suppresses all pictures for that line.

    Returns:
        The .docx file content as ``bytes``.
    """
    cover_text = txt_cover.value
    body_text = txt_body.value
    back_text = txt_back.value

    cover_imgs = [it['bytes'] for it in imgs_cover]
    body_imgs = [it['bytes'] for it in imgs_body]
    back_imgs = [it['bytes'] for it in imgs_back]
    number_pages = chk_pagenum.value

    doc = Document()
    set_section_to_a5(doc.sections[0])

    # ---- Cover ----
    _add_cover_like_page(doc, cover_text, cover_imgs, default_pt=18)

    # ---- Inner pages ----
    body_section = doc.add_section()
    set_section_to_a5(body_section)
    if number_pages:
        # Give the inner pages their own footer first; editing a footer that
        # is still linked would put the number into the cover's footer.
        body_section.footer.is_linked_to_previous = False
        add_page_number_footer(body_section)

    img_idx = 0
    for raw in body_text.split('\n'):
        line = raw.strip()
        if not line:
            doc.add_paragraph('')
            continue

        size_pt, text = parse_and_strip_size_anywhere(line, default_pt=12)
        skip_img = has_skip_image_tag(text)
        text = strip_skip_image_tag(text)

        dbl, text = has_double_tags(text)
        if dbl is not None and not skip_img and img_idx + 1 < len(body_imgs):
            text_clean = text.strip()
            if text_clean:
                add_paragraph_with_size(doc, text_clean, size_pt)
            l_pct, r_pct = dbl
            add_double_image_table(doc, body_imgs[img_idx], body_imgs[img_idx + 1], l_pct, r_pct)
            img_idx += 2
            continue

        # Single images: any number of tags per line.
        singles, text = _extract_single_tags(text)
        text_clean = text.strip()
        if text_clean:
            add_paragraph_with_size(doc, text_clean, size_pt)
        if skip_img:
            continue

        for pct in singles:
            if img_idx >= len(body_imgs):
                break
            add_single_image_paragraph(doc, body_imgs[img_idx], width_pct=pct)
            img_idx += 1

        # No image tag at all: insert one picture automatically.
        if not singles and dbl is None and img_idx < len(body_imgs):
            add_single_image_paragraph(doc, body_imgs[img_idx], width_pct=60)
            img_idx += 1

    # ---- Back cover ----
    back_section = doc.add_section()
    set_section_to_a5(back_section)
    if number_pages:
        # An own, empty footer keeps the back cover from inheriting the
        # inner pages' page number.
        back_section.footer.is_linked_to_previous = False
    _add_cover_like_page(doc, back_text, back_imgs, default_pt=12)

    buf = io.BytesIO()
    doc.save(buf)
    data = buf.getvalue()
    _ = Document(io.BytesIO(data))  # verify the result can be re-opened
    return data

def on_click(_):
    """Build the booklet and show a download link, or the error, in ``out``."""
    import html  # local import: only needed to escape the error text

    with out:
        clear_output()
        try:
            data = build_docx_bytes()
            fname = f"a5_book_{int(time.time())}.docx"
            display(HTML('<div style="color:green;">完成！</div>'))
            display(make_data_uri_download(data, fname))
        except Exception as e:
            # Escape the message: exception texts may contain "<...>"
            # (e.g. object reprs), which the browser would otherwise
            # swallow as markup.
            display(HTML(f'<div style="color:red;">失敗：{html.escape(str(e))}</div>'))

btn.on_click(on_click)
