# 綜合都可以使用

In [3]:
# 安裝必要套件
!pip install beautifulsoup4

import requests, html, logging, re
from bs4 import BeautifulSoup
from IPython.display import display, FileLink, HTML
import ipywidgets as widgets

# Configure root logging at INFO level with a timestamp/level/message format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Text area holding the list of statute URLs to process, one URL per line.
# It is pre-filled with two law.moj.gov.tw pages.
url_input = widgets.Textarea(
    value='https://law.moj.gov.tw/LawClass/LawAll.aspx?pcode=J0130002\nhttps://law.moj.gov.tw/LawClass/LawAll.aspx?pcode=J0130069',
    placeholder='每行一筆法規網址',
    description='網址清單：',
    layout=widgets.Layout(width='100%', height='120px')
)
# Button that starts the extraction, and the output area used by on_click.
btn = widgets.Button(description="開始擷取")
output = widgets.Output()

# Statute extraction function
def get_law_data_and_meta(url, timeout=30):
    """Download a statute page and extract its title, date and articles.

    Args:
        url: Address of the statute page to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(law_title, date_label, amend_date, law_data)`` where
        ``law_title`` is the text of ``<title>`` before the first ``-``
        (empty if the page has no title), ``date_label`` is '修正日期' or
        '發布日期', ``amend_date`` is the text of the matching date cell
        (empty if absent), and ``law_data`` is a list of dicts with the
        keys '章', '章節', '條' and '條文內容', one per article.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    # A timeout keeps the notebook from hanging on an unresponsive server,
    # and raise_for_status stops us from parsing an error page as a statute.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    # Guard against pages without a <title> instead of raising AttributeError.
    title_tag = soup.find('title')
    law_title = title_tag.text.split('-')[0].strip() if title_tag else ''

    # Prefer the row with id 'trLNNDate'; fall back to 'trLNODate'.
    amend_tr = soup.find('tr', id='trLNNDate')
    date_label = '修正日期'
    if not amend_tr:
        amend_tr = soup.find('tr', id='trLNODate')
        date_label = '發布日期'
    date_td = amend_tr.find('td') if amend_tr else None
    amend_date = date_td.text.strip() if date_td else ''

    content = soup.find('div', class_='law-content')
    main_content = content.find('div', class_='law-reg-content') if content else None
    rows = main_content.find_all('div', recursive=False) if main_content else []
    chapter, section = '', ''
    law_data = []

    for r in rows:
        class_list = r.get('class', [])
        if 'h3' in class_list and 'char-2' in class_list:
            chapter = r.get_text(strip=True)
            # A new chapter starts a fresh section context; otherwise the
            # previous chapter's section would be attached to its articles.
            section = ''
            continue
        if 'row' not in class_list:
            continue
        num_div = r.find('div', class_='col-no')
        data_div = r.find('div', class_='col-data')
        if not (num_div and data_div):
            continue
        num = num_div.get_text(strip=True)
        text = data_div.get_text("\n", strip=True)
        if '條' in num:
            law_data.append({
                '章': chapter,
                '章節': section,
                '條': num,
                '條文內容': text
            })
        elif '節' in text and '條' not in text:
            # Rows whose number cell is not an article but whose text
            # mentions '節' are treated as section headings.
            section = text

    return law_title, date_label, amend_date, law_data

# HTML generation function
def generate_html(title, date_label, date_text, law_data):
    """Write an editable HTML checklist for a statute and return its file name.

    The page shows one table row per article, with three drop-downs and a
    free-text box per row, plus buttons that enable editing, lock editing,
    and download the modified page.

    Args:
        title: Statute title; used for the page heading and the file name.
        date_label: Label shown before the date.
        date_text: Date text shown after the label.
        law_data: Iterable of dicts with the keys '章', '章節', '條' and
            '條文內容'.

    Returns:
        The name of the HTML file written to the current working directory.
        It is ``f"{title}.html"`` with characters that are invalid in file
        names replaced by ``_`` ('law.html' if nothing usable remains).
    """
    import json  # local import: only needed to build a safe JS string literal

    # Path separators and other reserved characters in a scraped title would
    # make open() fail or write outside the working directory.
    safe_stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip() or 'law'
    filename = f"{safe_stem}.html"

    # Scraped text is untrusted markup: escape it before embedding in HTML.
    title_html = html.escape(title)
    label_html = html.escape(date_label)
    date_html = html.escape(date_text)
    # json.dumps yields a correctly quoted JS string literal; "</" is broken
    # up so the value can never terminate the surrounding <script> element.
    js_default_name = json.dumps(filename).replace('</', '<\\/')

    head = f'''<!DOCTYPE html><html><head><meta charset="utf-8"><title>{title_html}</title>
    <style>
    body {{ font-family: '微軟正黑體'; margin: 20px; }}
    table {{ border-collapse: collapse; width: 100%; table-layout: fixed; }}
    th, td {{ border: 1px solid #aaa; padding: 8px; text-align: left; vertical-align: top; word-break: break-word; }}
    thead th {{ background: #eee; position: sticky; top: 0; z-index: 1; }}
    tbody tr:nth-child(even) {{ background-color: #f9f9f9; }}
    tbody tr:hover {{ background-color: #eef; }}
    textarea {{ width: 100%; height: 80px; padding: 4px; box-sizing: border-box; }}
    select:disabled, textarea:disabled {{ background-color: #f5f5f5; color: #333; }}
    .button {{ padding: 10px 15px; margin: 10px; background: #4CAF50; color: white; border: none; cursor: pointer; }}
    </style>
    <script>
    let editMode = false;
    let confirmed = false;
    function toggleEdit() {{
      editMode = true;
      confirmed = false;
      document.querySelectorAll('select, textarea').forEach(el => el.disabled = false);
    }}
    function confirmEdit() {{
      confirmed = true;
      document.querySelectorAll('select, textarea').forEach(el => el.disabled = true);
    }}
    function downloadModifiedHTML() {{
      if (!confirmed) return alert('請先完成更新');
      document.querySelectorAll('tr').forEach((row) => {{
        row.querySelectorAll('select').forEach(sel => sel.setAttribute('data-selected', sel.value));
        const txt = row.querySelector('textarea');
        if (txt) txt.setAttribute('data-content', txt.value);
      }});
      const defaultName = {js_default_name};
      const fileName = prompt('請輸入儲存檔名：', defaultName) || defaultName;
      const blob = new Blob(['<!DOCTYPE html>' + document.documentElement.outerHTML], {{ type: 'text/html' }});
      const a = document.createElement('a');
      a.href = URL.createObjectURL(blob);
      a.download = fileName;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
    }}
    window.onload = () => {{
      document.querySelectorAll('tr').forEach((row) => {{
        row.querySelectorAll('select').forEach(sel => sel.value = sel.getAttribute('data-selected') || '否');
        const txt = row.querySelector('textarea');
        if (txt) txt.value = txt.getAttribute('data-content') || '';
      }});
    }}
    </script>
    </head><body>
    <h2>{title_html}</h2>
    <p><strong>{label_html}：</strong>{date_html}</p>
    <table>
    <thead><tr><th>章</th><th>章節</th><th>條</th><th width="500wh">條文內容</th><th>定義條文</th><th>是否適用</th><th>是否符合</th><th width="300wh">說明</th></tr></thead>
    <tbody>
    '''

    # Build the rows in a list and join once instead of repeated string +=.
    body_rows = []
    for row in law_data:
        # Keep the article's line breaks visible after escaping.
        content = html.escape(row['條文內容']).replace('\n', '<br>')
        body_rows.append(f'''<tr>
        <td>{html.escape(row['章'])}</td>
        <td>{html.escape(row['章節'])}</td>
        <td>{html.escape(row['條'])}</td>
        <td>{content}</td>
        <td><select data-selected="否" disabled><option>否</option><option>是</option></select></td>
        <td><select data-selected=" " disabled><option> </option><option>適用</option><option>不適用</option></select></td>
        <td><select data-selected=" " disabled><option> </option><option>符合</option><option>不符合</option></select></td>
        <td><textarea data-content="" disabled></textarea></td>
        </tr>''')

    tail = '''</tbody></table>
    <div>
      <button class="button" onclick="toggleEdit()">更新</button>
      <button class="button" onclick="confirmEdit()">完成更新</button>
      <button class="button" onclick="downloadModifiedHTML()">下載更新版本</button>
    </div>
    </body></html>'''

    html_content = head + ''.join(body_rows) + tail

    with open(filename, "w", encoding='utf-8') as f:
        f.write(html_content)
    return filename

# Main click handler
def on_click(b):
    """Process every URL in the text area and show the generated pages.

    For each non-blank line of ``url_input`` the statute is fetched with
    ``get_law_data_and_meta``, rendered with ``generate_html``, and the
    resulting file is shown as a download link followed by the page itself.
    All messages are printed inside the ``output`` widget.

    Args:
        b: The button instance passed by ipywidgets; unused.
    """
    with output:
        output.clear_output()
        urls = [line.strip() for line in url_input.value.splitlines() if line.strip()]
        if not urls:
            print("❌ 請輸入網址")
            return
        for url in urls:
            print(f"🔍 處理：{url}")
            # Boundary handler: one failing URL must not abort the rest.
            try:
                title, label, date, law_data = get_law_data_and_meta(url)
                if not law_data:
                    print(f"⚠️ 無法擷取：{url}")
                    continue
                filename = generate_html(title, label, date, law_data)
                print(f"✅ 成功擷取《{title}》共 {len(law_data)} 條條文")
                display(FileLink(filename))
                # Read through a context manager so the handle is closed
                # promptly instead of being left to the garbage collector.
                with open(filename, encoding='utf-8') as fh:
                    page = fh.read()
                display(HTML(page))
            except Exception as e:
                print(f"❌ 錯誤：{e}")

# Register the click handler, then render the URL box, button and output area.
btn.on_click(on_click)
display(url_input, btn, output)




Textarea(value='https://law.moj.gov.tw/LawClass/LawAll.aspx?pcode=J0130002\nhttps://law.moj.gov.tw/LawClass/La…

Button(description='開始擷取', style=ButtonStyle())

Output()