In [2]:
# 匯入套件
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
import requests
import os
import re

# ========== Output folder ==========
# Directory (relative to the working directory) that book texts are saved in.
folderPath = "homework_books"

# ========== Launch the browser ==========
# Chrome command-line switches, applied in the order listed.
web_options = webdriver.ChromeOptions()
for _chrome_switch in (
    "--start-maximized",
    "--incognito",
    "--disable-popup-blocking",
    "--disable-notifications",
    "--lang=zh-TW",
):
    web_options.add_argument(_chrome_switch)

# Shared browser session used by visit(), parse() and exit().
driver = webdriver.Chrome(options=web_options)

# ========== Open the listing page ==========
def visit():
    """Navigate the shared ``driver`` to Project Gutenberg's Chinese book index."""
    listing_url = "https://www.gutenberg.org/browse/languages/zh"
    driver.get(listing_url)

# ========== Scrape book links ==========
def parse(limit=400):
    """Collect book titles and plain-text URLs from the page loaded in ``driver``.

    Looks at the first *limit* elements matching ``li.pgdbetext a[href]``.
    A link is kept when its text *starts with* at least one CJK ideograph
    (U+4E00-U+9FFF) or one of the listed full-width punctuation marks; the
    rest of the title is not checked, so mixed-language titles pass too.
    The first run of digits in the link's ``href`` is used as the book number
    to build the ``/cache/epub/<n>/pg<n>.txt`` download URL.

    Args:
        limit: Maximum number of link elements to inspect. Defaults to 400;
            ``None`` inspects every matching element.

    Returns:
        dict mapping title -> download URL, in page order. When the same
        title occurs more than once, the first occurrence is kept.
    """
    # Compile once instead of re-parsing the patterns on every iteration.
    title_pattern = re.compile(r'[\u4e00-\u9fff，。！？、：「」『』（）《》〈〉—…；．·～]+')
    number_pattern = re.compile(r'\d+')

    book = {}
    elements = driver.find_elements(By.CSS_SELECTOR, 'li.pgdbetext a[href]')
    for link in elements[:limit]:
        title = link.text
        if title_pattern.match(title) is None:
            continue
        # A link without a numeric id cannot be turned into a download URL;
        # skip it rather than letting one odd link abort the whole crawl.
        number_match = number_pattern.search(link.get_attribute('href') or '')
        if number_match is None:
            continue
        href_num = number_match.group()
        href_target = f"https://www.gutenberg.org/cache/epub/{href_num}/pg{href_num}.txt"
        book.setdefault(title, href_target)
    return book

# ========== Save to disk ==========
def savetxt(title, content, folder=None):
    """Write *content* to ``<folder>/<sanitised title>.txt`` if it is not there yet.

    Backslash, slash, colon, asterisk, question mark, double quote, angle
    brackets and pipe in *title* are each replaced with ``_`` to form the
    file name. An existing file is never overwritten; a message is printed
    and the function returns without writing.

    Args:
        title: Book title used to derive the file name.
        content: Text to write, encoded as UTF-8.
        folder: Target directory. Defaults to the module-level ``folderPath``.
            Missing directories are created.

    Raises:
        OSError: If the directory or file cannot be created or written
            (other than the file already existing).
    """
    safe_title = re.sub(r'[\\/:*?"<>|]', "_", title)
    target_dir = folderPath if folder is None else folder
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_dir, exist_ok=True)
    file_path = os.path.join(target_dir, f"{safe_title}.txt")
    try:
        # "x" fails if the file exists, so the existence check and the
        # creation happen in one step and nothing is ever overwritten.
        file = open(file_path, "x", encoding="utf-8")
    except FileExistsError:
        print(f"{safe_title}.txt 已存在，跳過保存")
        return
    with file:
        file.write(content)
    print(f"已儲存：{safe_title}.txt")

# ========== Close the browser ==========
def exit():
    """Quit the shared ``driver``, reporting (not raising) any ``Exception``.

    The closing message is printed whether or not ``quit()`` succeeded.
    NOTE(review): this name shadows the built-in ``exit``; kept because the
    script calls it by this name.
    """
    try:
        driver.quit()
    except Exception as quit_error:
        print(f"關閉瀏覽器時錯誤：{quit_error}")
    finally:
        print("瀏覽器已關閉")

# ========== Main program ==========
# Network errors that are worth another attempt. Large books have been seen
# to fail mid-transfer with "Connection broken: IncompleteRead", which
# requests reports as ChunkedEncodingError.
_RETRYABLE_ERRORS = (
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
)


def _download_text(url, attempts=3, timeout=30):
    """Return the body at *url* decoded as UTF-8, retrying transient failures.

    Args:
        url: Address of the plain-text book.
        attempts: Total number of tries (values below 1 are treated as 1).
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the script indefinitely.

    Raises:
        requests.RequestException: On an HTTP error status, or when the last
            attempt still fails with a network error.
    """
    attempts = max(1, attempts)
    for attempt in range(1, attempts + 1):
        try:
            res = requests.get(url, timeout=timeout)
            # Without this an error page would be saved as the book's text.
            res.raise_for_status()
            res.encoding = 'utf-8'
            return res.text
        except _RETRYABLE_ERRORS:
            if attempt == attempts:
                raise
            sleep(attempt)  # short, growing pause before the next try


try:
    visit()
    sleep(2)  # fixed pause after navigation before scraping the page

    books = parse()
    print(f"共 {len(books)} 本書，開始下載...")

    for idx, (title, url) in enumerate(books.items(), start=1):
        # Best effort per book: report the failure and move on to the next.
        try:
            savetxt(title, _download_text(url))
        except Exception as e:
            print(f"[{idx}] 下載失敗 {title}：{e}")
finally:
    # Always release the browser, even if visiting or parsing raised.
    exit()

共 316 本書，開始下載...
已儲存：豆棚閒話.txt
已儲存：戲中戲.txt
已儲存：比目魚.txt
已儲存：三字經.txt
已儲存：山水情.txt
已儲存：山海經.txt
[7] 下載失敗 施公案：('Connection broken: IncompleteRead(1890833 bytes read, 1682751 more expected)', IncompleteRead(1890833 bytes read, 1682751 more expected))
已儲存：易經.txt
已儲存：木蘭奇女傳.txt
已儲存：海公案.txt
已儲存：燕丹子.txt
已儲存：狄公案.txt
已儲存：百家姓.txt
已儲存：禮記.txt
已儲存：綠牡丹.txt
已儲存：詩經.txt
已儲存：麟兒報.txt
已儲存：天豹圖.txt
已儲存：梁公九諫.txt
已儲存：長恨歌.txt
已儲存：李娃傳.txt
已儲存：玉樓春.txt
已儲存：漢書.txt
已儲存：引鳳蕭.txt
已儲存：今古奇觀.txt
已儲存：後西游記.txt
已儲存：飛跎全傳.txt
已儲存：佛說四十二章經.txt
已儲存：紅樓夢.txt
已儲存：洛神賦.txt
已儲存：晁氏儒言 一卷.txt
已儲存：水滸後傳.txt
已儲存：幼學瓊林.txt
已儲存：治世餘聞.txt
已儲存：琵琶記.txt
已儲存：雪月梅傳.txt
已儲存：龍川詞.txt
已儲存：三國志.txt
[39] 下載失敗 隋唐演義：('Connection broken: IncompleteRead(1205393 bytes read, 660056 more expected)', IncompleteRead(1205393 bytes read, 660056 more expected))
已儲存：論語.txt
已儲存：滬語開路 = Conversational Exercises in the Shanghai Dialect.txt
已儲存：白圭志.txt
已儲存：孟子字義疏證.txt
已儲存：安樂集.txt
已儲存：鄧析子.txt
已儲存：醉醒石.txt
已儲存：唐鍾馗平鬼傳.txt
已儲存：春秋繁露.txt
已儲存：虬髯客傳.txt
已儲存：吳船錄.txt
已儲存：星槎勝覽.txt
已儲存：喻世明言.txt
已