In [1]:
pip install selenium beautifulsoup4 pandas


Note: you may need to restart the kernel to use updated packages.


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# === Initialize the browser ===
# Start Chrome maximized and create the shared explicit wait (timeout
# argument 15) that the rest of the script uses to wait for elements.
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 15)

# === Target URL ===
# Activity search listing: regions=Taiwan, query=%E6%A1%83%E5%B1%B1
# (the URL-encoded form of "桃山"), order=latest.
BASE_URL = "https://zh-tw.hikingbook.net/explore/activities?regions=Taiwan&query=%E6%A1%83%E5%B1%B1&order=latest"
driver.get(BASE_URL)
time.sleep(3)  # fixed pause after navigation before the listing is queried



# === Collected data ===
# One dict per successfully scraped activity is appended to this list.
activity_data = []

# === Statistics extraction ===
def extract_stats_with_bs4(soup) -> dict:
    """Collect the four summary statistics from a parsed activity page.

    Every ``div.col-4`` / ``div.col-8`` element is inspected; when it holds
    both a ``small.text-light`` caption and an ``h4.fw-semibold`` figure, and
    the stripped caption is one of the four expected labels, the stripped
    figure text is stored under that label.

    Args:
        soup: Parsed document exposing ``select`` (and whose elements expose
            ``select_one`` and ``.text``), e.g. a ``BeautifulSoup`` object.

    Returns:
        A dict keyed by the four labels; a label that was not found on the
        page keeps the value ``None``. If a label occurs more than once, the
        last occurrence wins.
    """
    stats = dict.fromkeys(("總移動距離", "總時間", "總爬升高度", "總下降高度"))
    for container in soup.select("div.col-4, div.col-8"):
        caption = container.select_one("small.text-light")
        figure = container.select_one("h4.fw-semibold")
        if not (caption and figure):
            continue
        key = caption.text.strip()
        if key not in stats:
            continue
        stats[key] = figure.text.strip()
    return stats

# === Date extraction ===
def extract_hike_date(soup) -> str:
    """Return the text of the first candidate element that contains a "/".

    Candidates are the elements matching
    ``div.d-flex.text-secondary.text-small``, examined in document order;
    their text is taken with surrounding whitespace stripped.

    Args:
        soup: Parsed document exposing ``select``, e.g. a ``BeautifulSoup``
            object.

    Returns:
        The first stripped text containing "/", or ``"N/A"`` when no
        candidate qualifies.
    """
    stripped_texts = (
        node.get_text(strip=True)
        for node in soup.select("div.d-flex.text-secondary.text-small")
    )
    return next((text for text in stripped_texts if "/" in text), "N/A")

# === User ID extraction ===
def extract_user_id(soup) -> str:
    """Return the first path segment of the first matching link's href.

    The link is the first element matching
    ``a.text-decoration-none.text-dark[href^='/']``. The id is the first run
    of characters after a "/" that contains neither "/" nor "?".

    Args:
        soup: Parsed document exposing ``select_one``, e.g. a
            ``BeautifulSoup`` object.

    Returns:
        The extracted segment, or ``"unknown"`` when there is no such link
        or its href yields no segment.
    """
    anchor = soup.select_one("a.text-decoration-none.text-dark[href^='/']")
    if not anchor:
        return "unknown"
    found = re.search(r"/([^/?]+)", anchor.get("href", ""))
    return found.group(1) if found else "unknown"

# === Main loop ===
# Walks the listing page by page; for each activity card it opens the detail
# page in a new tab, parses it with the extract_* helpers, and appends a
# record to activity_data when distance and time are both present.

# Upper bound on listing pages to visit; the loop stops earlier when paging
# fails or a page shows no cards.
MAX_PAGES = 70

# Handle of the listing tab. Every other handle is a detail tab that must be
# closed before moving on to the next card.
main_handle = driver.current_window_handle

for page in range(1, MAX_PAGES + 1):
    print(f"📄 處理第 {page} 頁...")

    if page > 1:
        try:
            next_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//i[@class='bi bi-chevron-right']")))
            driver.execute_script("arguments[0].scrollIntoView(true);", next_btn)
            time.sleep(0.5)
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", next_btn)
            time.sleep(3)
        except Exception as e:
            print(f"⚠️ 第 {page} 頁翻頁失敗: {e}")
            break

    # A page without cards ends the crawl instead of raising, so the records
    # collected so far are kept.
    try:
        cards = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.card.text-bg-white")))
    except TimeoutException:
        print(f"⚠️ 第 {page} 頁找不到活動卡片，停止翻頁。")
        break

    for idx, card in enumerate(cards):
        try:
            link = card.find_element(By.CSS_SELECTOR, "a.stretched-link").get_attribute("href")
            driver.execute_script("window.open(arguments[0]);", link)
            # The cleanup below leaves only the listing tab open, so the one
            # handle that is not main_handle is the tab just opened.
            detail_handle = next(h for h in driver.window_handles if h != main_handle)
            driver.switch_to.window(detail_handle)

            # NOTE(review): div.card-body may be present before the stats and
            # user link are rendered. If every activity is skipped as
            # "unknown", wait on the stats element (h4.fw-semibold) instead.
            # TODO confirm against the live page.
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.card-body")))

            soup = BeautifulSoup(driver.page_source, "html.parser")

            user_id = extract_user_id(soup)
            date = extract_hike_date(soup)
            stats = extract_stats_with_bs4(soup)

            if not stats["總移動距離"] or not stats["總時間"]:
                print(f"⚠️ 略過無效活動：{user_id}")
            else:
                record = {
                    "ID": user_id,
                    "日期": date,
                    "總移動距離": stats["總移動距離"],
                    "總時間": stats["總時間"],
                    "總爬升高度": stats["總爬升高度"],
                    "總下降高度": stats["總下降高度"]
                }
                activity_data.append(record)
        except Exception as e:
            print(f"⚠️ 第 {idx+1} 筆活動錯誤: {e}")
        finally:
            # Close every detail tab on success and on failure alike. A tab
            # left open after an error would otherwise be picked up as the
            # detail page of a later card.
            for handle in driver.window_handles:
                if handle != main_handle:
                    driver.switch_to.window(handle)
                    driver.close()
            driver.switch_to.window(main_handle)

# === Close the browser ===
driver.quit()

# === Export CSV ===
# Pin the column order explicitly. With an empty activity_data the DataFrame
# would otherwise have no columns and the CSV would be written without a
# header row.
CSV_COLUMNS = ["ID", "日期", "總移動距離", "總時間", "總爬升高度", "總下降高度"]
df = pd.DataFrame(activity_data, columns=CSV_COLUMNS)
# utf-8-sig writes a BOM at the start of the file.
df.to_csv("桃山_活動資料_使用者ID版.csv", index=False, encoding="utf-8-sig")
print(f"✅ 完成！共擷取 {len(df)} 筆資料。")
# Last expression of the notebook cell: displays the first rows.
df.head()


📄 處理第 1 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 2 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 3 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 4 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 5 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 6 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 7 頁...
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
⚠️ 略過無效活動：unknown
📄 處理第 8 頁...
⚠️ 略過無效活動：unkn

Unnamed: 0,ID,日期,總移動距離,總時間,總爬升高度,總下降高度
0,shao_8830,2025/06/25 - 2025/07/01・7 日・,88.5 km,63 h 24 m,"6,679 m","7,908 m"
1,a29751765,2025/07/05・1 日・,11.7 km,5 h 53 m,824 m,824 m
2,wenkuochung169856886,2025/07/04・1 日・,5.5 km,3 h 13 m,468 m,451 m
3,jiaruitwee02,2025/07/05・1 日・,7 km,5 h 19 m,535 m,540 m
4,bill1256341,2022/04/10・1 日・,19 km,11 h 31 m,"1,589 m","1,821 m"
