In [None]:
#CHROME

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
import time
import random
import csv

# 初始化 WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--disable-gpu")
options.add_argument("--disable-extensions")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-crash-reporter")
options.add_argument("--no-sandbox")
options.add_argument("--process-per-site")
options.add_argument("--js-flags=--max-old-space-size=20480")
options.add_argument("--js-flags=--expose-gc")
options.add_argument("--disable-background-timer-throttling")
options.add_argument("--disable-backgrounding-occluded-windows")
options.add_argument("--disable-cache")
options.add_argument("--disk-cache-size=0")
options.add_argument("--force-gpu-mem-available-mb=512")

prefs = {
    "profile.managed_default_content_settings.images": 2  # 禁用圖片加載
}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(options=options)
driver.maximize_window()

# 定義目標 URL
url = "https://maps.app.goo.gl/sHPYFWMwcezabHVv8"
driver.get(url)
time.sleep(2)

# 抓取景點名稱
try:
    title_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "h1.DUwDvf"))
    )
    title = title_element.text
    print(f"景點名稱: {title}")
except Exception as e:
    print("無法抓取景點名稱:", e)
    driver.quit()
    exit()

# 點擊評論區按鈕
try:
    comment_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button.HHrUdb"))
    )
    comment_button.click()
    time.sleep(2)
except Exception as e:
    print("無法找到評論區按鈕:", e)
    driver.quit()
    exit()

# 更改排序為"最新"
try:
    sort_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-value=\'排序\']"))
    )
    sort_button.click()
    time.sleep(1)

    latest_button = driver.find_elements(By.CSS_SELECTOR, "div.mLuXec")[1]
    driver.execute_script("arguments[0].scrollIntoView();", latest_button)
    time.sleep(1)
    driver.execute_script("arguments[0].click();", latest_button)
    time.sleep(2)
except Exception as e:
    print("無法更改排序為最新:", e)
    driver.quit()

# 滑動頁面並抓取評論
def parse_date(date_str):
    return date_str  # 保留原始日期文本

comments = []
unique_review_ids = set()

while len(comments) < 10000:
    reviews = driver.find_elements(By.CSS_SELECTOR, "div.jftiEf")

    for review in reviews:
        try:
            review_id = review.get_attribute("data-review-id")
            if review_id in unique_review_ids:
                continue

            unique_review_ids.add(review_id)

            rating_element = review.find_element(By.CSS_SELECTOR, "span.kvMYJc")
            rating = rating_element.get_attribute("aria-label").replace(" 顆星", "")

            if review.find_elements(By.CSS_SELECTOR, "button.kyuRq.WOKzJe"):
                review.find_element(By.CSS_SELECTOR, "button.kyuRq.WOKzJe").click()
                time.sleep(0.5)

            if review.find_elements(By.CSS_SELECTOR, "button.w8nwRe.kyuRq"):
                review.find_element(By.CSS_SELECTOR, "button.w8nwRe.kyuRq").click()
                time.sleep(0.5)

            comment_elements = review.find_elements(By.CSS_SELECTOR, "span.wiI7pd")
            comment_content = comment_elements[0].text if comment_elements else ""
            if not comment_content.strip():
                continue  # 跳過沒有內文的評論
            date_element = review.find_element(By.CSS_SELECTOR, "span.rsqaWe")
            date_time = parse_date(date_element.text)

            comments.append({
                "ID": review_id,
                "Rating": rating,
                "Comment": comment_content,
                "Date": date_time
            })

            print(f"正在處理第{len(comments)}則評論")
        except Exception as e:
            print("評論抓取錯誤:", e)

    if reviews:
        try:
            comments_container = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde")

            # 隨機往下滑動
            scroll_down = random.randint(400, 1000)
            driver.execute_script(f"arguments[0].scrollBy(0, {scroll_down});", comments_container)
            print(f"模擬評論區往下滑動 {scroll_down}px 完成。")
            time.sleep(random.uniform(0.5,1.5))  # 減少等待時間

            # 隨機往上滑動
            scroll_up = random.randint(100, 200)
            driver.execute_script(f"arguments[0].scrollBy(0, -{scroll_up});", comments_container)
            print(f"模擬評論區往上滑動 {scroll_up}px 完成。")
            time.sleep(random.uniform(0.5,1.5))  # 減少等待時間

        except Exception as e:
            print("評論區容器滾動失敗:", e)
    
    # 每 100 條評論保存一次並清理內存
    if len(comments) >= 100:
        with open("comments_google_map.csv", "a", newline="", encoding="utf-8-sig") as file:
            writer = csv.DictWriter(file, fieldnames=["ID", "Rating", "Comment", "Date"])
            writer.writerows(comments)
            time.sleep(random.uniform(4, 6))
        comments.clear()
        driver.execute_script("""
            var reviews = document.querySelectorAll('div.jftiEf');
            reviews.forEach((el, index) => {
                if (index < reviews.length - 50) {
                    el.parentNode.removeChild(el);
                }
            });
            document.querySelector('div.jftiEf').innerHTML = '';""")
        driver.execute_script("""
            window.gc && window.gc();
            console.clear();""")
        
        time.sleep(random.uniform(3, 5))        
        print("已保存 100 條評論到文件並清空內存。")
        driver.execute_script("""
            var reviews = document.querySelectorAll('div.jftiEf');
            reviews.forEach((el, index) => {
                if (index < 50) el.remove();
            });""")
        time.sleep(random.uniform(1, 3))
        scroll_down = random.randint(400, 1000)
        driver.execute_script(f"arguments[0].scrollBy(0, {scroll_down});", comments_container)
        time.sleep(random.uniform(6, 10))

print("已達到設定的評論數量，停止抓取。")

# 保存剩餘評論到 CSV
with open("comments_google_map.csv", "a", newline="", encoding="utf-8-sig") as file:
    writer = csv.DictWriter(file, fieldnames=["ID", "Rating", "Comment", "Date"])
    writer.writerows(comments)

print(f"成功抓取 {len(comments)} 則評論並保存至 comments_google_map.csv")

driver.quit()


In [7]:
# 保存剩餘評論到 CSV
with open("comments_google_map.csv", "a", newline="", encoding="utf-8-sig") as file:
    writer = csv.DictWriter(file, fieldnames=["ID", "Rating", "Comment", "Date"])
    writer.writerows(comments)

print(f"成功抓取 {len(comments)} 則評論並保存至 comments_google_map.csv")

成功抓取 39 則評論並保存至 comments_google_map.csv


In [None]:
# FireFox

from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
import time
import random
import csv

# 初始化 WebDriver
options = webdriver.FirefoxOptions()
options.set_preference("permissions.default.image", 2)
options.set_preference("dom.ipc.processCount", 1)
options.add_argument("--disable-gpu")
options.add_argument("--disable-extensions")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-crash-reporter")
options.add_argument("--no-sandbox")
options.add_argument("--process-per-site")
options.add_argument("--js-flags=--max-old-space-size=20480")
options.add_argument("--js-flags=--expose-gc")
options.add_argument("--disable-background-timer-throttling")
options.add_argument("--disable-backgrounding-occluded-windows")
options.add_argument("--disable-cache")
options.add_argument("--disk-cache-size=0")
options.add_argument("--force-gpu-mem-available-mb=512")
service = Service(GeckoDriverManager().install())
driver = webdriver.Firefox(service=service, options=options)

# 定義目標 URL
url = "https://maps.app.goo.gl/sHPYFWMwcezabHVv8"
driver.get(url)
time.sleep(20)

# 抓取景點名稱
try:
    title_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "h1.DUwDvf"))
    )
    title = title_element.text
    print(f"景點名稱: {title}")
except Exception as e:
    print("無法抓取景點名稱:", e)
    driver.quit()
    exit()

# 點擊評論區按鈕
try:
    comment_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button.HHrUdb"))
    )
    comment_button.click()
    time.sleep(10)
except Exception as e:
    print("無法找到評論區按鈕:", e)
    driver.quit()
    exit()

# # 更改排序為"最新"
# try:
#     sort_button = WebDriverWait(driver, 10).until(
#         EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-value=\'排序\']"))
#     )
#     sort_button.click()
#     time.sleep(1)

#     latest_button = driver.find_elements(By.CSS_SELECTOR, "div.mLuXec")[2]
#     driver.execute_script("arguments[0].scrollIntoView();", latest_button)
#     time.sleep(1)
#     driver.execute_script("arguments[0].click();", latest_button)
#     time.sleep(2)
# except Exception as e:
#     print("無法更改排序為最新:", e)
#     driver.quit()

# 滑動頁面並抓取評論
def parse_date(date_str):
    return date_str  # 保留原始日期文本

comments = []
unique_review_ids = set()

while len(comments) < 10000:
    reviews = driver.find_elements(By.CSS_SELECTOR, "div.jftiEf")

    for review in reviews:
        try:
            review_id = review.get_attribute("data-review-id")
            if review_id in unique_review_ids:
                continue

            unique_review_ids.add(review_id)

            rating_element = review.find_element(By.CSS_SELECTOR, "span.kvMYJc")
            rating = rating_element.get_attribute("aria-label").replace(" 顆星", "")

            if review.find_elements(By.CSS_SELECTOR, "button.kyuRq.WOKzJe"):
                review.find_element(By.CSS_SELECTOR, "button.kyuRq.WOKzJe").click()
                time.sleep(0.5)

            if review.find_elements(By.CSS_SELECTOR, "button.w8nwRe.kyuRq"):
                review.find_element(By.CSS_SELECTOR, "button.w8nwRe.kyuRq").click()
                time.sleep(0.5)

            comment_elements = review.find_elements(By.CSS_SELECTOR, "span.wiI7pd")
            comment_content = comment_elements[0].text if comment_elements else ""
            if not comment_content.strip():
                continue  # 跳過沒有內文的評論
            date_element = review.find_element(By.CSS_SELECTOR, "span.rsqaWe")
            date_time = parse_date(date_element.text)

            comments.append({
                "ID": review_id,
                "Rating": rating,
                "Comment": comment_content,
                "Date": date_time
            })

            print(f"正在處理第{len(comments)}則評論")
        except Exception as e:
            print("評論抓取錯誤:", e)

    if reviews:
        try:
            comments_container = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde")

            # 隨機往下滑動
            scroll_down = random.randint(400, 1000)
            driver.execute_script(f"arguments[0].scrollBy(0, {scroll_down});", comments_container)
            print(f"模擬評論區往下滑動 {scroll_down}px 完成。")
            time.sleep(random.uniform(0.5,1.5))  # 減少等待時間

            # 隨機往上滑動
            scroll_up = random.randint(100, 200)
            driver.execute_script(f"arguments[0].scrollBy(0, -{scroll_up});", comments_container)
            print(f"模擬評論區往上滑動 {scroll_up}px 完成。")
            time.sleep(random.uniform(0.5,1.5))  # 減少等待時間

        except Exception as e:
            print("評論區容器滾動失敗:", e)
    
    # 每 100 條評論保存一次並清理內存
    if len(comments) >= 100:
        with open("comments_google_map.csv", "a", newline="", encoding="utf-8-sig") as file:
            writer = csv.DictWriter(file, fieldnames=["ID", "Rating", "Comment", "Date"])
            writer.writerows(comments)
            time.sleep(random.uniform(4, 6))
        comments.clear()
        driver.execute_script("""
            var reviews = document.querySelectorAll('div.jftiEf');
            reviews.forEach((el, index) => {
                if (index < reviews.length - 50) {
                    el.parentNode.removeChild(el);
                }
            });
            document.querySelector('div.jftiEf').innerHTML = '';""")
        driver.execute_script("""
            window.gc && window.gc();
            console.clear();""")
        
        time.sleep(random.uniform(3, 5))        
        print("已保存 100 條評論到文件並清空內存。")
        driver.execute_script("""
            var reviews = document.querySelectorAll('div.jftiEf');
            reviews.forEach((el, index) => {
                if (index < 50) el.remove();
            });""")
        time.sleep(random.uniform(1, 3))
        scroll_down = random.randint(400, 1000)
        driver.execute_script(f"arguments[0].scrollBy(0, {scroll_down});", comments_container)
        time.sleep(random.uniform(6, 10))

print("已達到設定的評論數量，停止抓取。")

# 保存剩餘評論到 CSV
with open("comments_google_map.csv", "a", newline="", encoding="utf-8-sig") as file:
    writer = csv.DictWriter(file, fieldnames=["ID", "Rating", "Comment", "Date"])
    writer.writerows(comments)

print(f"成功抓取 {len(comments)} 則評論並保存至 comments_google_map.csv")

driver.quit()
