# In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import csv
import re

# Scrape flight details from a Google Travel search result page and append
# one CSV row per flight to flights_info.csv.

# Set up the Selenium driver.
options = Options()
options.add_argument("--headless")  # Remove this line to show the browser window.
service = Service("/opt/homebrew/bin/chromedriver")  # Path to the ChromeDriver binary.
driver = webdriver.Chrome(service=service, options=options)

# Values of `layover` that mean "no layover". "Non-stop" is the fallback this
# script assigns when the stop-count element is missing; "直達航班。" is the
# label the layover check has always compared against (presumably the page's
# own aria-label for direct flights -- verify against the live page).
NONSTOP_LABELS = ("直達航班。", "Non-stop")

# Compiled once here instead of on every loop iteration. The two patterns are
# intentionally different (fixed single spaces vs. optional whitespace).
FLIGHT_DURATION_RE = re.compile(r'(\d+ 小時 \d+ 分鐘)')
LAYOVER_DURATION_RE = re.compile(r'(\d+\s*小時\s*\d+\s*分鐘)')

try:
    # Open the Google Travel flights search page.
    url = "https://www.google.com/travel/flights/search?tfs=CBwQAhooEgoyMDI0LTEwLTIzagwIAhIIL20vMGZ0a3hyDAgDEggvbS8wNnk1N0ABSAFwAYIBCwj___________8BmAEC&tfu=EgQIARAAIgA&authuser=0"
    driver.get(url)

    # Every find_element(s) call below waits up to 10 seconds for a match.
    driver.implicitly_wait(10)

    # Locate every <li class="pIav2d"> flight entry.
    flight_links = driver.find_elements(By.CSS_SELECTOR, "li.pIav2d")
    flight_count = len(flight_links)
    print(f"找到 {flight_count} 個航班")

    # Open the CSV file in append mode so repeated runs accumulate rows.
    with open('flights_info.csv', 'a', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)

        # Write the header row only when the file is still empty.
        if csv_file.tell() == 0:
            csv_writer.writerow([
                "出發日期", "出發時間", "出發機場代號",
                "抵達時間", "抵達機場代號", "航空公司",
                "停靠站數量", "停留時間", "飛行時間",
                "機型", "航班代碼", "艙等", "價格歷史"
            ])

        # Click each flight entry in turn to open its detail page.
        for index in range(flight_count):
            # Re-query the entries on every pass: the references obtained
            # before navigating away would raise StaleElementReferenceException.
            flight_links = driver.find_elements(By.CSS_SELECTOR, "li.pIav2d")

            # The re-queried list may be shorter than the initial one; stop
            # cleanly instead of failing with IndexError.
            if index >= len(flight_links):
                print(f"航班列表已變動，於第 {index + 1} 個航班停止")
                break

            flight_links[index].click()

            # Give the detail page time to load.
            time.sleep(5)

            print(f"正在抓取航班 {index + 1} 的詳細資訊")

            # Departure date: the first 9 characters of the element's innerHTML.
            departure_date_html = driver.find_element(
                By.XPATH, "//span[contains(@class, 'mv1WYe')]"
            ).get_attribute("innerHTML")[:9]
            departure_date = departure_date_html.strip()

            # Departure time: the text after the last full-width colon in the aria-label.
            departure_time_label = driver.find_element(
                By.XPATH, "//div[@class='wtdjmc YMlIz ogfYpf tPgKwe']"
            ).get_attribute("aria-label")
            departure_time = departure_time_label.split("：")[-1].strip()

            # Arrival time: same extraction as the departure time.
            arrival_time_label = driver.find_element(
                By.XPATH, "//div[@class='XWcVob YMlIz ogfYpf tPgKwe']"
            ).get_attribute("aria-label")
            arrival_time = arrival_time_label.split("：")[-1].strip()

            # Airport codes: the first match is taken as the departure airport
            # and the second as the arrival airport; surrounding parentheses
            # are stripped.
            airport_elements = driver.find_elements(
                By.XPATH,
                "//span[contains(@class, 'qeoz6e HKHSfd')]/following-sibling::span[@dir='ltr']",
            )
            departure_airport = airport_elements[0].get_attribute("innerHTML").strip("()")
            arrival_airport = airport_elements[1].get_attribute("innerHTML").strip("()")

            # Airline name.
            airline = driver.find_element(
                By.XPATH, "//div[contains(@class, 'sSHqwe')]/span[1]"
            ).text

            # Total travel time, pulled out of the element's innerHTML.
            travel_time_html = driver.find_element(
                By.XPATH, "//div[@class='CQYfx y52p7d WO360 QS0io']"
            ).get_attribute("innerHTML")
            match = FLIGHT_DURATION_RE.search(travel_time_html)
            flight_duration = match.group(1) if match else "未找到飛行時間"

            # Number of stops: the part of the aria-label before " flight."
            # (e.g. "1 stop" or "Non-stop"); a label without that suffix is
            # kept whole.
            try:
                layover_label = driver.find_element(
                    By.XPATH,
                    "//div[@class='EfT7Ae AdWm1c tPgKwe']//span[@class='ogfYpf']",
                ).get_attribute("aria-label")
                layover = layover_label.split(" flight.")[0]
            except NoSuchElementException:
                layover = "Non-stop"

            # Layover duration, only looked up when the flight has a stop.
            # Both non-stop spellings are accepted so that the fallback value
            # assigned above no longer triggers a lookup that cannot succeed.
            if layover in NONSTOP_LABELS:
                layover_time = "Non-stop"
            else:
                try:
                    layover_info_html = driver.find_element(
                        By.XPATH,
                        '//div[contains(@class, "tvtJdb") and contains(@class, "eoY5cb")]',
                    ).get_attribute("innerHTML")
                except NoSuchElementException:
                    # Record the placeholder rather than aborting the whole run.
                    layover_time = "未找到停留時間"
                else:
                    match = LAYOVER_DURATION_RE.search(layover_info_html)
                    layover_time = match.group(1) if match else "未找到停留時間"

            # Aircraft type: the last "Xsgmwe" span in the details row.
            aircraft = driver.find_element(
                By.XPATH,
                '//div[@class="MX5RWe sSHqwe y52p7d"]/span[@class = "Xsgmwe"][last()]',
            ).get_attribute("innerHTML")

            # Flight number: the second "Xsgmwe" span, with &nbsp; entities
            # replaced by plain spaces and surrounding whitespace trimmed.
            flight_number_html = driver.find_element(
                By.XPATH,
                '//div[@class="MX5RWe sSHqwe y52p7d"]/span[contains(@class, "Xsgmwe")][2]',
            ).get_attribute("innerHTML")
            flight_number = flight_number_html.replace('&nbsp;', ' ').strip()

            # Cabin class.
            cabin_class = driver.find_element(
                By.XPATH, '//span[contains(@class, "Xsgmwe")]/div'
            ).get_attribute("innerHTML")

            # Price history: aria-labels of the matching SVG <g> elements.
            # Missing labels (None) are skipped so the join below cannot raise.
            price_points = driver.find_elements(
                By.XPATH,
                "//*[name()='g' and @class='ke9kZe-LkdAo-RbRzK-JNdkSc pKrx3d']",
            )
            price_labels = (point.get_attribute("aria-label") for point in price_points)
            price_history = [label for label in price_labels if label is not None]

            # Write one row per flight; the price history is joined into a single cell.
            csv_writer.writerow([
                departure_date, departure_time, departure_airport,
                arrival_time, arrival_airport, airline,
                layover, layover_time, flight_duration,
                aircraft, flight_number, cabin_class, ', '.join(price_history)
            ])

            # Go back to the result list.
            driver.back()

            # Give the result list time to reload.
            time.sleep(5)
finally:
    # Always close the browser, even when scraping fails part-way through,
    # so no headless Chrome process is left running.
    driver.quit()