In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv
import re

# 設置 Selenium 驅動
options = Options()
options.add_argument("--headless")  # 如果需要顯示瀏覽器，請去掉此行
service = Service("/opt/homebrew/bin/chromedriver")  # 指定 ChromeDriver 的路徑
driver = webdriver.Chrome(service=service, options=options)

# 打開Google Travel的航班頁面
url = "https://www.google.com/travel/flights/booking?tfs=CBwQAhpEEgoyMDI0LTExLTMwIh8KA1RQRRIKMjAyNC0xMS0zMBoDTlJUKgJJVDIDNzAwagwIAhIIL20vMGZ0a3hyBwgBEgNOUlRAAUgBcAGCAQsI____________AZgBAg&tfu=CmhDalJJTlZaelh6VmFjM2xrV0VWQlNYbDRZM2RDUnkwdExTMHRMUzB0TFhSc2JXWXhOMEZCUVVGQlIyTkRiek5KVFRRMWN6UkJFZ1ZKVkRjd01Cb0tDUGNqRUFBYUExUlhSRGdjY0tWdhICCAAiAA&authuser=0"
driver.get(url)

driver.implicitly_wait(10)

# 抓取出發日期
departure_date_element = driver.find_element(By.XPATH, "//span[contains(@class, 'mv1WYe')]").get_attribute("innerHTML")[:9]
departure_date = departure_date_element.strip()

# 抓取出發時間
departure_time_element = driver.find_element(By.XPATH, "//div[@class='wtdjmc YMlIz ogfYpf tPgKwe']").get_attribute("aria-label")
departure_time = departure_time_element.split("：")[-1].strip()  # 抓取時間部分

# 抓取抵達時間
arrival_time_element = driver.find_element(By.XPATH, "//div[@class='XWcVob YMlIz ogfYpf tPgKwe']").get_attribute("aria-label")
arrival_time = arrival_time_element.split("：")[-1].strip()  # 抓取時間部分

# 獲取所有符合條件的元素
airport_elements = driver.find_elements(By.XPATH, "//span[contains(@class, 'qeoz6e HKHSfd')]/following-sibling::span[@dir='ltr']")
# 抓取出發和抵達機場代碼
departure_airport = airport_elements[0].get_attribute("innerHTML").strip("()")  # 第一個是出發機場
arrival_airport = airport_elements[1].get_attribute("innerHTML").strip("()")    # 第二個是抵達機場

# 抓取航空公司
airline = driver.find_element(By.XPATH, "//div[contains(@class, 'sSHqwe')]/span[1]").text

# 抓取行程時間
travel_time_element = driver.find_element(By.XPATH, "//div[@class='CQYfx y52p7d WO360 QS0io']").get_attribute("innerHTML")

# 移除 "Travel time: " 前缀（如果存在）
flight_duration = travel_time_element.replace("路程時間：", "").strip()

# 使用正則表達式提取
match = re.search(r'(\d+ 小時 \d+ 分鐘)', travel_time_element)
if match:
    flight_duration = match.group(1)
    
# 抓取停靠站數量
try:
    layover_element = driver.find_element(By.XPATH, "//div[@class='EfT7Ae AdWm1c tPgKwe']//span[@class='ogfYpf']").get_attribute("aria-label")
    layover = layover_element.split(" flight.")[0]  # 提取 "1 stop" 或 "Non-stop"
except NoSuchElementException:
    layover = "Non-stop"

if layover != "直達航班。":
    # 抓取停留時間
    layover_info_element = driver.find_element(By.XPATH, '//div[contains(@class, "tvtJdb") and contains(@class, "eoY5cb")]').get_attribute("innerHTML")
    # 使用正則表達式提取時間部分
    time_pattern = r'(\d+\s*小時\s*\d+\s*分鐘)'
    match = re.search(time_pattern, layover_info_element)
    if match:
        layover_time = match.group(1)
    else:
        layover_time = "未找到停留時間"
else:
    layover_time = "Non-stop"

# 抓取機型
aircraft = driver.find_element(By.XPATH, '//div[@class="MX5RWe sSHqwe y52p7d"]/span[@class = "Xsgmwe"][last()]').get_attribute("innerHTML")

# 抓取航班代碼
flight_number_element = driver.find_element(By.XPATH, '//div[@class="MX5RWe sSHqwe y52p7d"]/span[contains(@class, "Xsgmwe")][2]').get_attribute("innerHTML")
flight_number = flight_number_element.replace('&nbsp;', ' ').strip()  # 去除前後空白

# 抓取艙等
cabin_class = driver.find_element(By.XPATH, '//span[contains(@class, "Xsgmwe")]/div').get_attribute("innerHTML")

# 輸出結果
print(f"出發日期: {departure_date}")
print(f"出發時間: {departure_time}")
print(f"出發機場代號: {departure_airport}")
print(f"抵達時間: {arrival_time}")
print(f"抵達機場代號: {arrival_airport}")
print(f"航空公司: {airline}")
print(f"停靠站數量: {layover}")
print(f"停留時間: {layover_time}")
print(f"飛行時間: {flight_duration}")
print(f"機型: {aircraft}")
print(f"航班代碼: {flight_number}")
print(f"艙等: {cabin_class}")

# 抓取價格歷史
elements = driver.find_elements(By.XPATH, "//*[name()='g' and @class='ke9kZe-LkdAo-RbRzK-JNdkSc pKrx3d']")
price_history = [element.get_attribute("aria-label") for element in elements]
for entry in price_history:
    print(entry)

# 匯出至 CSV，改為追加模式
csv_file = 'google_flights_data_nrt.csv'


# 檢查檔案是否已存在
file_exists = False
try:
    with open(csv_file, 'r', encoding='utf-8-sig'):
        file_exists = True
except FileNotFoundError:
    file_exists = False

# 將資料轉為 column 結構，每個屬性對應一列
flight_data = {
    "出發日期": departure_date,
    "出發時間": departure_time,
    "出發機場代號": departure_airport,
    "抵達時間": arrival_time,
    "抵達機場代號": arrival_airport,
    "航空公司": airline,
    "停靠站數量": layover,
    "停留時間": layover_time,
    "飛行時間": flight_duration,
    "機型": aircraft,
    "航班代碼": flight_number,
    "艙等": cabin_class,
    "價格歷史": "; ".join(price_history)  # 把價格歷史合併到一個欄位
}

# 打開或創建 CSV 文件，並以追加模式寫入
with open(csv_file, mode='a', newline='', encoding='utf-8-sig') as file:
    writer = csv.writer(file)
    
    if not file_exists:
        # 如果檔案不存在，寫入屬性標題
        writer.writerow(flight_data.keys())
    
    # 寫入航班的資料到新的一列
    writer.writerow(flight_data.values())
    
# 關閉瀏覽器
driver.quit()

出發日期: 11月30日 週六
出發時間: 下午3:00。
出發機場代號: TPE
抵達時間: 晚上7:00。
抵達機場代號: NRT
航空公司: 台灣虎航
停靠站數量: 直達航班。
停留時間: Non-stop
飛行時間: 3 小時
機型: Airbus A320
航班代碼: IT 700
艙等: 經濟艙
60 天前 - $5,099
59 天前 - $5,099
58 天前 - $5,099
57 天前 - $5,099
56 天前 - $5,099
55 天前 - $5,099
54 天前 - $5,099
53 天前 - $3,699
52 天前 - $3,699
51 天前 - $4,099
50 天前 - $4,099
49 天前 - $4,099
48 天前 - $4,099
47 天前 - $4,099
46 天前 - $4,099
45 天前 - $4,599
44 天前 - $4,599
43 天前 - $4,599
42 天前 - $4,599
41 天前 - $4,099
40 天前 - $4,099
39 天前 - $4,099
38 天前 - $4,099
37 天前 - $4,099
36 天前 - $4,599
35 天前 - $4,599
34 天前 - $4,099
33 天前 - $4,099
32 天前 - $4,599
31 天前 - $4,599
30 天前 - $6,099
29 天前 - $6,099
28 天前 - $6,099
27 天前 - $6,099
26 天前 - $6,099
25 天前 - $6,099
24 天前 - $6,099
23 天前 - $6,099
22 天前 - $6,099
21 天前 - $6,099
20 天前 - $6,099
19 天前 - $6,099
18 天前 - $4,599
17 天前 - $3,699
16 天前 - $3,699
15 天前 - $3,699
14 天前 - $3,699
13 天前 - $3,699
12 天前 - $3,699
11 天前 - $3,699
10 天前 - $3,699
9 天前 - $4,099
8 天前 - $4,099
7 天前 - $4,599
6 天前 - $4,599
5 天前 - $4,599
4 天前 - $4,

In [23]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import csv
import re

# 設置 Selenium 驅動
options = Options()
options.add_argument("--headless")  # 如果需要顯示瀏覽器，請去掉此行
service = Service("/opt/homebrew/bin/chromedriver")  # 指定 ChromeDriver 的路徑
driver = webdriver.Chrome(service=service, options=options)

# 打開 Google Travel 的航班頁面
url = "https://www.google.com/travel/flights/search?tfs=CBwQAhooEgoyMDI0LTEwLTIzagwIAhIIL20vMGZ0a3hyDAgDEggvbS8wNnk1N0ABSAFwAYIBCwj___________8BmAEC&tfu=EgQIARAAIgA&authuser=0"
driver.get(url)

driver.implicitly_wait(10)

# 定位所有符合條件的 <li class="pIav2d"> 元素
flight_links = driver.find_elements(By.CSS_SELECTOR, "li.pIav2d")
print(f"找到 {len(flight_links)} 個航班")

# 準備寫入 CSV 檔案（以追加模式）
with open('flights_info.csv', 'a', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    # 如果檔案為空，則寫入標題
    if csv_file.tell() == 0:  # 檢查檔案大小
        csv_writer.writerow([
            "出發日期", "出發時間", "出發機場代號", 
            "抵達時間", "抵達機場代號", "航空公司", 
            "停靠站數量", "停留時間", "飛行時間", 
            "機型", "航班代碼", "艙等", "價格歷史"
        ])

    # 遍歷並點擊每個航班列表項，打開新頁面
    for index in range(len(flight_links)):
        # 重新獲取航班連結，防止 StaleElementReferenceException
        flight_links = driver.find_elements(By.CSS_SELECTOR, "li.pIav2d")
        
        # 點擊
        flight_links[index].click()

        # 等待新頁面加載
        time.sleep(5)

        print(f"正在抓取航班 {index + 1} 的詳細資訊")
        
        # 抓取出發日期
        departure_date_element = driver.find_element(By.XPATH, "//span[contains(@class, 'mv1WYe')]").get_attribute("innerHTML")[:9]
        departure_date = departure_date_element.strip()

        # 抓取出發時間
        departure_time_element = driver.find_element(By.XPATH, "//div[@class='wtdjmc YMlIz ogfYpf tPgKwe']").get_attribute("aria-label")
        departure_time = departure_time_element.split("：")[-1].strip()  # 抓取時間部分

        # 抓取抵達時間
        arrival_time_element = driver.find_element(By.XPATH, "//div[@class='XWcVob YMlIz ogfYpf tPgKwe']").get_attribute("aria-label")
        arrival_time = arrival_time_element.split("：")[-1].strip()  # 抓取時間部分

        # 獲取所有符合條件的元素
        airport_elements = driver.find_elements(By.XPATH, "//span[contains(@class, 'qeoz6e HKHSfd')]/following-sibling::span[@dir='ltr']")
        # 抓取出發和抵達機場代碼
        departure_airport = airport_elements[0].get_attribute("innerHTML").strip("()")  # 第一個是出發機場
        arrival_airport = airport_elements[1].get_attribute("innerHTML").strip("()")    # 第二個是抵達機場

        # 抓取航空公司
        airline = driver.find_element(By.XPATH, "//div[contains(@class, 'sSHqwe')]/span[1]").text

        # 抓取行程時間
        travel_time_element = driver.find_element(By.XPATH, "//div[@class='CQYfx y52p7d WO360 QS0io']").get_attribute("innerHTML")

        # 使用正則表達式提取飛行時間
        match = re.search(r'(\d+ 小時 \d+ 分鐘)', travel_time_element)
        flight_duration = match.group(1) if match else "未找到飛行時間"
        
        # 抓取停靠站數量
        try:
            layover_element = driver.find_element(By.XPATH, "//div[@class='EfT7Ae AdWm1c tPgKwe']//span[@class='ogfYpf']").get_attribute("aria-label")
            layover = layover_element.split(" flight.")[0]  # 提取 "1 stop" 或 "Non-stop"
        except NoSuchElementException:
            layover = "Non-stop"

        # 抓取停留時間（如果有）
        if layover != "直達航班。":
            layover_info_element = driver.find_element(By.XPATH, '//div[contains(@class, "tvtJdb") and contains(@class, "eoY5cb")]').get_attribute("innerHTML")
            time_pattern = r'(\d+\s*小時\s*\d+\s*分鐘)'
            match = re.search(time_pattern, layover_info_element)
            layover_time = match.group(1) if match else "未找到停留時間"
        else:
            layover_time = "Non-stop"

        # 抓取機型
        aircraft = driver.find_element(By.XPATH, '//div[@class="MX5RWe sSHqwe y52p7d"]/span[@class = "Xsgmwe"][last()]').get_attribute("innerHTML")

        # 抓取航班代碼
        flight_number_element = driver.find_element(By.XPATH, '//div[@class="MX5RWe sSHqwe y52p7d"]/span[contains(@class, "Xsgmwe")][2]').get_attribute("innerHTML")
        flight_number = flight_number_element.replace('&nbsp;', ' ').strip()  # 去除前後空白

        # 抓取艙等
        cabin_class = driver.find_element(By.XPATH, '//span[contains(@class, "Xsgmwe")]/div').get_attribute("innerHTML")

        # 抓取價格歷史
        elements = driver.find_elements(By.XPATH, "//*[name()='g' and @class='ke9kZe-LkdAo-RbRzK-JNdkSc pKrx3d']")
        price_history = [element.get_attribute("aria-label") for element in elements]

        # 將資料寫入 CSV
        csv_writer.writerow([
            departure_date, departure_time, departure_airport,
            arrival_time, arrival_airport, airline,
            layover, layover_time, flight_duration,
            aircraft, flight_number, cabin_class, ', '.join(price_history)  # 將價格歷史串接為一個字符串
        ])

        # 返回上一頁
        driver.back()

        # 等待返回加載完成
        time.sleep(5)

# 關閉瀏覽器
driver.quit()

找到 9 個航班
正在抓取航班 1 的詳細資訊
正在抓取航班 2 的詳細資訊
正在抓取航班 3 的詳細資訊


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//span[contains(@class, "Xsgmwe")]/div"}
  (Session info: chrome=129.0.6668.90); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x0000000105324274 cxxbridge1$str$ptr + 1907280
1   chromedriver                        0x000000010531c75c cxxbridge1$str$ptr + 1875768
2   chromedriver                        0x0000000104f30260 cxxbridge1$string$len + 89488
3   chromedriver                        0x0000000104f7450c cxxbridge1$string$len + 368700
4   chromedriver                        0x0000000104fae7d0 cxxbridge1$string$len + 606976
5   chromedriver                        0x0000000104f6912c cxxbridge1$string$len + 322652
6   chromedriver                        0x0000000104f69d7c cxxbridge1$string$len + 325804
7   chromedriver                        0x00000001052ec504 cxxbridge1$str$ptr + 1678560
8   chromedriver                        0x00000001052f0e6c cxxbridge1$str$ptr + 1697352
9   chromedriver                        0x00000001052d1618 cxxbridge1$str$ptr + 1568244
10  chromedriver                        0x00000001052f173c cxxbridge1$str$ptr + 1699608
11  chromedriver                        0x00000001052c2bbc cxxbridge1$str$ptr + 1508248
12  chromedriver                        0x000000010530d854 cxxbridge1$str$ptr + 1814576
13  chromedriver                        0x000000010530d9ac cxxbridge1$str$ptr + 1814920
14  chromedriver                        0x000000010531c3fc cxxbridge1$str$ptr + 1874904
15  libsystem_pthread.dylib             0x0000000197256f94 _pthread_start + 136
16  libsystem_pthread.dylib             0x0000000197251d34 thread_start + 8
