抓取座位數

In [None]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Results already scraped, keyed by flight code, so each code is looked up
# on the website at most once.
cache = {}


def _class_seat_count(label):
    """Return the seat count displayed for one cabin class, or 0 if absent.

    Finds the first ``<span>`` whose text contains *label* and parses the
    part of its text before the first space as an integer.
    """
    try:
        span = driver.find_element(By.XPATH, f'//span[contains(text(), "{label}")]')
        return int(span.text.split(" ")[0])
    except (NoSuchElementException, ValueError):
        # No such span, or its leading token is not a number: report the
        # class as having no seats. Other driver errors propagate so the
        # caller treats the whole lookup as failed instead of caching a 0.
        return 0


# Scrape the seat counts for a single flight code.
def get_seat_details(flight_code, *, timeout=10):
    """Look up seat counts for *flight_code* on flightera.net.

    Args:
        flight_code: Flight code typed into the site's search box.
        timeout: Maximum number of seconds to wait for the search box and
            for the seat information to appear.

    Returns:
        A tuple ``(total, business, eco_plus, economy)`` of ints. When the
        lookup fails, a message is printed and ``(None, None, None, None)``
        is returned; failures are not stored in ``cache``.
    """
    # Already queried: answer from the cache without touching the browser.
    if flight_code in cache:
        return cache[flight_code]

    try:
        # Open the search page.
        driver.get("https://flightera.net/en/search")

        # Locate the input box and type the flight code.
        search_box = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, "flnr-autocomplete"))
        )
        search_box.clear()
        search_box.send_keys(flight_code)

        # Click the search button.
        driver.find_element(
            By.XPATH, '//button[@type="submit" and contains(text(), "Search")]'
        ).click()

        # Wait for the seat information itself rather than sleeping for a
        # fixed time: returns as soon as it shows up and tolerates slow loads.
        total_element = WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located(
                (By.XPATH, '//dd[contains(@class, "text-gray-800") and contains(text(), "seats")]')
            )
        )
        total_seats = int(total_element.text.split(" ")[0])

        result = (
            total_seats,
            _class_seat_count("Business"),
            _class_seat_count("Eco+"),
            _class_seat_count("Economy"),
        )
    except Exception as e:
        print(f"航班 {flight_code} 查詢失敗: {e}")
        return None, None, None, None

    # Remember the successful result for later calls.
    cache[flight_code] = result
    return result

# Start a headless Chrome session using the chromedriver binary at driver_path.
driver_path = "/opt/homebrew/bin/chromedriver"
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# Selenium 4 takes the driver location through a Service object; the
# `executable_path` keyword is no longer accepted by current releases.
driver = webdriver.Chrome(service=Service(driver_path), options=options)

try:
    # Load the input data.
    file_path = '/Users/yuchingchen/Documents/專題/cleaned_data/sydney.csv'  # replace with your file path
    data = pd.read_csv(file_path)

    # Collect the distinct flight codes; missing values are dropped so that
    # NaN is never sent to the website as a query.
    unique_flight_codes = data['航班代碼'].dropna().unique()

    # Look up each distinct flight code once; results accumulate in `cache`.
    for flight_code in unique_flight_codes:
        get_seat_details(flight_code)
finally:
    # Always close the browser, even if reading the file or scraping is
    # interrupted.
    driver.quit()

# Map the scraped results back onto the original rows. Each column takes the
# matching position of the cached tuple; codes without a cached result get None.
seat_columns = ('總座位數', '商務座位數', '豪華經濟座位數', '經濟座位數')
for position, column in enumerate(seat_columns):
    data[column] = data['航班代碼'].map(
        lambda code, position=position: cache[code][position] if code in cache else None
    )

# Save the enriched data to a new file.
output_file_path = "/Users/yuchingchen/Documents/專題/cleaned_data/sydney_with_seat_details.csv"
data.to_csv(output_file_path, index=False)
print(f"已保存結果至：{output_file_path}")