In [9]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
import time
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Scrape rental listings from zu.fang.com with Edge + Selenium and save the
# area, total rent and rent per square metre of every listing to a CSV file.

# URL of one result page; ``{page}`` is the 1-based page number.
LIST_URL_TEMPLATE = "https://zhangjiakou.zu.fang.com/house-a014963/i3{page}/"
# Result pages to visit.
PAGES = range(1, 4)
# Value written to the "location" column of every row.
LOCATION = "下花园"
# Destination of the scraped table.
OUTPUT_CSV = "xiahuayuan_rent_price.csv"
# Maximum time (seconds) to wait for the listing container to appear.
WAIT_SECONDS = 10


def build_record(info_area, total_rent, location=LOCATION):
    """Build one output row from the raw texts of a listing.

    Args:
        info_area: The ``|``-separated summary text of the listing; the
            first segment containing ``㎡`` is taken as the area.
        total_rent: The price text of the listing.
        location: Value stored in the ``location`` column.

    Returns:
        A dict with the keys ``m2`` (area text including the ``㎡`` unit, or
        ``None`` when no segment contains it), ``location``, ``price`` (the
        rent text as given) and ``price_per_m2`` (rent divided by area,
        rounded to 2 decimals, or ``None`` when it cannot be computed).
    """
    area_number = next(
        (part.strip() for part in info_area.split("|") if "㎡" in part),
        None,
    )

    avg_rent = None
    if area_number and total_rent:
        try:
            area_num = float(area_number.replace("㎡", ""))
            rent_num = float(total_rent)
        except ValueError:
            # Non-numeric area or price text: keep the row, but without an
            # average, exactly like a listing whose area is missing.
            pass
        else:
            # Guard against a zero (or nonsensical negative) area instead of
            # letting a ZeroDivisionError discard the whole listing.
            if area_num > 0:
                avg_rent = round(rent_num / area_num, 2)

    return {
        "m2": area_number,
        "location": location,
        "price": total_rent,
        "price_per_m2": avg_rent,
    }


def scrape_page(driver, page):
    """Load one result page and return a list of rows, one per listing.

    Args:
        driver: A started Selenium WebDriver.
        page: 1-based page number substituted into ``LIST_URL_TEMPLATE``.

    Returns:
        A list of dicts as produced by ``build_record``. ``dl`` elements for
        which the expected child elements cannot be read are reported on
        stdout and skipped.

    Raises:
        selenium.common.exceptions.TimeoutException: If the ``listBox``
            element does not appear within ``WAIT_SECONDS``.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.common.exceptions import WebDriverException

    print(f"正在抓取第 {page} 页")
    driver.get(LIST_URL_TEMPLATE.format(page=page))

    # Explicit wait; ``until`` returns the located element, so no second
    # lookup of the container is needed.
    list_box = WebDriverWait(driver, WAIT_SECONDS).until(
        EC.presence_of_element_located((By.ID, "listBox"))
    )
    rents = list_box.find_elements(By.CSS_SELECTOR, "dl")

    print(f"第 {page} 页抓到 {len(rents)} 条租房")

    records = []
    for r in rents:
        # Only the browser lookups are guarded: a ``dl`` without the expected
        # children is skipped, while genuine programming errors still surface.
        try:
            info_area = r.find_element(
                By.CSS_SELECTOR, "p.font15.mt12.bold"
            ).text.strip()
            total_rent = r.find_element(
                By.CSS_SELECTOR, "div.moreInfo p.mt5.alingC span.price"
            ).text.strip()
        except WebDriverException as e:
            print("抓取出错:", e)
            continue
        records.append(build_record(info_area, total_rent))
    return records


# The statements below stay at module level (not inside a function) so that a
# notebook or script run still binds the same global names as before, while
# merely importing this code no longer launches a browser.
if __name__ == "__main__":
    # Path to the local EdgeDriver executable.
    edge_driver_path = r"C:\Program Files\edgedriver_win64\msedgedriver.exe"
    service = Service(edge_driver_path)

    # Start Edge.
    options = webdriver.EdgeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")

    driver = webdriver.Edge(service=service, options=options)

    data = []

    # Scrape every page; always close the browser, even when a page fails to
    # load, so no Edge/driver process is left behind.
    try:
        for page in PAGES:
            data.extend(scrape_page(driver, page))
    finally:
        driver.quit()

    # Save the data.
    df = pd.DataFrame(data)
    df.to_csv(OUTPUT_CSV, index=False)

    print("完成")

正在抓取第 1 页
第 1 页抓到 60 条租房
正在抓取第 2 页
第 2 页抓到 60 条租房
正在抓取第 3 页
第 3 页抓到 54 条租房
完成
