In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv


In [None]:
# Initialise the output CSV: (re)create the file so it holds only the header row.
# The header is grouped by measurement; the three-column groups are Max/Avg/Min.
csv_header = (
    ["year", "Month", "Day"]
    + ["Temp_Max", "Temp_Avg", "Temp_Min"]
    + ["Dew_Max", "Dew_Avg", "Dew_Min"]
    + ["Humidity_Max", "Humidity_Avg", "Humidity_Min"]
    + ["Wind_Max", "Wind_Avg", "Wind_Min"]
    + ["Pressure_Max", "Pressure_Avg", "Pressure_Min"]
    + ["Precipitation"]
)

# Mode 'w' truncates any existing file; newline='' leaves line endings to the csv module.
with open('weather_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows([csv_header])  # a single row: the column titles

In [2]:
# open a webdriver
def open_driver():
    """Start a headless Chrome WebDriver and return it.

    The 'enable-logging' switch is excluded through the experimental
    'excludeSwitches' option, and the window is maximized before the
    driver is handed back to the caller.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--headless')
    chrome_opts.add_experimental_option('excludeSwitches', ['enable-logging'])

    browser = webdriver.Chrome(options=chrome_opts)
    browser.maximize_window()
    return browser

# Browser session passed to the scraping calls in the cells below.
driver = open_driver()

In [3]:
# catch the data per month
def catch_month_data(driver, year, month, timeout=20):
    """Load one month of Macau (VMMC) history from Weather Underground and scrape it.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        year: Year inserted into the URL.
        month: Month inserted into the URL as given (no zero padding).
        timeout: Seconds to wait for the first table column to be present
            before scraping. Defaults to 20.

    Returns:
        A ``(data, year)`` tuple. ``data`` is a list of seven lists, one per
        table column td[1]..td[7] (days, temperature, dew point, humidity,
        wind, pressure, precipitation), each holding the ``.text`` of every
        element matched for that column. ``year`` is returned unchanged.

    Raises:
        Whatever ``WebDriverWait.until`` raises when the column does not
        appear within ``timeout`` seconds.
    """
    # One template for all columns; only the td index differs between them.
    column_xpath = "//lib-city-history-observation/div/div[2]/table/tbody/tr/td[{}]"

    url = f"https://www.wunderground.com/history/monthly/mo/Macau/VMMC/date/{year}-{month}"
    driver.get(url)
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, column_xpath.format(1))))
    # Stop any further page loading once the table is present.
    driver.execute_script("window.stop();")

    data = [
        [cell.text for cell in driver.find_elements(By.XPATH, column_xpath.format(col))]
        for col in range(1, 8)
    ]

    print (f"{year}年{month}月天气数据下载完成")
    return data,year


In [4]:
# save data into csv file
def save_date(data, year, path='weather_data.csv'):
    """Turn one month of scraped columns into per-day rows and append them to a CSV.

    Args:
        data: List of scraped columns in the order day, temperature, dew
            point, humidity, wind, pressure, precipitation. Each column is a
            list whose first element is the column's text: the first line is
            a header and every following line is the value for one day.
        year: Value written to the first field of every row.
        path: CSV file to append to. Defaults to 'weather_data.csv'.

    Returns:
        None. Rows are appended to ``path``; no header row is written here.

    Every emitted row has 19 fields (year, month, day, five Max/Avg/Min
    triplets, precipitation). A value that is missing for a given day is
    written as an empty field in its own position instead of letting the
    following columns slide into its place.
    """
    max_days = 31      # a monthly table never has more rows than this
    min_present = 6    # a day is kept only if at least this many columns have a value
    column_count = 7   # day + five Max/Avg/Min columns + precipitation

    def parse_column(raw_col):
        """Split a scraped column into (header, list of per-day values)."""
        if not raw_col:
            # Nothing was scraped for this column: no header, no values.
            return "", []
        lines = raw_col[0].split('\n')
        return lines[0], lines[1:]

    def split_triplet(cell):
        """Return exactly three fields (Max, Avg, Min) from a space-separated cell."""
        parts = cell.split()[:3]
        return parts + [""] * (3 - len(parts))

    # Step 1: parse every column into its header and its per-day values.
    parsed = [parse_column(col) for col in data]
    month_label = parsed[0][0] if parsed else ""  # header of the day column
    values_by_column = [values for _, values in parsed]

    # Step 2: regroup the column-wise values into one row per day.
    csv_rows = []
    for day_idx in range(max_days):
        # None marks "this column has no value for this day".
        cells = [
            values[day_idx] if day_idx < len(values) else None
            for values in values_by_column
        ]
        if sum(cell is not None for cell in cells) < min_present:
            continue

        # Keep every value in its own column; blanks stand in for missing ones.
        cells = ["" if cell is None else cell for cell in cells[:column_count]]
        cells += [""] * (column_count - len(cells))
        day, temp, dew, humidity, wind, pressure, precipitation = cells

        csv_rows.append([
            year,
            month_label,
            day,
            *split_triplet(temp),
            *split_triplet(dew),
            *split_triplet(humidity),
            *split_triplet(wind),
            *split_triplet(pressure),
            precipitation,
        ])

    # Step 3: append the rows to the CSV file.
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerows(csv_rows)

    return

In [None]:
# Fetch and store every month from 2021-01 through 2024-12.
for year, month in ((y, m) for y in range(2021, 2025) for m in range(1, 13)):
    data, year = catch_month_data(driver, year, month)
    save_date(data, year)

In [None]:
# Fetch and store the months 2025-01 through 2025-03.
for month in (1, 2, 3):
    data, year = catch_month_data(driver, year=2025, month=month)
    save_date(data, year)