In [9]:
# 📦 載入必要套件
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time, os, glob
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
from pathlib import Path

# 📁 設定下載資料夾
folder = str(Path.home() / "Desktop" / "ispan" / "scritpts" / "results")
os.makedirs(folder, exist_ok=True)

# 要下載的年份
year = "113"
# 要下載的縣市
"""    縣市清單，請輸入代碼(請用字串形式輸入)
{
    "新北市": "0001", "宜蘭縣": "0002", "桃園市": "0003", "新竹縣": "0004",
    "苗栗縣": "0005", "臺中市": "0006", "彰化縣": "0007", "南投縣": "0008",
    "雲林縣": "0009", "嘉義縣": "0010", "臺南市": "0011", "高雄市": "0012",
    "屏東縣": "0013", "臺東縣": "0014", "花蓮縣": "0015", "澎湖縣": "0016",
    "基隆市": "0017", "新竹市": "0018", "嘉義市": "0020", "臺北市": "0063",
    "金門縣": "0065", "連江縣": "0066"
}
    """
city_code = "0001"

# 建立一個字典存放縣市別
city_dict = {
    "0001": "新北市", "0002": "宜蘭縣", "0003": "桃園市", "0004": "新竹縣",
    "0005": "苗栗縣", "0006": "臺中市", "0007": "彰化縣", "0008": "南投縣",
    "0009": "雲林縣", "0010": "嘉義縣", "0011": "臺南市", "0012": "高雄市",
    "0013": "屏東縣", "0014": "臺東縣", "0015": "花蓮縣", "0016": "澎湖縣",
    "0017": "基隆市", "0018": "新竹市", "0020": "嘉義市", "0063": "臺北市",
    "0065": "金門縣", "0066": "連江縣"
}

def download_vegetable(folder):
    """下載種植面積資料"""
    
    # 🌐 設定瀏覽器選項
    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", {
        "download.default_directory": folder,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    })
    
    # 清理舊檔案（避免抓到之前的檔案）
    old_files = set(glob.glob(os.path.join(folder, "*.xls")))
    
    driver = webdriver.Chrome(options=options)
    
    try:
        # print(f"\n🌿 正在下載：{veg_name} (代碼: {veg_code})")
        driver.get("https://agr.afa.gov.tw/afa/pgprodcrop_cond.jsp")
        
        # 等待頁面載入
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, "accountingyear"))
        )
        
        # 選擇選項
        Select(driver.find_element(By.NAME, "accountingyear")).select_by_value(year)
        time.sleep(0.5)
        driver.find_element(By.XPATH, '//input[@name="city_town" and @value="city"]').click()
        time.sleep(0.5)
        Select(driver.find_element(By.NAME, "city")).select_by_value(city_code)
        time.sleep(1)
        
        # 點擊下載
        print("⬇️ 點擊下載按鈕...")
        driver.find_element(By.XPATH, '//input[@value="下  載"]').click()
        
        # 等待新檔案出現
        max_wait = 30
        start_time = time.time()
        new_file = None
        
        while time.time() - start_time < max_wait:
            current_files = set(glob.glob(os.path.join(folder, "*.xls")))
            new_files = current_files - old_files
            
            if new_files:
                new_file = list(new_files)[0]
                print(f"✅ 下載完成：{os.path.basename(new_file)}")
                break
            
            time.sleep(1)
        
        return new_file
        
    except Exception as e:
        print(f"❌ 下載時發生錯誤：{e}")
        return None
        
    finally:
        driver.quit()
        print("🔒 瀏覽器已關閉")

def process_xls_file(xls_path, output_name, folder):
    """處理下載的 XLS 檔案並轉成 CSV，然後刪除 XLS"""
    
    try:
        # 讀取 HTML 格式的 XLS
        with open(xls_path, "r", encoding="utf-8") as f:
            html = f.read()
        
        # 使用 StringIO 避免警告
        tables = pd.read_html(StringIO(html), flavor="lxml")
        
        print(f"📊 找到 {len(tables)} 個表格")
        
        # 取得第三個表格並整理
        df = tables[2]
        df.columns = df.iloc[0] + "(" + df.iloc[1] + ")"
        df = df.drop([0, 1]).reset_index(drop=True)
        
        # 儲存成 CSV
        csv_path = os.path.join(folder, f"{output_name}.csv")
        df.to_csv(csv_path, index=False, encoding="utf-8-sig")
        
        print(f"📊 資料筆數：{len(df)} 筆")
        print(f"💾 已儲存：{output_name}.csv")
        
        # 🗑️ 刪除 XLS 檔案
        os.remove(xls_path)
        print(f"🗑️ 已刪除臨時檔案：{os.path.basename(xls_path)}")
        
        return df
        
    except Exception as e:
        print(f"❌ 處理檔案時發生錯誤：{e}")
        return None

# 🚀 主程式：批次下載面積資料
print("=" * 60)
print("🌱 開始批次下載面積資料")
print("=" * 60)

# 主程式
xls_path = download_vegetable(folder)
if xls_path:
    # 處理並轉換檔案
    df = process_xls_file(xls_path, f"{year}年{city_dict[city_code]}各作物面積資料", folder)

    if df is not None:
        print(f"\n📋 {"面積資料"} 資料預覽：")
        print(df.head())
        print("-" * 60)
    else:
        print(f"⚠️ {"面積資料"} 下載失敗")

    time.sleep(2)  # 避免請求過快

print("\n" + "=" * 60)
print("✅ 全部完成！")
print(f"📂 檔案儲存位置：{folder}")
print("=" * 60)

# 🚀 自動開啟資料夾
import subprocess
subprocess.run(['explorer', folder])

🌱 開始批次下載面積資料


WebDriverException: Message: Service /root/.cache/selenium/chromedriver/linux64/141.0.7390.122/chromedriver unexpectedly exited. Status code was: 127
