<a href="https://colab.research.google.com/github/Yiting916/data_analysis/blob/main/get_pet_registrations2019_2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import urllib3
import requests
import bs4 as bs
import pandas as pd
from datetime import datetime
import json
import time

def fetch_pet_registration_data(animal, start_date, end_date):
    url = "https://www.pet.gov.tw/Handler/PostData.ashx"
    h = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Origin': 'https://www.pet.gov.tw',
        'Referer': 'https://www.pet.gov.tw/Web/O302.aspx'
    }

    urllib3.disable_warnings()

    payload = {
        'Method': 'O302_2',
        'Param': json.dumps({
            'Animal': str(animal),  # 轉換為字串
            'SDATE': start_date.strftime('%Y/%m/%d'),
            'EDATE': end_date.strftime('%Y/%m/%d')
        })
    }

    try:
        max_retries = 3
        for attempt in range(max_retries):
            try:
                print(f"\n嘗試第 {attempt + 1} 次請求...")
                response = requests.post(url, data=payload, headers=h, verify=False, timeout=10)

                print(f"Response status code: {response.status_code}")
                print(f"Response content: {response.text}")

                response.raise_for_status()

                # 解析 JSON 回應
                data = json.loads(response.text)
                if data.get('Success') == True and data.get('Message'):
                    print("請求成功！")
                    # 解析 Message 字串為 JSON
                    message_data = json.loads(data['Message'])
                    results = []
                    for item in message_data:
                        results.append({
                            '縣市': item['AreaName'],
                            '登記數': int(item['fld02'])
                        })
                    return pd.DataFrame(results)
                else:
                    if attempt < max_retries - 1:
                        print("等待 5 秒後重試...")
                        time.sleep(5)
                    continue

            except requests.Timeout:
                print(f"請求逾時")
                if attempt < max_retries - 1:
                    print("等待 2 秒後重試...")
                    time.sleep(2)
                continue

    except requests.exceptions.RequestException as e:
        print(f"請求發生錯誤: {e}")
    # except json.JSONDecodeError as e:
    #     print(f"JSON 解析錯誤: {e}")
    # except Exception as e:
    #     print(f"發生未預期的錯誤: {e}")

    return pd.DataFrame()

# 執行爬蟲
all_data = []
# 0:狗 1:貓
for animal in range(2):
    pet_type = "狗" if animal == 0 else "貓"
    for yr in range(2019, 2024):
        print(f"\n正在處理 {yr} 年的{pet_type}隻登記資料...")
        start_date, end_date = datetime(yr, 1, 1), datetime(yr, 12, 31)
        df = fetch_pet_registration_data(animal, start_date, end_date)

        if not df.empty:
            df['年'] = yr
            df['寵物類別'] = pet_type
            all_data.append(df)
            print(f"{yr} 年{pet_type}隻資料處理完成")
        else:
            print(f"{yr} 年{pet_type}隻資料處理失敗")

        time.sleep(2)  # 在每次請求之間添加延遲

# 合併所有資料
if all_data:
    final_df = pd.concat(all_data, ignore_index=True)
    print("\n最終結果:")
    print(final_df)

    # 儲存資料
    filename = f'pet_registrations_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
    final_df.to_csv(filename, index=False, encoding='utf-8-sig')
    print(f"\n資料已儲存至 {filename}")
else:
    print("\n沒有成功獲取任何資料")


正在處理 2019 年的狗隻登記資料...

嘗試第 1 次請求...
Response status code: 200
Response content: {"KeyNo":"","Success":true,"ErrorMessage":"","Message":"[{\"AreaID\":\"A\",\"AreaName\":\"新北市\",\"fld01\":275,\"fld02\":15028,\"fld03\":1845,\"fld05\":11328,\"fld06\":14850,\"fld04\":8054,\"fld08\":743,\"fld07\":385,\"fld10\":27,\"QueryDT\":\"2025/02/11\"},{\"AreaID\":\"V\",\"AreaName\":\"臺北市\",\"fld01\":217,\"fld02\":8731,\"fld03\":985,\"fld05\":5709,\"fld06\":9339,\"fld04\":5622,\"fld08\":446,\"fld07\":161,\"fld10\":19,\"QueryDT\":\"2025/02/11\"},{\"AreaID\":\"S\",\"AreaName\":\"臺中市\",\"fld01\":211,\"fld02\":14178,\"fld03\":1781,\"fld05\":7424,\"fld06\":10138,\"fld04\":9789,\"fld08\":1118,\"fld07\":1791,\"fld10\":139,\"QueryDT\":\"2025/02/11\"},{\"AreaID\":\"U\",\"AreaName\":\"臺南市\",\"fld01\":152,\"fld02\":10621,\"fld03\":840,\"fld05\":3632,\"fld06\":5284,\"fld04\":5461,\"fld08\":259,\"fld07\":538,\"fld10\":32,\"QueryDT\":\"2025/02/11\"},{\"AreaID\":\"W\",\"AreaName\":\"高雄市\",\"fld01\":198,\"fld02\":1586