In [69]:
import requests
from bs4 import BeautifulSoup
import json
import os
from urllib.parse import urljoin

# Listing page of captured cats published by IAM (Macau); also used as the
# base URL when resolving relative links found in the page.
url = "https://www.iam.gov.mo/canil/c/catchcat/list.aspx"

# Send the HTTP request. A timeout is set so that a stalled server raises
# instead of blocking the notebook cell forever (requests has no default timeout).
response = requests.get(url, timeout=30)

In [106]:
def download_animal_data(animal_table):
    """Extract one captured-animal record from its listing table and archive it.

    The temporary file number, cabin id, capture date/time/location and the
    detail-page link are read from ``animal_table``. Only when the capture
    location contains '大學' are the record (as JSON) and the animal photo
    written under ``UM_cat_dataset/query/<temp_id>/``; every other record is
    parsed and then discarded.

    Relies on the module-level ``url`` as the base for resolving relative
    links, and performs an HTTP GET to fetch the photo.

    Args:
        animal_table: BeautifulSoup element holding the cells of one animal.

    Returns:
        None. Results are written to disk and a status line is printed.

    Raises:
        AttributeError, TypeError: if an expected label cell, detail link or
            photo element is missing from ``animal_table``.
    """

    def _field(label):
        # Each field is a label cell immediately followed by its value cell.
        return animal_table.find('td', string=label).find_next('td').text.strip()

    temp_id = _field('臨時檔案編號：')
    cabin_id = animal_table.find('td', {'colspan': '2'}).text
    capture_date = _field('捕捉日期：')
    capture_time = _field('捕捉時間：')
    capture_location = _field('捕捉地點：')
    capture_datestr = f"{capture_time} {capture_date}"

    # Look the link up inside *this* animal's table. The previous code used an
    # undefined name `table`, which fails on a fresh kernel (or silently reuses
    # a stale table, giving every record the same link).
    link_element = animal_table.find('td', class_='detailtd').find('a')
    link = urljoin(url, link_element['href'])

    data = {
        'temp_id': temp_id,
        'cabin_id': cabin_id,
        'capture_datestr': capture_datestr,
        'capture_location': capture_location,
        'link': link
    }

    # Only records captured at a location mentioning '大學' are kept.
    if '大學' not in capture_location:
        return

    save_path = f'UM_cat_dataset/query/{temp_id}'
    os.makedirs(save_path, exist_ok=True)

    json_filename = f'{save_path}/IAM_cat_info_{temp_id}.json'
    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=2)

    photo_url = urljoin(url, animal_table.find('img', class_='animalphoto')['src'])
    # Timeout keeps one slow image from stalling the whole run.
    photo_response = requests.get(photo_url, timeout=30)
    if photo_response.ok:
        # Normalise the file name so it ends in exactly one '.jpg'.
        photo_filename = f"{save_path}/{photo_url.split('/')[-1].replace('.jpg', '')}.jpg"
        with open(photo_filename, 'wb') as photo_file:
            photo_file.write(photo_response.content)
    else:
        # Do not store an HTTP error page under a .jpg name.
        print(f"[WARN] photo download failed ({photo_response.status_code}): {photo_url}")

    print(f"[INFO] 保存至: {json_filename} {capture_location}")
    

In [107]:
# Handle the failed-request case first; anything other than 200 is reported.
if response.status_code != 200:
    print(f"Fail: {response.status_code}")
else:
    # Parse the HTML and collect every table carrying the animal-box class.
    soup = BeautifulSoup(response.text, 'html.parser')
    animal_tables = soup.find_all('table', class_='animalboxtable')

    if not animal_tables:
        print("未找到表格")

    # With no matches the loop body simply never runs.
    for animal_table in animal_tables:
        download_animal_data(animal_table)


[INFO] 保存至: UM_cat_dataset/query/20240115/IAM_cat_info_20240115.json 大學大馬路
[INFO] 保存至: UM_cat_dataset/query/20240115BT1/IAM_cat_info_20240115BT1.json 大學大馬路
[INFO] 保存至: UM_cat_dataset/query/20240116BT1/IAM_cat_info_20240116BT1.json 大學大馬路
[INFO] 保存至: UM_cat_dataset/query/20240117A/IAM_cat_info_20240117A.json 大學大馬路
