## Dataset_API

### wildfire data API

In [None]:
import os
import time
import urllib.parse
import xml.etree.ElementTree as ET
from collections import defaultdict

import pandas as pd
import requests
import urllib3

# Silence urllib3's InsecureRequestWarning (the SSL warning); the requests in
# this script are sent with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Basic configuration
base_url = "http://apis.data.go.kr/1400000/forestStusService/getfirestatsservice"

# Service key in its decoded (raw) form. It is sent through `params`, so
# `requests` percent-encodes it when it builds the query string; do not
# pre-encode it here.
# NOTE(review): a credential is committed in this file. Set the
# DATA_GO_KR_SERVICE_KEY environment variable to use a different key without
# editing the source; the literal below is kept only as a fallback.
service_key = (
    os.environ.get("DATA_GO_KR_SERVICE_KEY")
    or "8PrqzfscVoUU4WHLk0FgBthrspJ0El8ChP7PMQ819DDgQs4dpck4JcNUiMm1kPfdL1dYx/vPRiEyYtJvK8QiVQ=="
)

# Query parameters for the first request; "pageNo" is overwritten while paging.
params = {
    "serviceKey": service_key,  # raw (un-encoded) key
    "searchStDt": "20250101",
    "searchEdDt": "20251231",  # restrict the query to 2025
    "numOfRows": "1000",  # fetch more rows per request
    "pageNo": "1",
    "_type": "xml",
}

# HTTP headers sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept": "application/xml",
    "Content-Type": "application/xml",
}


# XML -> list conversion helper
def parse_items(xml_text):
    """Convert the ``<item>`` entries of an XML response into dictionaries.

    The first ``<items>`` element found anywhere in the document is used, and
    each of its direct ``<item>`` children becomes one dict mapping child tag
    names to their text (``None`` for an empty child).

    Args:
        xml_text: The XML document as a string.

    Returns:
        A list with one dict per ``<item>``. The list is empty when there is
        no ``<items>`` element, when it has no ``<item>`` children, or when
        the text is not well-formed XML (the parse error and the raw text are
        printed in that case).
    """
    try:
        document = ET.fromstring(xml_text)
    except ET.ParseError as e:
        print(f"XML 파싱 오류: {e}")
        print("Raw XML:", xml_text)
        return []

    container = document.find(".//items")
    if container is None:
        return []

    return [
        {field.tag: field.text for field in entry}
        for entry in container.findall("item")
    ]


# 1. Check the total record count
# Sends the first request, dumps the response for debugging, and derives
# `total_count` / `total_pages` for the paging loop. Any failure terminates
# the run with exit status 1.
print("Requesting API...")
try:
    response = requests.get(
        base_url, params=params, headers=headers, timeout=10, verify=False
    )
    print("API Response:")
    print(response.text)
    print("\nResponse Status Code:", response.status_code)
    print("Response Headers:", response.headers)
    # The URL actually requested. NOTE(review): it includes the service key.
    print("\nRequest URL:", response.url)

    if response.status_code != 200:
        print(f"Error: API returned status code {response.status_code}")
        raise SystemExit(1)

    root = ET.fromstring(response.text)

    # Check whether the payload carries an error message
    error_msg = root.find(".//errMsg")
    if error_msg is not None and error_msg.text:
        print(f"API Error: {error_msg.text}")
        auth_msg = root.find(".//returnAuthMsg")
        if auth_msg is not None and auth_msg.text:
            print(f"Auth Error: {auth_msg.text}")
        raise SystemExit(1)

    total_count_element = root.find(".//totalCount")
    if total_count_element is None:
        print("Error: totalCount element not found in XML response")
        print("XML structure:", ET.tostring(root, encoding="unicode"))
        raise SystemExit(1)

    # An empty <totalCount/> has text None, which makes int() raise TypeError
    # instead of ValueError, so both are handled here.
    try:
        total_count = int(total_count_element.text)
    except (TypeError, ValueError) as e:
        print(f"숫자 변환 오류: {e}")
        print("totalCount value:", total_count_element.text)
        raise SystemExit(1) from e
    print(f"총 건수: {total_count}")

except requests.exceptions.RequestException as e:
    print(f"Request Error: {e}")
    raise SystemExit(1) from e
except ET.ParseError as e:
    print(f"XML 파싱 오류: {e}")
    print("Raw response:", response.text)
    raise SystemExit(1) from e

# Pages needed to cover `total_count` rows (ceiling division).
rows_per_page = int(params["numOfRows"])
total_pages = (total_count + rows_per_page - 1) // rows_per_page

# 2. Collect every page and group the records by year
# `yearly_data` maps the "startyear" value of each record to the list of
# records for that year; records without a usable year go under "Unknown".
yearly_data = defaultdict(list)

for page in range(1, total_pages + 1):
    print(f"불러오는 중... page {page}/{total_pages}")
    params["pageNo"] = str(page)
    try:
        res = requests.get(
            base_url, params=params, headers=headers, timeout=10, verify=False
        )
    except requests.exceptions.RequestException as e:
        # Best effort: report the failed page and move on to the next one.
        print(f"Request Error on page {page}: {e}")
        continue

    if res.status_code != 200:
        # Same status check as the first request, but a bad page is skipped
        # rather than aborting the whole run.
        print(f"Error: API returned status code {res.status_code} on page {page}")
        continue

    for item in parse_items(res.text):
        # An empty <startyear/> parses to None; fall back to "Unknown" so all
        # group keys are strings (mixed None/str keys cannot be sorted).
        year = item.get("startyear") or "Unknown"
        yearly_data[year].append(item)
    time.sleep(0.1)  # avoid sending requests too quickly

# 3. Write one CSV file per year
for year in sorted(yearly_data):
    records = yearly_data[year]
    if not records:
        # Only years that actually have data are written.
        continue
    filename = f"wildfire_{year}.csv"
    pd.DataFrame(records).to_csv(filename, index=False, encoding="utf-8-sig")
    print(f"{year}년 데이터 저장 완료: {filename}")

Requesting API...
API Response:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><header><resultCode>00</resultCode><resultMsg>NORMAL SERVICE.</resultMsg></header><body><items/><numOfRows>1000</numOfRows><pageNo>1</pageNo><totalCount>0</totalCount></body></response>

Response Status Code: 200
Response Headers: {'Access-Control-Allow-Origin': '*', 'Content-Encoding': 'gzip', 'Content-Type': 'application/xml', 'Content-Length': '186', 'Date': 'Tue, 27 May 2025 05:41:42 GMT', 'Server': 'NIA API Server'}

Request URL: http://apis.data.go.kr/1400000/forestStusService/getfirestatsservice?serviceKey=8PrqzfscVoUU4WHLk0FgBthrspJ0El8ChP7PMQ819DDgQs4dpck4JcNUiMm1kPfdL1dYx%2FvPRiEyYtJvK8QiVQ%3D%3D&searchStDt=20250101&searchEdDt=20251231&numOfRows=1000&pageNo=1&_type=xml
총 건수: 0


### weather data API

In [None]:
import pandas as pd

# 1. Load the garbled file, decoding it as EUC-KR
df = pd.read_csv("pwd_csv", encoding="euc-kr")

# 2. Print the first rows to confirm the file was read correctly
print(df.head())

# 3. Save it again as UTF-8 (the "utf-8-sig" codec writes a BOM)
df.to_csv("filename", encoding="utf-8-sig", index=False)