In [1]:
# -*- coding: utf-8 -*-
"""대기오염과 미세먼지 데이터 분석 - 대안 API 사용"""
import urllib.request
import urllib.parse
import datetime
import json
import pandas as pd

In [2]:
# Service key sent as the `serviceKey` query parameter of the apis.data.go.kr
# endpoints used below. The value is stored already percent-encoded (note the
# %2F / %3D sequences), so the request builders append it to the URL verbatim.
# NOTE(review): this is a credential hard-coded in source — consider loading
# it from an environment variable or a local config file instead.
ServiceKey = "RKp7mtfryeAiTFH74uWHBriQrfiZrYTEp1ujt5BSi9y7eqmJvOMV1dYp3KfFSp4uMf19WPjBjwz5vC%2Fe0UbDfg%3D%3D"

In [3]:
#[CODE 1]
def getRequestUrl(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Fully built request URL.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        The decoded body when the status code is 200; ``None`` when the
        status is anything else or when any error occurs (the error and the
        URL are printed in that case).
    """
    try:
        # Request() raises ValueError for a malformed URL, so it is built
        # inside the try block to keep the "return None on failure" contract.
        req = urllib.request.Request(url)
        # The context manager closes the connection on every exit path.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            if response.getcode() != 200:
                return None
            print("[%s] Url Request Success" % datetime.datetime.now())
            return response.read().decode('utf-8')
    except Exception as e:
        # Boundary of the network layer: report the failure and signal it
        # to the caller with None instead of propagating.
        print(e)
        print("[%s] Error for URL : %s" % (datetime.datetime.now(), url))
        return None


In [4]:
#[CODE 2] - uses the province-level real-time measurement API (more stable)
def reqAirInfo(sido, beginDay, endDay, numOfRows):
    """Request real-time measurements for a province/city and parse the JSON.

    Args:
        sido: Province/city name, sent as the ``sidoName`` parameter.
        beginDay: Unused by this endpoint; kept so existing callers still work.
        endDay: Unused by this endpoint; kept so existing callers still work.
        numOfRows: Number of rows to request (any value convertible by str()).

    Returns:
        The object produced by ``json.loads`` for the response body, or
        ``None`` when the request failed or the body is not valid JSON.
    """
    service_url = 'http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty'
    # Every caller-supplied value is percent-encoded so stray characters
    # (e.g. "&" or spaces typed at the prompt) cannot corrupt the query string.
    # safe='/' matches the default of urllib.parse.quote().
    query = urllib.parse.urlencode(
        [
            ("sidoName", sido),
            ("numOfRows", numOfRows),
            ("pageNo", 1),
            ("ver", "1.0"),
        ],
        safe='/',
        quote_via=urllib.parse.quote,
    )
    # ServiceKey is already percent-encoded, so it is appended verbatim;
    # encoding it again would turn its "%" characters into "%25".
    url = service_url + "?returnType=json&serviceKey=" + ServiceKey + "&" + query
    # NOTE: the printed URL contains the service key.
    print("API URL:", url)

    responseDecode = getRequestUrl(url)
    if responseDecode is None:
        return None

    try:
        return json.loads(responseDecode)
    except json.JSONDecodeError as e:
        # Show the start of the body to help diagnose a non-JSON reply.
        print("JSON 파싱 오류:", e)
        print("응답 내용:", responseDecode[:500])
        return None

In [7]:
# Alternative function for querying a single measuring station
def reqStationAirInfo(stationName, numOfRows):
    """Request real-time measurements for one station and parse the JSON.

    Args:
        stationName: Station name, sent as the ``stationName`` parameter.
        numOfRows: Number of rows to request (any value convertible by str()).

    Returns:
        The object produced by ``json.loads`` for the response body, or
        ``None`` when the request failed or the body is not valid JSON.
    """
    service_url = 'http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getMsrstnAcctoRltmMesureDnsty'
    # Every caller-supplied value is percent-encoded so stray characters
    # (e.g. "&" or spaces typed at the prompt) cannot corrupt the query string.
    # safe='/' matches the default of urllib.parse.quote().
    query = urllib.parse.urlencode(
        [
            ("stationName", stationName),
            ("dataTerm", "DAILY"),
            ("numOfRows", numOfRows),
            ("pageNo", 1),
            ("ver", "1.0"),
        ],
        safe='/',
        quote_via=urllib.parse.quote,
    )
    # ServiceKey is already percent-encoded, so it is appended verbatim;
    # encoding it again would turn its "%" characters into "%25".
    url = service_url + "?returnType=json&serviceKey=" + ServiceKey + "&" + query
    # NOTE: the printed URL contains the service key.
    print("측정소별 API URL:", url)

    responseDecode = getRequestUrl(url)
    if responseDecode is None:
        return None

    try:
        return json.loads(responseDecode)
    except json.JSONDecodeError as e:
        # Show the start of the body to help diagnose a non-JSON reply.
        print("JSON 파싱 오류:", e)
        print("응답 내용:", responseDecode[:500])
        return None

In [8]:
def getAirInfoItem(item, result):
    """Append one measurement record from *item* to *result* as a flat row.

    The row lists, in order, the values stored under stationName, dataTime,
    so2Value, coValue, o3Value, no2Value, pm10Value and pm25Value. A key that
    is absent from *item* contributes an empty string.
    """
    field_order = (
        'stationName',
        'dataTime',
        'so2Value',
        'coValue',
        'o3Value',
        'no2Value',
        'pm10Value',
        'pm25Value',
    )
    row = [item.get(field, '') for field in field_order]
    result.append(row)

In [9]:
# User input: show the menu, then read which query mode to run
menu_lines = (
    "=== 대기오염 데이터 수집 ===",
    "1. 측정소별 조회",
    "2. 시도별 조회 (서울 전체)",
)
print("\n".join(menu_lines))
choice = input("선택하세요 (1 또는 2): ")

=== 대기오염 데이터 수집 ===
1. 측정소별 조회
2. 시도별 조회 (서울 전체)
선택하세요 (1 또는 2): 2


In [10]:
# Run the query that matches the menu choice and keep the parsed reply in
# jsonResponse. Surrounding whitespace typed at a prompt is ignored so that
# an answer such as "2 " is still accepted.
choice = choice.strip()

if choice == "1":
    # Per-station query
    where = input('측정소명을 입력하세요 (예: 종로구): ').strip()
    numOfRows = input('조회할 데이터 수 (예: 100): ').strip()

    print(f"\n=== {where} 측정소 실시간 데이터 조회 ===")
    jsonResponse = reqStationAirInfo(where, numOfRows)

elif choice == "2":
    # Province/city-wide query; the two date arguments are passed empty
    sido = input('시도명을 입력하세요 (예: 서울): ').strip()
    numOfRows = input('조회할 데이터 수 (예: 100): ').strip()

    print(f"\n=== {sido} 시도 전체 실시간 데이터 조회 ===")
    jsonResponse = reqAirInfo(sido, '', '', numOfRows)

else:
    print("잘못된 선택입니다.")
    # exit() is a helper installed by the site module for interactive
    # shells; raising SystemExit stops execution without relying on it.
    raise SystemExit

시도명을 입력하세요 (예: 서울): 서울
조회할 데이터 수 (예: 100): 20

=== 서울 시도 전체 실시간 데이터 조회 ===
API URL: http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty?returnType=json&serviceKey=RKp7mtfryeAiTFH74uWHBriQrfiZrYTEp1ujt5BSi9y7eqmJvOMV1dYp3KfFSp4uMf19WPjBjwz5vC%2Fe0UbDfg%3D%3D&sidoName=%EC%84%9C%EC%9A%B8&numOfRows=20&pageNo=1&ver=1.0
[2025-08-07 00:20:55.542358] Url Request Success


In [11]:
# Response handling: report the API result code, flatten the returned items
# into rows, and save them to a CSV file.
result = []

# Result codes that stop the run, with the message printed for each one.
fatal_messages = {
    '03': "❌ 데이터가 없습니다.",
    '11': "❌ 필수 매개변수 누락",
    '20': "❌ 서비스 접근 권한 없음 - 공공데이터포털에서 서비스 신청 필요",
}

if jsonResponse:
    print("API 응답 구조:", list(jsonResponse.keys()))

    if 'response' in jsonResponse:
        response = jsonResponse['response']

        # Header check
        if 'header' in response:
            header = response['header']
            result_code = header.get('resultCode')
            result_msg = header.get('resultMsg')
            print(f"응답 코드: {result_code}")
            print(f"응답 메시지: {result_msg}")

            if result_code == '00':
                print("✅ API 호출 성공!")
            elif result_code in fatal_messages:
                print(fatal_messages[result_code])
                # exit() is a helper installed by the site module for
                # interactive shells; raising SystemExit stops execution
                # without relying on it.
                raise SystemExit
            # Any other code falls through to the data extraction below.

        # Data extraction
        if 'body' in response and response['body']:
            body = response['body']

            if 'items' in body and body['items']:
                items = body['items']
                print(f"총 {len(items)}개의 실시간 데이터 발견")

                for item in items:
                    getAirInfoItem(item, result)

                if result:
                    columnNames = ["location", "datetime", "so2", "co", "o3", "no2", "pm10", "pm25"]
                    result_df = pd.DataFrame(result, columns=columnNames)

                    # Build the file name from the current time
                    current_time = datetime.datetime.now().strftime('%Y%m%d_%H%M')
                    if choice == "1":
                        filename = f'대기오염데이터_{where}_{current_time}.csv'
                    else:
                        filename = f'대기오염데이터_{sido}전체_{current_time}.csv'

                    result_df.to_csv(filename, index=False, encoding='utf-8-sig')

                    print(f'\n✅ 파일 생성 완료: {filename}')
                    print(f"총 {len(result)}개의 데이터 저장됨")
                    print("\n📊 데이터 미리보기:")
                    print(result_df.head(10))

                    # Per-station summary: number of rows for each station.
                    # result is non-empty here, so the frame has rows.
                    print(f"\n📍 측정소 현황:")
                    station_count = result_df['location'].value_counts()
                    print(station_count)
                else:
                    print("추출된 데이터가 없습니다.")
            else:
                print("응답에 items가 없습니다.")
                print("Body 내용:", body)
        else:
            print("응답에 body가 없습니다.")
    else:
        print("잘못된 응답 형식입니다.")
else:
    print("API 응답을 받지 못했습니다.")

print("\n=== 완료 ===")

API 응답 구조: ['response']
응답 코드: 00
응답 메시지: NORMAL_CODE
✅ API 호출 성공!
총 20개의 실시간 데이터 발견

✅ 파일 생성 완료: 대기오염데이터_서울전체_20250807_0020.csv
총 20개의 데이터 저장됨

📊 데이터 미리보기:
  location          datetime    so2   co     o3    no2 pm10 pm25
0      강남구  2025-08-07 09:00  0.002  0.4  0.041  0.012   20    9
1      서초구  2025-08-07 09:00  0.003  0.4  0.041  0.014   15    6
2     도산대로  2025-08-07 09:00  0.002  0.6  0.041  0.013   22    6
3     강남대로  2025-08-07 09:00  0.003  0.5  0.024  0.027   17    7
4      송파구  2025-08-07 09:00  0.002  0.4  0.041  0.013   22   18
5      강동구  2025-08-07 09:00  0.002  0.5  0.045  0.004   17    9
6     천호대로  2025-08-07 09:00  0.002  0.5  0.030  0.019   13    4
7      금천구  2025-08-07 09:00  0.002  0.3  0.032  0.013   25   20
8     시흥대로  2025-08-07 09:00  0.003  0.4  0.030  0.022   26   12
9      강북구  2025-08-07 09:00  0.002  0.3  0.047  0.003   22   12

📍 측정소 현황:
location
강남구     1
서초구     1
도산대로    1
강남대로    1
송파구     1
강동구     1
천호대로    1
금천구     1
시흥대로    1
강북구     1
양천구     

In [12]:
import pandas as pd

# Rebuild the collected rows as a frame whose timestamp column is named "day",
# then replace the station-name column with the fixed label '종로구'.
# NOTE(review): this overwrites the station names stored in `result` with a
# single constant for every row — confirm that relabelling is intended.
data_df = pd.DataFrame(
    result,
    columns=["location", "day", "so2", "co", "o3", "no2", "pm10", "pm25"],
).drop(columns='location')
data_df.insert(0, 'location', '종로구')
data_df

Unnamed: 0,location,day,so2,co,o3,no2,pm10,pm25
0,종로구,2025-08-07 09:00,0.002,0.4,0.041,0.012,20,9
1,종로구,2025-08-07 09:00,0.003,0.4,0.041,0.014,15,6
2,종로구,2025-08-07 09:00,0.002,0.6,0.041,0.013,22,6
3,종로구,2025-08-07 09:00,0.003,0.5,0.024,0.027,17,7
4,종로구,2025-08-07 09:00,0.002,0.4,0.041,0.013,22,18
5,종로구,2025-08-07 09:00,0.002,0.5,0.045,0.004,17,9
6,종로구,2025-08-07 09:00,0.002,0.5,0.03,0.019,13,4
7,종로구,2025-08-07 09:00,0.002,0.3,0.032,0.013,25,20
8,종로구,2025-08-07 09:00,0.003,0.4,0.03,0.022,26,12
9,종로구,2025-08-07 09:00,0.002,0.3,0.047,0.003,22,12


In [13]:
import pandas as pd

# Rebuild the collected rows as a frame whose timestamp column is named "day",
# then replace the station-name column with the fixed label '서초구'.
# NOTE(review): this overwrites the station names stored in `result` with a
# single constant for every row — confirm that relabelling is intended.
data_df1 = pd.DataFrame(
    result,
    columns=["location", "day", "so2", "co", "o3", "no2", "pm10", "pm25"],
).drop(columns='location')
data_df1.insert(0, 'location', '서초구')
data_df1

Unnamed: 0,location,day,so2,co,o3,no2,pm10,pm25
0,서초구,2025-08-07 09:00,0.002,0.4,0.041,0.012,20,9
1,서초구,2025-08-07 09:00,0.003,0.4,0.041,0.014,15,6
2,서초구,2025-08-07 09:00,0.002,0.6,0.041,0.013,22,6
3,서초구,2025-08-07 09:00,0.003,0.5,0.024,0.027,17,7
4,서초구,2025-08-07 09:00,0.002,0.4,0.041,0.013,22,18
5,서초구,2025-08-07 09:00,0.002,0.5,0.045,0.004,17,9
6,서초구,2025-08-07 09:00,0.002,0.5,0.03,0.019,13,4
7,서초구,2025-08-07 09:00,0.002,0.3,0.032,0.013,25,20
8,서초구,2025-08-07 09:00,0.003,0.4,0.03,0.022,26,12
9,서초구,2025-08-07 09:00,0.002,0.3,0.047,0.003,22,12
