#API 크롤링

한국환경공단_에어코리아_대기오염정보

https://www.data.go.kr/data/15073861/openapi.do#tab_layer_detail_function


In [1]:
from urllib.parse import urlencode
from urllib.request import urlopen
import json
import csv


#로그 설정

In [2]:
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG) # 모든 레벨의 로그를 Handler들에게 전달해야 합니다.
logger.propagate = False # do not pass logs to the default logger
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', '%Y-%m-%d %H:%M:%S')

# INFO 레벨 이상의 로그를 콘솔에 출력하는 Handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# INFO 레벨 이상의 로그를 `info.log`에 출력하는 Handler
file_info_handler = logging.FileHandler('info.log', mode="w")
file_info_handler.setLevel(logging.INFO)
file_info_handler.setFormatter(formatter)
logger.addHandler(file_info_handler)


# ERROR 레벨 이상의 로그를 `error.log`에 출력하는 Handler
file_error_handler = logging.FileHandler('error.log', mode="w")
file_error_handler.setLevel(logging.ERROR)
file_error_handler.setFormatter(formatter)
logger.addHandler(file_error_handler)


In [3]:
logger.info('This is an info')
logger.error("This is an error")

2023-05-22 04:36:47:INFO:This is an info
2023-05-22 04:36:47:ERROR:This is an error


# 크롤링 결과 파일 설정

In [4]:
output_filename = "./data.csv"

# 출력 파일 초기화
f = open(output_filename, 'w', encoding='utf-8-sig', newline='')
csv_writer = csv.writer(f, delimiter=',')
csv_writer.writerow(['시각','위치','미세먼지농도','오존농도'])
f.close()
logger.info('출력파일 초기화')


# 출력 파일에 줄 별로 추가
def write_csv(filename, json_data, stationName):

  logger.info('출력파일에 쓰기')

  items = json_data['response']['body']['items']
  f = open(filename, 'a', encoding='utf-8-sig', newline='')
  csv_writer = csv.writer(f, delimiter=',')

  for item in items:
    onerow = []
    onerow.append(item['dataTime'])
    onerow.append(stationName)
    onerow.append(item['pm10Value'])
    onerow.append(item['o3Value'])
    csv_writer.writerow(onerow)
  f.close()




2023-05-22 04:37:17:INFO:출력파일 초기화


# 크롤링

In [5]:
url = 'http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getMsrstnAcctoRltmMesureDnsty'
serviceKey = 'r2MsCVHrzUq5gpAZOUEOEILqP8gdoREgvvyeq1OkW5xykkljuTcjaMqrfQXO5xsRA4rcwnQ7jqn0ZDz/JWCB9Q=='



num_of_rows_per_one_page = 10
pageNo = 1
stationName = '애월읍'
dataTerm = 'DAILY'


while True:
  queryParams = '?' + urlencode({ 'numOfRows' : num_of_rows_per_one_page, 'pageNo' : pageNo, 'returnType' : 'json', 'stationName' : stationName,'dataTerm':dataTerm })
  request_url = url + queryParams+'&serviceKey='+serviceKey
  

  logger.info(request_url)

  try:
    response_body = urlopen(request_url).read()
    json_data = json.loads(response_body)
  except Execption as e:
    logger.error(e)
    break  
  
  write_csv(output_filename, json_data, stationName)

  total_count = json_data['response']['body']['totalCount']
  remaining_count = total_count - (num_of_rows_per_one_page * pageNo)

  logger.info("total_count="+str(total_count)+",pageNo="+str(pageNo)+",remaining_count="+str(remaining_count))
  
  pageNo += 1
  if remaining_count < 0 :
    break

2023-05-22 04:37:18:INFO:http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getMsrstnAcctoRltmMesureDnsty?numOfRows=10&pageNo=1&returnType=json&stationName=%EC%95%A0%EC%9B%94%EC%9D%8D&dataTerm=DAILY&serviceKey=r2MsCVHrzUq5gpAZOUEOEILqP8gdoREgvvyeq1OkW5xykkljuTcjaMqrfQXO5xsRA4rcwnQ7jqn0ZDz/JWCB9Q==
2023-05-22 04:37:18:INFO:출력파일에 쓰기
2023-05-22 04:37:18:INFO:total_count=23,pageNo=1,remaining_count=13
2023-05-22 04:37:18:INFO:http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getMsrstnAcctoRltmMesureDnsty?numOfRows=10&pageNo=2&returnType=json&stationName=%EC%95%A0%EC%9B%94%EC%9D%8D&dataTerm=DAILY&serviceKey=r2MsCVHrzUq5gpAZOUEOEILqP8gdoREgvvyeq1OkW5xykkljuTcjaMqrfQXO5xsRA4rcwnQ7jqn0ZDz/JWCB9Q==
2023-05-22 04:37:18:INFO:출력파일에 쓰기
2023-05-22 04:37:18:INFO:total_count=23,pageNo=2,remaining_count=3
2023-05-22 04:37:18:INFO:http://apis.data.go.kr/B552584/ArpltnInforInqireSvc/getMsrstnAcctoRltmMesureDnsty?numOfRows=10&pageNo=3&returnType=json&stationName=%EC%95%A0%EC%9B%94%EC%9D%8D&dataTerm=DAI