<a href="https://colab.research.google.com/github/graphy-young/endslab/blob/main/2022/research/openapi_nifos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialize notebook

### Import requirements

In [1]:
# Install and enable ipython-autotime so every executed cell reports its run time
# (the "time: ..." lines shown under the cells below).
!pip install ipython-autotime
%load_ext autotime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ipython-autotime
  Downloading ipython_autotime-0.3.1-py2.py3-none-any.whl (6.8 kB)
Collecting pyblaze
  Downloading pyblaze-2.3.2-py3-none-any.whl (66 kB)
[K     |████████████████████████████████| 66 kB 2.0 MB/s 
Collecting scikit-learn<0.25.0,>=0.23.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 28.2 MB/s 
Installing collected packages: scikit-learn, pyblaze, ipython-autotime
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.2
    Uninstalling scikit-learn-1.0.2:
      Successfully uninstalled scikit-learn-1.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
yellowbrick 1.4 requires scikit-learn>=1.0.0, but you have scikit-

In [4]:
import requests, json
import pandas as pd
from pprint import pprint

time: 2.31 ms (started: 2022-07-04 07:34:07 +00:00)


## Define constants & variables

### Mount Google Drive, then load connection info

In [5]:
from google.colab import drive

# Mount Google Drive so the connection-info file stored there can be read from this runtime.
DRIVE_PATH = "/content/drive"
drive.mount(DRIVE_PATH)

# JSON file with the connection info for each Open API; later cells read an
# "endpoint" and a "key" entry per API from it.
JSON_PATH = f"{DRIVE_PATH}/MyDrive/Colab Notebooks/ENDS_LAB/openapi.json"

# Use a context manager so the file handle is closed as soon as parsing finishes
# instead of being left open for the lifetime of the kernel.
with open(JSON_PATH, 'r') as connection_file:
    OPEN_API = json.load(connection_file)

Mounted at /content/drive
time: 27.1 s (started: 2022-07-04 07:34:11 +00:00)


### 산림청 국립산림과학원_청정넷_측정데이터 (AicanDustData)

In [11]:
### Documentation -> https://www.data.go.kr/data/15080316/fileData.do

# Settings for the measurement API, taken from the "nifos_msmt" entry of the connection info.
nifos_measurement = {
    "OPENAPI_URL": OPEN_API["nifos_msmt"]["endpoint"],
    "API_KEY_ENCODED": OPEN_API["nifos_msmt"]["key"],  # Insert your Open API encoded key
}
# Percent-decode the stored key before it is used as a query parameter.
nifos_measurement["API_KEY_DECODED"] = requests.utils.unquote(nifos_measurement["API_KEY_ENCODED"])
# Rows requested per page. Original note: the rate-limit threshold seems to be
# about 180,000 rows at most (unconfirmed).
nifos_measurement["QUOTA_LIMIT"] = 180000

# Query parameters sent with every request.
# Optional date filter (not enabled): add 'startDt', e.g. '202104010000'
# (format noted as yyyyMMddHHmmss, default None), and 'endDt' in the same format
# to request the rows between the two.
nifos_measurement["parameters"] = dict(
    serviceKey=nifos_measurement["API_KEY_DECODED"],  # API authorization key
    pageNo='1',  # Page number (Default: 1)
    numOfRows=str(nifos_measurement['QUOTA_LIMIT']),  # Rows per page (Default: 10)
    contentType='JSON',  # Response data type (xml, json / Default: xml)
)

# New column names, listed in the order they are assigned to the response columns.
# Each comment gives the source field name and its meaning as recorded by the author.
nifos_measurement["column_list"] = [
    "measured_datetime",  # obsrt_dtm, observation datetime
    "temperature",        # observed temperature; source field was noted as obsrt_dtm, which looks like a copy-paste slip - TODO confirm
    "avoc_pm01",          # source field/meaning were noted as "obsrt_dtm, observed humidity", which does not match the name - TODO confirm
    "wind_speed",         # obsrt_ws, observed wind speed
    "pm10",               # obsrt_pm10_val, observed fine dust
    "pm25",               # obsrt_pm25_val, observed ultrafine dust
    "avoc_pm10",          # avoc_obsrt_pm10_val, AVOC observed fine dust
    "avoc_pm25",          # avoc_obsrt_pm25_val, AVOC observed ultrafine dust
    "relative_humidity",  # obsrt_hmdt, observed humidity
    "pm01",               # obsrt_pm01_val, observed PM01 (finest particle class)
    "station_code",       # obsrr_tpcd, station type code
    "wind_direction",     # obsrt_wndrc_val, observed wind direction
]

time: 18.8 ms (started: 2022-07-04 07:49:25 +00:00)


### 산림청 국립산림과학원_청정넷_운영현황 (AicanObsrrInfo)

In [None]:
# Settings for the station-information API, taken from the "nifos_station" entry
# of the connection info.
nifos_station = dict()

nifos_station["OPENAPI_URL"] = OPEN_API["nifos_station"]["endpoint"]
nifos_station["API_KEY_ENCODED"] = OPEN_API["nifos_station"]["key"] # Insert your Open API encoded key
# Percent-decode the stored key before it is used as a query parameter.
nifos_station["API_KEY_DECODED"] = requests.utils.unquote(nifos_station["API_KEY_ENCODED"])

nifos_station["parameters"] = {
    'serviceKey' : nifos_station["API_KEY_DECODED"], # API authorization key
    'perPage': "1000000" # Result row numbers per a page (Default: 10)
}

# English names for the station table columns.
# An earlier 13-name list assigned to this key was overwritten immediately by this
# one and never used, so only the effective list is kept.
# The comma after "station_group" matters: without it Python joins the two adjacent
# string literals into "station_groupstation_name" and the list comes up one name short.
# NOTE(review): the order appears to follow the 12 columns shown in the station
# response output (longitude, group, name, ...) - confirm against the API.
nifos_station["column_list"] = [
    "longitude",
    "station_group",
    "station_name",
    "description",
    "obsrv_inst_dt",
    "latitude",
    "station_addr",
    "reg_dt",
    "equip_name",
    "equip_model",
    "equip_mfr_name",
    "elevation",
]

time: 13.5 ms (started: 2022-07-03 04:39:50 +00:00)


# Get data from Open API

## Get station information from AicanObsrrInfo

In [None]:
# Request the station list and keep the raw response, its parsed body and the
# resulting table inside the nifos_station dict.
nifos_station["response"] = requests.get(nifos_station["OPENAPI_URL"], params=nifos_station["parameters"])
pprint(nifos_station["response"])

# Stop here on an HTTP error status rather than failing later while parsing an
# error body as if it were the expected JSON payload.
nifos_station["response"].raise_for_status()

nifos_station["content"] = json.loads(nifos_station["response"].content)

print(f"[LOG] Successfully load {nifos_station['content']['currentCount']} rows from API")

# The rows are read from the "data" entry of the parsed body.
nifos_station["data"] = pd.DataFrame.from_dict(nifos_station["content"]["data"])

display(nifos_station["data"])

<Response [200]>
[LOG] Successfully load 60 rows from API


Unnamed: 0,관측소경도,관측소그룹명,관측소명,관측소설명,관측소설치일,관측소위도,관측소주소,입력일시,장비명,장비모델명,장비제조사명,해발고도
0,127.045243,홍릉,홍릉_도심,소나무 숲 13그루가 식재된 교통섬,2019-08-26,37.85047,서울 동대문구 홍릉로 22 (청량리역환승센터 교통섬),2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),23m
1,127.048591,홍릉,홍릉_숲내부5m,상층 소나무 숲 아래의 내부 지점,2019-08-27,37.599037,서울 성북구 하월곡동 산 5-57 (산불초소 뒤),2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),56m
2,127.048591,홍릉,홍릉_숲외부20m,상층 소나무 숲 위의 외부 지점,2019-08-27,37.599037,서울 성북구 하월곡동 산 5-57 (산불초소 뒤),2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),81m
3,127.100294,고매,고매_도로,도로와 인접하여 위치한 백합나무 숲,2019-11-14,37.219665,경기 용인시 기흥구 고매동 486-1 고매시험림,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),47m
4,127.100906,고매,고매_50m,도로와 50미터 거리에 위치한 백합나무 숲,2019-11-14,37.219665,경기 용인시 기흥구 고매동 486-1 고매시험림,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),48m
5,127.101927,고매,고매_150m,도로와 150미터 거리에 위치한 백합나무 숲,2019-11-14,37.219665,경기 용인시 기흥구 고매동 486-1 고매시험림,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),49m
6,126.726128,시화,시화_산단,곰솔이 식재된 산단주변의 소망공원,2019-12-16,37.334244,경기 시흥시 정왕동 1368 소망공원,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),3m
7,126.735296,시화,시화_차단숲,곰솔이 식재된 차단숲,2019-12-16,37.34137,경기 시흥시 정왕동 1961 차단숲,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),14m
8,126.735757,시화,시화_주거,곰솔이 식재된 주거지역 주변 중앙공원,2019-12-16,37.349817,경기 시흥시 정왕동 1845-1 중앙공원,2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),9m
9,127.036108,양재,양재_도로,경부고속도로와 가까운 침엽수가 식재된 산책로,2020-01-09,37.469715,서울 서초구 양재동 236 (동상 뒤),2021-03-13,EDM365,EDM365-SVC,GRIMM(독일),27m


time: 1.36 s (started: 2022-07-02 07:34:45 +00:00)


## Get observations from AicanDustData API

In [None]:
# Download every page of measurement rows and combine them into one DataFrame.
# All settings are read from `nifos_measurement`, the config dict defined for the
# AicanDustData API; a bare `AicanDustData` name is not defined in this notebook.


def _fetch_measurement_page(page_number):
    """Request one page of measurements and log what was received.

    Args:
        page_number: 1-based page index sent as the `pageNo` query parameter.

    Returns:
        A `(response, payload, frame)` tuple: the HTTP response, its parsed JSON
        body, and a DataFrame built from the body's `items` entry.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Build the query per request so the shared parameter dict is never left
    # pointing at a later page when a request fails part-way through.
    query = {**nifos_measurement["parameters"], "pageNo": str(page_number)}
    page_response = requests.get(nifos_measurement["OPENAPI_URL"], params=query)
    pprint(page_response)
    page_response.raise_for_status()

    payload = json.loads(page_response.content)
    frame = pd.DataFrame.from_dict(payload['items'])
    # Log after parsing so the message describes this page, not the previous one.
    print(f"Load {len(frame)} of {payload['totalCount']} rows successfully with code {payload['resultCode']}: {payload['resultMsg']}")
    return page_response, payload, frame


response, response_json, first_frame = _fetch_measurement_page(1)

result_code = response_json['resultCode']
result_message = response_json['resultMsg']
num_of_rows = int(response_json['numOfRows'])
page_no = int(response_json['pageNo'])
total_count = int(response_json['totalCount'])

# Pages still to fetch after the first one: ceil(total_count / page size) - 1,
# never below zero.
full_pages, leftover_rows = divmod(total_count, nifos_measurement['QUOTA_LIMIT'])
max_iter_num = max(full_pages + (1 if leftover_rows else 0) - 1, 0)

# Collect the pages in a list and concatenate once; growing a DataFrame inside the
# loop copies all previous rows on every iteration.
page_frames = [first_frame]
for page_num in range(2, max_iter_num + 2):
    response, response_json, page_frame = _fetch_measurement_page(page_num)
    page_frames.append(page_frame)

data = pd.concat(page_frames, ignore_index=True)

# The transformation cells read the table from nifos_measurement['data'].
nifos_measurement["data"] = data

display(data)

In [None]:
# Count of non-missing values in each column of the downloaded measurements.
data.count()

# Data Transformation

## Rename columns, parse the datetime column, then save to Google Drive

In [16]:
# Column order of the final measurement table.
measurement_column_order = [
    'station_code', 'measured_datetime', 'temperature', 'relative_humidity',
    'wind_speed', 'wind_direction', 'pm10', 'pm25', 'pm01',
    'avoc_pm10', 'avoc_pm25', 'avoc_pm01',
]

measurement_frame = nifos_measurement['data']

# Only transform a frame that has not been transformed yet. Re-running the rename
# on the already reordered frame would assign the response-order names onto the
# reordered columns and silently mislabel them.
if list(measurement_frame.columns) != measurement_column_order:
    # Label the response columns with the English names, then reorder them.
    measurement_frame.columns = nifos_measurement['column_list']
    # .copy() yields an independent frame, so the column assignment below does not
    # write into a slice of the original (SettingWithCopyWarning).
    measurement_frame = measurement_frame[measurement_column_order].copy()

    # Timestamps are parsed with the minute-resolution format yyyyMMddHHmm.
    measurement_frame['measured_datetime'] = pd.to_datetime(measurement_frame['measured_datetime'], format='%Y%m%d%H%M')

    nifos_measurement['data'] = measurement_frame

display(nifos_measurement['data'])

Index(['station_code', 'measured_datetime', 'temperature', 'relative_humidity',
       'wind_speed', 'wind_direction', 'pm10', 'pm25', 'pm01', 'avoc_pm10',
       'avoc_pm25', 'avoc_pm01'],
      dtype='object')

time: 141 ms (started: 2022-07-04 07:55:39 +00:00)


In [31]:
# Write the transformed measurements to Google Drive as CSV, without the row index.
measurement_csv_path = f"{DRIVE_PATH}/MyDrive/Colab Notebooks/ENDS_LAB/nifos_measurement.csv"
nifos_measurement['data'].to_csv(measurement_csv_path, index=False)

time: 1min 4s (started: 2022-07-04 08:07:24 +00:00)


In [32]:
# Reload the saved CSV to check the round trip. CSV stores timestamps as text, so
# parse_dates is needed to get measured_datetime back as a datetime column.
nifos_measurement['data'] = pd.read_csv(
    f"{DRIVE_PATH}/MyDrive/Colab Notebooks/ENDS_LAB/nifos_measurement.csv",
    parse_dates=['measured_datetime'],
)
display(nifos_measurement['data'])

Unnamed: 0,station_code,measured_datetime,temperature,relative_humidity,wind_speed,wind_direction,pm10,pm25,pm01,avoc_pm10,avoc_pm25,avoc_pm01
0,11,2019-08-25 22:20:00,20.934,46.582,0.000,0.000,3.648,3.548,3.371,3.235,3.185,3.027
1,11,2019-08-25 22:30:00,21.293,49.169,0.000,0.000,3.683,3.647,3.513,3.230,3.048,2.925
2,11,2019-08-25 22:40:00,21.799,50.952,0.000,0.000,3.592,3.552,3.433,3.103,3.088,2.945
3,11,2019-08-25 22:50:00,21.434,50.012,0.000,0.000,3.705,3.690,3.573,3.175,3.172,3.062
4,11,2019-08-25 23:00:00,21.251,48.937,0.000,0.000,3.867,3.833,3.678,3.307,3.248,3.088
...,...,...,...,...,...,...,...,...,...,...,...,...
5850827,31,2022-06-30 11:40:00,22.802,99.972,0.872,145.790,21.207,20.393,19.532,12.685,12.375,11.349
5850828,21,2022-06-30 11:40:00,22.608,98.220,0.333,84.990,1.980,1.343,0.385,2.544,1.782,0.589
5850829,33,2022-06-30 11:40:00,22.819,98.146,0.670,136.071,3.703,3.207,2.418,3.203,2.608,1.765
5850830,32,2022-06-30 11:40:00,22.769,96.759,0.222,31.068,3.342,3.280,3.113,6.352,4.697,3.113


time: 10.4 s (started: 2022-07-04 08:08:29 +00:00)
