In [2]:
import pandas as pd
import urllib
import urllib.request
import json
import pickle

## Params
> base_time
- 예보 발표시각: 0200, 0500, 0800, 1100, 1400, 1700, 2000, 2300
- API 제공 시간(~이후) : 02:05, 05:05, 08:05, 11:05, 14:05, 17:05, 20:05, 23:05

> base_date
- 최근 1일간의 자료만 제공한다는 점에 유의

## **print 결과에서 fcstDate와 fcstTime 확인** 
예를 들어, 2021년 5월 23일이 예측 대상 날짜라면,  
fcstDate: 20210523 fcstTime: 0000을 시작으로,  
fcstDate: 20150524 fcstTIme: 0000까지 나와야 함.  
예시:
```
category:Humidity, REH fcstDate: 20210523 fcstTime: 0000 fcstValue: 85
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0000 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0000 fcstValue: 15
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0000 fcstValue: 245
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0000 fcstValue: 2.1 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0300 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0300 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0300 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0300 fcstValue: 222
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0300 fcstValue: 1.3 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0600 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0600 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0600 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0600 fcstValue: 191
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0600 fcstValue: 1 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0900 fcstValue: 70
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0900 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0900 fcstValue: 19
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0900 fcstValue: 153
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0900 fcstValue: 1.8 

category:Humidity, REH fcstDate: 20210523 fcstTime: 1200 fcstValue: 55
category:Cloud, SKY fcstDate: 20210523 fcstTime: 1200 fcstValue: 3
category:Temperature, T3H fcstDate: 20210523 fcstTime: 1200 fcstValue: 23
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 1200 fcstValue: 250
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 1200 fcstValue: 2 

category:Humidity, REH fcstDate: 20210523 fcstTime: 1500 fcstValue: 55
category:Cloud, SKY fcstDate: 20210523 fcstTime: 1500 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 1500 fcstValue: 24
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 1500 fcstValue: 251
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 1500 fcstValue: 4 

category:Humidity, REH fcstDate: 20210523 fcstTime: 1800 fcstValue: 60
category:Cloud, SKY fcstDate: 20210523 fcstTime: 1800 fcstValue: 4
category:Temperature, T3H fcstDate: 20210523 fcstTime: 1800 fcstValue: 21
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 1800 fcstValue: 245
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 1800 fcstValue: 3.8 

category:Humidity, REH fcstDate: 20210523 fcstTime: 2100 fcstValue: 75
category:Cloud, SKY fcstDate: 20210523 fcstTime: 2100 fcstValue: 4
category:Temperature, T3H fcstDate: 20210523 fcstTime: 2100 fcstValue: 17
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 2100 fcstValue: 212
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 2100 fcstValue: 2.6 

category:Humidity, REH fcstDate: 20210524 fcstTime: 0000 fcstValue: 80
category:Cloud, SKY fcstDate: 20210524 fcstTime: 0000 fcstValue: 4
category:Temperature, T3H fcstDate: 20210524 fcstTime: 0000 fcstValue: 16
category:WindDirection, VEC fcstDate: 20210524 fcstTime: 0000 fcstValue: 220
category:WindSpeed, WSD fcstDate: 20210524 fcstTime: 0000 fcstValue: 2.3 
```

## Original DACON code
https://dacon.io/competitions/official/235720/codeshare/2555?page=1&dtype=recent

In [42]:
date = '20210522' # 예보 발표일자; 최근 1일간의 자료만 제공합니다.
base_time = '2000' # 예보 발표시각: 0200, 0500, 0800, 1100, 1400, 1700, 2000, 2300

# dangjin
nx_dangjin = '53' # 충남 당진시 석문면
ny_dangjin = '114'

In [43]:
key = "sNfoTDclWrvFGpIEFDEXvj+EaCjLrOILF7IYehdRCcYBxnMP0zna40R1UmY6qfWBG0gJ16c3T8ManHwvhACk7w=="
url = 'http://apis.data.go.kr/1360000/VilageFcstInfoService/getVilageFcst'

queryParams = '?' + urllib.parse.urlencode(
    {
        urllib.parse.quote_plus('serviceKey') : key, 
        urllib.parse.quote_plus('numOfRows') : '150', # 그냥 넉넉하게 설정; 데이터가 뒤에서 잘렸다면 이 값을 높여야 함.
        urllib.parse.quote_plus('dataType') : 'JSON', # JSON, XML 두가지 포멧을 제공합니다.
        urllib.parse.quote_plus('base_date') : date, 
        urllib.parse.quote_plus('base_time') : base_time, 
        urllib.parse.quote_plus('nx') : nx_dangjin, 
        urllib.parse.quote_plus('ny') : ny_dangjin
    }
)

response = urllib.request.urlopen(url + queryParams).read()
response = json.loads(response)

In [44]:
date_pandas = '2021-05-23' # 예측 대상 일자
date_json = '20210523' # 예측 대상 일자
date_json_tmrw = '20210524' # 예측 대상 일자의 다음 날

# pd DataFrame
fcst_df= pd.DataFrame()
fcst_df['Forecast_time'] = [f'{date_pandas} {hour}:00' for hour in range(25)] # not 24; for better interpolation
row_idx = 0

for i, data in enumerate(response['response']['body']['items']['item']):
    if (data['fcstDate'] == date_json) or ((data['fcstDate'] == date_json_tmrw) and (data['fcstTime'] == '0000')):

        if data['category']=='REH':
            fcst_df.loc[row_idx, 'Humidity'] = float(data['fcstValue'])
            print('category:Humidity,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='T3H':
            fcst_df.loc[row_idx, 'Temperature'] = float(data['fcstValue'])
            print('category:Temperature,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='SKY':
            fcst_df.loc[row_idx, 'Cloud'] = float(data['fcstValue'])
            print('category:Cloud,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='VEC':
            fcst_df.loc[row_idx, 'WindDirection'] = float(data['fcstValue'])
            print('category:WindDirection,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='WSD':
            fcst_df.loc[row_idx, 'WindSpeed'] = float(data['fcstValue'])
            print('category:WindSpeed,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'],'\n')
            row_idx+=3

category:Humidity, REH fcstDate: 20210523 fcstTime: 0000 fcstValue: 85
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0000 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0000 fcstValue: 15
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0000 fcstValue: 245
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0000 fcstValue: 2.1 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0300 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0300 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0300 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0300 fcstValue: 222
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0300 fcstValue: 1.3 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0600 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0600 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0600 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 060

In [52]:
with open('dangjin_raw.pkl', 'wb') as f:
    pickle.dump(fcst_df, f)

## ulsan

In [48]:
date = '20210522' # 예보 발표일자; 최근 1일간의 자료만 제공합니다.
base_time = '2000' # 예보 발표시각: 0200, 0500, 0800, 1100, 1400, 1700, 2000, 2300

# dangjin
nx_dangjin = '53' # 충남 당진시 석문면
ny_dangjin = '114'

In [49]:
key = "sNfoTDclWrvFGpIEFDEXvj+EaCjLrOILF7IYehdRCcYBxnMP0zna40R1UmY6qfWBG0gJ16c3T8ManHwvhACk7w=="
url = 'http://apis.data.go.kr/1360000/VilageFcstInfoService/getVilageFcst'

queryParams = '?' + urllib.parse.urlencode(
    {
        urllib.parse.quote_plus('serviceKey') : key, 
        urllib.parse.quote_plus('numOfRows') : '150', # 그냥 넉넉하게 설정; 데이터가 뒤에서 잘렸다면 이 값을 높여야 함.
        urllib.parse.quote_plus('dataType') : 'JSON', # JSON, XML 두가지 포멧을 제공합니다.
        urllib.parse.quote_plus('base_date') : date, 
        urllib.parse.quote_plus('base_time') : base_time, 
        urllib.parse.quote_plus('nx') : nx_dangjin, 
        urllib.parse.quote_plus('ny') : ny_dangjin
    }
)

response = urllib.request.urlopen(url + queryParams).read()
response = json.loads(response)

In [50]:
date_pandas = '2021-05-23' # 예측 대상 일자
date_json = '20210523' # 예측 대상 일자
date_json_tmrw = '20210524' # 예측 대상 일자의 다음 날

# pd DataFrame
fcst_df= pd.DataFrame()
fcst_df['Forecast_time'] = [f'{date_pandas} {hour}:00' for hour in range(25)] # not 24; for better interpolation
row_idx = 0

for i, data in enumerate(response['response']['body']['items']['item']):
    if (data['fcstDate'] == date_json) or ((data['fcstDate'] == date_json_tmrw) and (data['fcstTime'] == '0000')):

        if data['category']=='REH':
            fcst_df.loc[row_idx, 'Humidity'] = float(data['fcstValue'])
            print('category:Humidity,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='T3H':
            fcst_df.loc[row_idx, 'Temperature'] = float(data['fcstValue'])
            print('category:Temperature,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='SKY':
            fcst_df.loc[row_idx, 'Cloud'] = float(data['fcstValue'])
            print('category:Cloud,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='VEC':
            fcst_df.loc[row_idx, 'WindDirection'] = float(data['fcstValue'])
            print('category:WindDirection,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'])
        elif data['category']=='WSD':
            fcst_df.loc[row_idx, 'WindSpeed'] = float(data['fcstValue'])
            print('category:WindSpeed,',data['category'], 'fcstDate:',data['fcstDate'], 'fcstTime:',data['fcstTime'], 'fcstValue:',data['fcstValue'],'\n')
            row_idx+=3

category:Humidity, REH fcstDate: 20210523 fcstTime: 0000 fcstValue: 85
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0000 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0000 fcstValue: 15
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0000 fcstValue: 245
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0000 fcstValue: 2.1 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0300 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0300 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0300 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 0300 fcstValue: 222
category:WindSpeed, WSD fcstDate: 20210523 fcstTime: 0300 fcstValue: 1.3 

category:Humidity, REH fcstDate: 20210523 fcstTime: 0600 fcstValue: 90
category:Cloud, SKY fcstDate: 20210523 fcstTime: 0600 fcstValue: 1
category:Temperature, T3H fcstDate: 20210523 fcstTime: 0600 fcstValue: 14
category:WindDirection, VEC fcstDate: 20210523 fcstTime: 060

In [53]:
with open('ulsan_raw.pkl', 'wb') as f:
    pickle.dump(fcst_df, f)