In [1]:
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup

In [2]:
def get_weather_fromland(start_date,end_date,auth) -> pd.DataFrame:
    import datetime
    import pandas as pd

    #날짜 정의
    start_date = str(start_date)
    start_year = str(start_date)[:4]
    end_date = str(end_date)
    start_date = datetime.datetime.strptime(start_date,"%Y%m%d").date()
    end_date = datetime.datetime.strptime(end_date,"%Y%m%d").date()
    days = (end_date - start_date).days + 1

    #API 정의
    base = 'https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?'
    authKey=f'authKey={auth}'
    
    # 빈 df 정의
    df =  pd.DataFrame()
    
    # API 호출
    if days > 29:
        batch = days // 29 + 1
    else:
        batch = 1
    for i in range(batch):
        
        if start_date - datetime.timedelta(1) == end_date:
            break
        elif i+1 == batch:
            to_date = start_date + datetime.timedelta(days % 29)
        else:
            to_date = start_date + datetime.timedelta(29)
            
        # 날짜 텍스트화
        str_from_date = start_date.strftime('%Y%m%d')
        str_to_date = to_date.strftime('%Y%m%d')
        # url 정의
        url = base + f'tm1={str_from_date}0000&' + f'tm2={str_to_date}2300&' + 'help=0&' + authKey
        print(f'current_batch : {i + 1}/{batch}, current_date : {str_from_date} - {str_to_date}')
        # 데이터 생성
        try:
            print(url)
            df_tmp = get_parse_weather(url)
        except:
            try:
                time.sleep(2)
                print(f'error occured on {str_to_date}, Trying again with timesleep 2 secs')
                print(url)
                df_tmp = get_parse_weather(url)
            except:
                print(f'error occured on {str_to_date}, Total Down')
                df = pd.concat([df,df_tmp])
                break

            
        df = pd.concat([df,df_tmp])
        # 시작일 재정의
        start_date = to_date + datetime.timedelta(1)
        print(days)

    # 날짜 형식 변환
    df['datetime'] = pd.to_datetime(df['YYMMDDHHMI_KST'], format='%Y%m%d%H%M')
    df['datetime'] = df['datetime'].apply(lambda x : x.strftime(format='%Y-%m-%d %H:%M'))
    
    # 컬럼 순서 정리
    df.drop(['YYMMDDHHMI_KST'],axis=1,inplace=True)
    df = df[df['datetime'].str.split('-').str[0] == start_year]
    cols = df.columns.tolist()
    cols = [cols[-1]] + [col for col in cols[:-1]]
    df = df[cols]

    return df

def get_parse_weather(url):
    df =  pd.DataFrame()
    res = requests.get(url)
    soup = BeautifulSoup(res.text,'html.parser')

    col_1d = soup.text.split('\n')[2]
    col_2d = soup.text.split('\n')[3]
    data = soup.text.split('\n')[4:-2]

    # 행 정리
    df = pd.DataFrame(data, columns=['data'])
    df = pd.concat([df['data'].str.split(expand=True)], axis=1)
    
    # 컬럼 정리
    col_1d = col_1d.split()[1:]
    col_2d = col_2d.split()[1:]

    col_2d.insert(col_2d.index('LOW'),'-')
    col_2d.insert(col_2d.index('LOW'),'-')
    col_2d.insert(col_2d.index('LOW'),'-')
    col_2d = ['nan' if x.startswith('-') else x for x in col_2d]

    cols = [col_1d[i] + '_' + col_2d[i] for i in range(len(col_1d))]
    df.columns = cols
    return df

In [3]:
auth = '36MMtU5BTwijDLVOQZ8ISw'

In [24]:
df2013 = get_weather_fromland(20130101,20131231,auth)
df2013.to_csv('./data/weather_fromland_20130101_20131231.csv')
df2013

current_batch : 1/13, current_date : 20130101 - 20130130
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201301010000&tm2=201301302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 2/13, current_date : 20130131 - 20130301
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201301310000&tm2=201303012300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 3/13, current_date : 20130302 - 20130331
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201303020000&tm2=201303312300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 4/13, current_date : 20130401 - 20130430
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201304010000&tm2=201304302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 5/13, current_date : 20130501 - 20130530
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201305010000&tm2=201305302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 6/13, current_date : 20130531 - 20130629
https://

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2013-01-01 00:00,90,25,6.1,25,11.0,2251,1016.7,1019.0,0,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,1.0,-9,3,2
1,2013-01-01 00:00,95,36,0.6,-9,-9.0,-9,1002.3,1023.1,1,...,-3.4,-2.6,-2.2,-1.4,-0.6,-9,-9.0,-9,3,2
2,2013-01-01 00:00,98,36,0.9,-9,-9.0,-9,1007.3,1021.8,6,...,-2.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2013-01-01 00:00,99,5,0.9,-9,-9.0,-9,1017.9,1021.8,7,...,-2.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,-9,-9
4,2013-01-01 00:00,100,25,4.9,25,10.0,2201,923.9,1021.2,2,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11149,2013-12-31 23:00,285,23,2.5,-9,-9.0,-9,1007.5,1011.6,-9,...,0.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11150,2013-12-31 23:00,288,0,0.2,-9,-9.0,-9,1010.3,1011.7,-9,...,-1.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11151,2013-12-31 23:00,289,29,5.4,27,13.0,2225,995.1,1011.9,-9,...,-0.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11152,2013-12-31 23:00,294,27,3.6,-9,-9.0,-9,1006.4,1012.0,-9,...,2.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [5]:
df2014 = get_weather_fromland(20140101,20141231,auth)
df2014.to_csv('./data/weather_fromland_20140101_20141231.csv')
df2014

current_batch : 1/13, current_date : 20140101 - 20140130
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201401010000&tm2=201401302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 2/13, current_date : 20140131 - 20140301
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201401310000&tm2=201403012300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 3/13, current_date : 20140302 - 20140331
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201403020000&tm2=201403312300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 4/13, current_date : 20140401 - 20140430
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201404010000&tm2=201404302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 5/13, current_date : 20140501 - 20140530
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201405010000&tm2=201405302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 6/13, current_date : 20140531 - 20140629
https://

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2014-01-01 00:00,90,27,7.5,23,14.0,2125,1005.7,1007.9,2,...,1.0,-99.0,-99.0,-99.0,-99.0,-9,0.5,-9,3,2
1,2014-01-01 00:00,95,23,2.4,-9,-9.0,-9,993.0,1012.0,2,...,-0.2,-0.1,-0.2,0.0,0.8,-9,-9.0,-9,3,2
2,2014-01-01 00:00,98,27,2.3,-9,-9.0,-9,998.5,1012.0,2,...,-0.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2014-01-01 00:00,99,27,0.5,-9,-9.0,-9,1009.1,1012.8,2,...,0.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,2014-01-01 00:00,100,27,11.8,27,23.0,2155,915.8,1007.7,2,...,-1.8,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11271,2014-12-31 23:00,285,32,2.9,-9,-9.0,-9,1015.9,1020.1,-9,...,-1.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11272,2014-12-31 23:00,288,32,2.5,-9,-9.0,-9,1017.8,1019.2,-9,...,-2.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11273,2014-12-31 23:00,289,29,6.6,29,12.0,2229,1002.8,1020.3,-9,...,-1.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
11274,2014-12-31 23:00,294,29,2.9,-9,-9.0,-9,1013.6,1019.4,-9,...,-1.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [6]:
df2015 = get_weather_fromland(20150101,20151231,auth)
df2015.to_csv('./data/weather_fromland_20150101_20151231.csv')
df2015

current_batch : 1/13, current_date : 20150101 - 20150130
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201501010000&tm2=201501302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 2/13, current_date : 20150131 - 20150301
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201501310000&tm2=201503012300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 3/13, current_date : 20150302 - 20150331
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201503020000&tm2=201503312300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 4/13, current_date : 20150401 - 20150430
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201504010000&tm2=201504302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 5/13, current_date : 20150501 - 20150530
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201505010000&tm2=201505302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
365
current_batch : 6/13, current_date : 20150531 - 20150629
https://

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2015-01-01 00:00,90,29,3.5,-9,-9.0,-9,1014.4,1016.7,2,...,-2.9,-99.0,-99.0,-99.0,-99.0,-9,1.0,-9,3,2
1,2015-01-01 00:00,95,27,1.3,27,10.0,2305,1002.1,1022.1,2,...,-7.7,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
2,2015-01-01 00:00,98,32,6.3,34,14.0,2147,1008.0,1022.2,2,...,-4.8,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2015-01-01 00:00,99,32,3.5,32,13.0,2212,1019.4,1023.3,2,...,-6.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,2015-01-01 00:00,100,27,8.6,25,14.0,2203,920.8,1017.3,2,...,-8.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11274,2015-12-31 23:00,285,0,0.2,-9,-9.0,-9,1029.5,1033.8,-9,...,-2.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11275,2015-12-31 23:00,288,29,1.1,-9,-9.0,-9,1031.7,1033.1,-9,...,-5.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11276,2015-12-31 23:00,289,0,0.2,-9,-9.0,-9,1016.1,1033.8,-9,...,-1.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
11277,2015-12-31 23:00,294,27,1.3,-9,-9.0,-9,1027.3,1033.2,-9,...,-1.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [7]:
df2016 = get_weather_fromland(20160101,20161231,auth)
df2016.to_csv('./data/weather_fromland_20160101_20161231.csv')
df2016

current_batch : 1/13, current_date : 20160101 - 20160130
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201601010000&tm2=201601302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
366
current_batch : 2/13, current_date : 20160131 - 20160229
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201601310000&tm2=201602292300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
366
current_batch : 3/13, current_date : 20160301 - 20160330
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201603010000&tm2=201603302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
366
current_batch : 4/13, current_date : 20160331 - 20160429
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201603310000&tm2=201604292300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
366
current_batch : 5/13, current_date : 20160430 - 20160529
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201604300000&tm2=201605292300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
366
current_batch : 6/13, current_date : 20160530 - 20160628
https://

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2016-01-01 00:00,90,0,0.4,-9,-9.0,-9,1027.1,1029.4,6,...,-1.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1,2016-01-01 00:00,95,0,0.3,-9,-9.0,-9,1013.6,1033.6,8,...,-1.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
2,2016-01-01 00:00,98,27,0.5,-9,-9.0,-9,1018.9,1033.0,8,...,-2.8,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2016-01-01 00:00,99,0,0.0,-9,-9.0,-9,1029.5,1033.4,8,...,-1.8,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,2016-01-01 00:00,100,27,7.1,27,11.0,2345,937.0,1032.8,2,...,-5.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13668,2016-12-31 23:00,285,23,0.7,-9,-9.0,-9,1025.9,1030.2,-9,...,-0.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
13669,2016-12-31 23:00,288,0,0.4,-9,-9.0,-9,1028.0,1029.4,-9,...,-2.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
13670,2016-12-31 23:00,289,36,1.2,-9,-9.0,-9,1012.1,1029.5,-9,...,0.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
13671,2016-12-31 23:00,294,0,0.2,-9,-9.0,-9,1023.7,1029.6,-9,...,-0.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [8]:
df20170531 = get_weather_fromland(20170101,20170531,auth)
df20170531.to_csv('./data/weather_fromland_20170101_20170531.csv')
df20170531

current_batch : 1/6, current_date : 20170101 - 20170130
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201701010000&tm2=201701302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
151
current_batch : 2/6, current_date : 20170131 - 20170301
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201701310000&tm2=201703012300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
151
current_batch : 3/6, current_date : 20170302 - 20170331
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201703020000&tm2=201703312300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
151
current_batch : 4/6, current_date : 20170401 - 20170430
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201704010000&tm2=201704302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
151
current_batch : 5/6, current_date : 20170501 - 20170530
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201705010000&tm2=201705302300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
151
current_batch : 6/6, current_date : 20170531 - 20170606
https://apihub

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2017-01-01 00:00,90,14,1.5,-9,-9.0,-9,1024.0,1026.3,7,...,1.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1,2017-01-01 00:00,93,34,1.2,-9,-9.0,-9,1018.1,1030.4,3,...,-1.7,-0.3,0.1,1.0,1.7,-9,-9.0,-9,3,1
2,2017-01-01 00:00,95,0,0.3,-9,-9.0,-9,1010.7,1030.5,3,...,-1.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2017-01-01 00:00,98,11,0.5,-9,-9.0,-9,1015.6,1029.5,3,...,-2.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,2017-01-01 00:00,99,9,0.5,-9,-9.0,-9,1025.9,1029.7,1,...,-1.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15954,2017-06-06 23:00,285,0,0.1,-9,-9.0,-9,1009.0,1012.8,-9,...,17.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
15955,2017-06-06 23:00,288,18,1.9,-9,-9.0,-9,1011.0,1012.3,-9,...,17.8,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
15956,2017-06-06 23:00,289,5,3.6,-9,-9.0,-9,995.2,1011.4,-9,...,17.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
15957,2017-06-06 23:00,294,11,2.1,-9,-9.0,-9,1007.7,1013.1,-9,...,17.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,4,-9


In [12]:
df20170531['datetime'].str.split('-').str[1]

0        01 00:00
1        01 00:00
2        01 00:00
3        01 00:00
4        01 00:00
           ...   
15954    06 23:00
15955    06 23:00
15956    06 23:00
15957    06 23:00
15958    06 23:00
Name: datetime, Length: 357870, dtype: object

In [14]:
df20170531 = df20170531[df20170531['datetime'].str.split('-').str[1].astype(int) <= 5]
df20170531.to_csv('./data/weather_fromland_20170101_20170531.csv')

In [2]:
df2013 = pd.read_csv('./data/weather_fromland_20130101_20131231.csv')
df2014 = pd.read_csv('./data/weather_fromland_20140101_20141231.csv')
df2015 = pd.read_csv('./data/weather_fromland_20150101_20151231.csv')
df2016 = pd.read_csv('./data/weather_fromland_20160101_20161231.csv')
df2017 = pd.read_csv('./data/weather_fromland_20170101_20170531.csv')

In [3]:
df = pd.concat([df2013, df2014, df2015, df2016, df2017])

In [18]:
df.to_csv("./data/weather_fromland_20130101_20170531.csv")

NameError: name 'df' is not defined

In [6]:
df

Unnamed: 0.1,Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,0,2013-01-01 00:00,90,25,6.1,25,11.0,2251,1016.7,1019.0,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,1.0,-9,3,2
1,1,2013-01-01 00:00,95,36,0.6,-9,-9.0,-9,1002.3,1023.1,...,-3.4,-2.6,-2.2,-1.4,-0.6,-9,-9.0,-9,3,2
2,2,2013-01-01 00:00,98,36,0.9,-9,-9.0,-9,1007.3,1021.8,...,-2.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,3,2013-01-01 00:00,99,5,0.9,-9,-9.0,-9,1017.9,1021.8,...,-2.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,-9,-9
4,4,2013-01-01 00:00,100,25,4.9,25,10.0,2201,923.9,1021.2,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344186,2275,2017-05-31 23:00,285,23,1.2,-9,-9.0,-9,997.9,1001.6,...,22.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
344187,2276,2017-05-31 23:00,288,16,2.1,-9,-9.0,-9,1000.5,1001.8,...,22.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
344188,2277,2017-05-31 23:00,289,9,1.4,-9,-9.0,-9,985.5,1001.3,...,22.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
344189,2278,2017-05-31 23:00,294,36,1.5,-9,-9.0,-9,996.8,1002.1,...,22.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [19]:
df = pd.read_csv('./data/weather_fromland_20130101_20170531.csv')

In [16]:
df201706_201906 = pd.read_csv('./data/weather_fromland_20170601_20190626.csv')
df201906_201907 = pd.read_csv("./data/weather_fromland_20190627_20190731.csv")
df2019_202107 = pd.read_csv("./data/weather_fromland_20190101_20210718.csv")
df2021 = pd.read_csv("./data/weather_fromland_20210719_20230319.csv")

In [8]:
df201706_201906

Unnamed: 0.1,Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,0,2017-06-01 00:00,90,32,0.7,-9,-9.0,-9,996.7,998.8,...,18.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1,1,2017-06-01 00:00,93,0,0.0,-9,-9.0,-9,989.6,1000.6,...,17.7,20.2,21.7,22.7,21.9,-9,-9.0,-9,3,2
2,2,2017-06-01 00:00,95,34,0.6,-9,-9.0,-9,983.3,1001.2,...,15.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,3,2017-06-01 00:00,98,2,0.8,-9,-9.0,-9,988.4,1001.2,...,21.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,4,2017-06-01 00:00,99,0,0.0,-9,-9.0,-9,997.6,1001.0,...,20.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,-9,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1723937,13802,2019-06-26 23:00,285,0,0.1,-9,-9.0,-9,1002.2,1005.9,...,21.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
1723938,13803,2019-06-26 23:00,288,0,0.1,-9,-9.0,-9,1005.2,1006.5,...,20.7,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
1723939,13804,2019-06-26 23:00,289,0,0.4,-9,-9.0,-9,989.7,1005.5,...,20.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
1723940,13805,2019-06-26 23:00,294,11,3.4,-9,-9.0,-9,1000.3,1005.6,...,22.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9


In [9]:
df201906_201907

Unnamed: 0.1,Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,0,2019-06-27 00:00,90,16,2.4,-9,-9.0,-9,1002.4,1004.4,...,21.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
1,1,2019-06-27 00:00,93,7,0.6,-9,-9.0,-9,993.5,1004.4,...,24.0,25.1,26.2,25.8,26.0,-9,-9.0,-9,3,2
2,2,2019-06-27 00:00,95,36,1.1,-9,-9.0,-9,986.4,1004.0,...,23.7,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,3,2019-06-27 00:00,98,0,0.3,-9,-9.0,-9,991.2,1004.2,...,24.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,4,2019-06-27 00:00,99,0,0.4,-9,-9.0,-9,1000.7,1004.2,...,22.7,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1715914,63819,2021-07-18 23:00,285,0,0.3,-9,-9.0,-9,1010.0,1013.0,...,27.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1715915,63820,2021-07-18 23:00,288,0,0.4,-9,-9.0,-9,1012.0,1012.9,...,25.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1715916,63821,2021-07-18 23:00,289,5,1.3,-9,-9.0,-9,997.7,1013.3,...,26.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1715917,63822,2021-07-18 23:00,294,11,2.3,-9,-9.0,-9,1008.5,1013.6,...,27.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [10]:
df2019_202107

Unnamed: 0.1,Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,0,2019-01-01 00:00,90,32,0.8,-9,-9.0,-9,1028.9,1031.2,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1,1,2019-01-01 00:00,93,34,0.6,-9,-9.0,-9,1023.2,1036.0,...,-12.1,-6.6,-4.2,-2.4,-1.0,-9,-9.0,-9,3,-9
2,2,2019-01-01 00:00,95,2,0.5,-9,-9.0,-9,1016.2,1037.1,...,-11.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,3,2019-01-01 00:00,98,2,1.6,-9,-9.0,-9,1021.2,1036.6,...,-8.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,4,2019-01-01 00:00,99,0,0.0,-9,-9.0,-9,1031.9,1036.0,...,-10.7,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2189590,68379,2021-07-18 23:00,285,0,0.3,-9,-9.0,-9,1010.0,1013.0,...,27.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
2189591,68380,2021-07-18 23:00,288,0,0.4,-9,-9.0,-9,1012.0,1012.9,...,25.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
2189592,68381,2021-07-18 23:00,289,5,1.3,-9,-9.0,-9,997.7,1013.3,...,26.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
2189593,68382,2021-07-18 23:00,294,11,2.3,-9,-9.0,-9,1008.5,1013.6,...,27.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [11]:
df2021

Unnamed: 0.1,Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,0,2021-07-19 00:00,90,0,0.4,-9,-9.0,-9,1010.6,1012.6,...,25.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1,1,2021-07-19 00:00,93,9,0.7,-9,-9.0,-9,1002.0,1012.9,...,26.0,28.6,29.5,30.3,29.8,-9,-9.0,-9,4,1
2,2,2021-07-19 00:00,95,18,0.8,-9,-9.0,-9,995.0,1012.6,...,27.0,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
3,3,2021-07-19 00:00,98,25,1.8,-9,-9.0,-9,999.7,1012.9,...,25.5,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
4,4,2021-07-19 00:00,99,7,0.9,-9,-9.0,-9,1009.1,1012.6,...,24.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,1,-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1530199,12793,2023-05-15 11:00,288,11,1.3,-9,-9.0,-9,1009.0,1009.9,...,38.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530200,12794,2023-05-15 11:00,289,18,1.7,-9,-9.0,-9,994.9,1010.5,...,39.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530201,12795,2023-05-15 11:00,294,25,2.4,-9,-9.0,-9,1006.0,1011.1,...,30.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530202,12796,2023-05-15 11:00,295,36,1.3,-9,-9.0,-9,1006.0,1011.2,...,37.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [20]:
df_fin = pd.concat([df, df201706_201906, df201906_201907, df2021])

In [24]:
df_fin['datetime'].str.split(' ').str[0].nunique()

3787

In [25]:
(365 * 10) + (1 * 2) + 31 + 28 + 31 + 30 + 15

3787

In [14]:
df_fin.drop(columns='Unnamed: 0', inplace=True)
df_fin

Unnamed: 0,datetime,STN_ID,WD_16,WS_m/s,GST_WD,GST_WS,GST_TM,PA_hPa,PS_hPa,PT_nan,...,TS_C,TE_5,TE_10,TE_20,TE_30,ST_SEA,WH_m,BF_nan,IR_nan,IX_nan
0,2013-01-01 00:00,90,25,6.1,25,11.0,2251,1016.7,1019.0,0,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,1.0,-9,3,2
1,2013-01-01 00:00,95,36,0.6,-9,-9.0,-9,1002.3,1023.1,1,...,-3.4,-2.6,-2.2,-1.4,-0.6,-9,-9.0,-9,3,2
2,2013-01-01 00:00,98,36,0.9,-9,-9.0,-9,1007.3,1021.8,6,...,-2.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
3,2013-01-01 00:00,99,5,0.9,-9,-9.0,-9,1017.9,1021.8,7,...,-2.3,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,-9,-9
4,2013-01-01 00:00,100,25,4.9,25,10.0,2201,923.9,1021.2,2,...,-4.9,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1530199,2023-05-15 11:00,288,11,1.3,-9,-9.0,-9,1009.0,1009.9,-9,...,38.1,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530200,2023-05-15 11:00,289,18,1.7,-9,-9.0,-9,994.9,1010.5,-9,...,39.6,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530201,2023-05-15 11:00,294,25,2.4,-9,-9.0,-9,1006.0,1011.1,-9,...,30.2,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9
1530202,2023-05-15 11:00,295,36,1.3,-9,-9.0,-9,1006.0,1011.2,-9,...,37.4,-99.0,-99.0,-99.0,-99.0,-9,-9.0,-9,3,-9


In [17]:
df_fin[['datetime', 'STN_ID']].groupby('datetime').agg({'STN_ID':'count'}).STN_ID.unique()

array([92,  1, 91, 88, 90, 93,  6,  7,  8,  3,  2,  5,  4, 89, 94, 95, 18,
       84, 12, 80, 87, 96, 76, 86, 97], dtype=int64)

In [26]:
df_fin.to_csv("./data/weather_fromland_20130101_20230515.csv", encoding='utf-8', index=0)

In [7]:
test = df_fin[['datetime', 'STN_ID']].groupby('datetime').agg({'STN_ID':'count'})

In [2]:
import pandas as pd

In [6]:
df_fin = pd.read_csv('./data/weather_fromland_20130101_20230515.csv')

In [17]:
test[test['STN_ID'] == 12]

Unnamed: 0_level_0,STN_ID
datetime,Unnamed: 1_level_1
2015-11-02 22:00,12


In [18]:
test[test['STN_ID'] == 18]

Unnamed: 0_level_0,STN_ID
datetime,Unnamed: 1_level_1
2015-06-27 05:00,18


In [4]:
df_val = get_weather_fromland(20130117,20130118,auth)

current_batch : 1/1, current_date : 20130117 - 20130119
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201301170000&tm2=201301192300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
2


In [12]:
df_val_grouped = df_val.groupby('datetime').agg({'STN_ID':'count'})

In [14]:
df_val_grouped[df_val_grouped['STN_ID'] == 1]

Unnamed: 0_level_0,STN_ID
datetime,Unnamed: 1_level_1
2013-01-17 12:30,1


In [15]:
df_val2 = get_weather_fromland(20130205,20130206,auth)
df_val_grouped2 = df_val2.groupby('datetime').agg({'STN_ID':'count'})
df_val_grouped2[df_val_grouped2['STN_ID'] == 1]

current_batch : 1/1, current_date : 20130205 - 20130207
https://apihub.kma.go.kr/api/typ01/url/kma_sfctm3.php?tm1=201302050000&tm2=201302072300&help=0&authKey=36MMtU5BTwijDLVOQZ8ISw
2


Unnamed: 0_level_0,STN_ID
datetime,Unnamed: 1_level_1
2013-02-05 20:30,1
