In [2]:
%load_ext autoreload
%autoreload 2

import logging
import os
import sys
import time
    
wd = os.path.abspath("__file__").replace("/__file__", "").replace("notebooks", "")
os.chdir(wd)

import numpy as np
import pandas as pd

from datetime import datetime, date
from logging.handlers import TimedRotatingFileHandler


from bok.api import Bok
from bok.utils import get_jinja_yaml_conf, create_db_engine, Postgres_connect
from bok.processing import get_statdate_table

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
# os.chdir(os.path.dirname(__file__))
conf = get_jinja_yaml_conf('./conf/api.yml', './conf/logging.yml')
end_date = datetime.now().date() 


# logger 설정
logger = logging.getLogger('main')
logging.basicConfig(level=eval(conf['logging']['level']),
    format=conf['logging']['format'],
    handlers = [TimedRotatingFileHandler(filename =  conf['logging']['file_name'],
                                when=conf['logging']['when'],
                                interval=conf['logging']['interval'],
                                backupCount=conf['logging']['backupCount']), logging.StreamHandler()]
                )


In [4]:
# Only for notebooks
import re

os.environ['_ts'] = datetime.astimezone(datetime.now()).strftime('%Y-%m-%d %H:%m:%S %z')

with open('./conf/credentials', "r") as file:
    # 각 라인 읽기
    for line in file:
        # 주석(#) 또는 빈 줄은 무시
        if line.strip() == '' or line.startswith('#'):
            continue

        # 각 라인을 '='를 기준으로 key와 value로 분리
        key, value = line.strip().split('=', 1)

        # $ENV 형식의 환경변수가 있을 경우 해당 값을 가져와서 설정
        env_var_pattern = re.compile(r'\$(\w+)')
        matches = env_var_pattern.findall(value)
        for match in matches:
            value = value.replace(f"${match}", os.environ.get(match, "")).replace('"', '')

        # 환경변수로 설정
        os.environ[key] = value

engine = create_db_engine(os.environ)
postgres_conn = Postgres_connect(engine)
bok = Bok(auth_key = os.environ['auth_key'])

2024-04-09 15:33:54,208 (utils.py 53) INFO ::: Connect to 172.20.10.3. DB_NAME is economics


# 1. statistic_list

In [20]:
stat_list = bok.get_statistic_list(rename = conf['data']['stat_list']['rename'], _ts = os.environ['_ts'])

upload_stat_list = ext_notin_db(db_conn = postgres_conn,
    schema = conf['schema_name'],
    table = conf['data']['stat_list']['table_name'],
    data = stat_list,
    subset = conf['data']['stat_list']['dup_cols'])

upload_rows = upload_stat_list.to_sql(conf['data']['stat_list']['table_name'], schema = conf['schema_name'], con = engine, index = False, if_exists = "append")

# 2. 통계용어사전

In [55]:
_ts

'2024-02-11 00:02:1707577820 +0900'

In [45]:
import requests
import json
date = None
_ts = os.environ['_ts']

date = date if date else pd.to_datetime(_ts).strftime('%Y%m%d')

params = {
  "header": {
    "guidSeq": 1,
    "trxCd": "OSUSD01R01",
    "sysCd": "03",
    "fstChnCd": "WEB",
    "langDvsnCd": "KO",
    "envDvsnCd": "D",
    "sndRspnDvsnCd": "S",
    "sndDtm": '15000424',
    "usrId": "IECOSPC",
    "pageNum": 1,
    "pageCnt": 1000
  },
  "data": {
  }
}


# GET 요청 보내기
response = requests.post('https://ecos.bok.or.kr/serviceEndpoint/httpService/request.json', json = params)
words = json.loads(response.text)
data = pd.DataFrame(words['data']['statTermList']).rename(columns = conf['data']['stat_word']['rename'])
data = data[conf['data']['stat_word']['rename'].values()]
data['_ts'] = _ts

In [77]:
isinstance(pd.to_datetime(data['수정날짜'].iloc[0]).date(), (datetime, date))

True

In [64]:
data['수정날짜'].map(lambda x: pd.to_datetime(x))

0     2007-04-24 00:00:00
1     2023-05-19 13:00:50
2     2007-02-04 21:44:30
3     2007-02-04 20:29:35
4     2024-01-22 11:26:21
              ...        
787   2007-02-04 21:44:30
788   2007-02-04 21:44:30
789   2010-08-30 00:00:00
790   2022-07-01 10:56:56
791   2022-07-01 10:56:57
Name: 수정날짜, Length: 792, dtype: datetime64[ns]

In [50]:
conf['data']['stat_word']['rename'].keys()

dict_keys(['statTermId', 'statTermNm', 'statTermEngNm', 'statTermDesc', 'zRegDtm', 'zModDtm'])

# 3. 통계 세부항목 목록

In [21]:
stat_list = postgres_conn.get_data(schema_name = conf['schema_name'],
            table_name = conf['data']['stat_list']['table_name'],
            columns = ['통계표코드'],
            where = ["검색가능여부 = 'Y'"], is_orderby = True).to_numpy().ravel()

In [35]:
upload_rows = 0
for stat in stat_list:
    logger.info(f"Upload {stat} starts!")
    stat_item = bok.get_statistic_item(stat, _ts = os.environ['_ts'], rename = conf['data']['stat_item']['rename'])

    upload_stat_item = ext_notin_db(db_conn = postgres_conn,
            schema = conf['schema_name'],
            table = conf['data']['stat_item']['table_name'],
            data = stat_item,
            subset = conf['data']['stat_item']['dup_cols'])

    upload_rows += postgres_conn.upsert(data = upload_stat_item,
                     schema_name = conf['schema_name'],
                     table_name = conf['data']['stat_item']['table_name'])



logger.info(f"Insert rows: {upload_rows}.")

2024-02-05 13:22:05,156 (3572690103.py 2) INFO ::: Upload 901Y012 starts!
2024-02-05 13:22:05,935 (3572690103.py 15) INFO ::: Insert rows: 0.
2024-02-05 13:22:05,936 (3572690103.py 2) INFO ::: Upload 303Y001 starts!
2024-02-05 13:22:06,817 (3572690103.py 15) INFO ::: Insert rows: 336.
2024-02-05 13:22:06,818 (3572690103.py 2) INFO ::: Upload 303Y002 starts!
2024-02-05 13:22:07,620 (3572690103.py 15) INFO ::: Insert rows: 252.
2024-02-05 13:22:07,620 (3572690103.py 2) INFO ::: Upload 303Y003 starts!
2024-02-05 13:22:08,389 (3572690103.py 15) INFO ::: Insert rows: 120.
2024-02-05 13:22:08,390 (3572690103.py 2) INFO ::: Upload 303Y004 starts!
2024-02-05 13:22:09,226 (3572690103.py 15) INFO ::: Insert rows: 336.
2024-02-05 13:22:09,227 (3572690103.py 2) INFO ::: Upload 303Y005 starts!
2024-02-05 13:22:10,037 (3572690103.py 15) INFO ::: Insert rows: 252.
2024-02-05 13:22:10,037 (3572690103.py 2) INFO ::: Upload 303Y006 starts!
2024-02-05 13:22:11,051 (3572690103.py 15) INFO ::: Insert rows:

# 4. 통계 조회조건 설정

In [4]:
stat_item = postgres_conn.get_data(schema_name = conf['schema_name'],
            table_name = conf['data']['stat_item']['table_name'],
            columns = ['통계표코드', '통계항목코드', '주기', '수록시작일자', '수록종료일자', '자료수'],
            is_orderby = True)

In [11]:
stat_item.groupby(['통계표코드', '주기']).agg({
    '수록시작일자': 'first',
    '수록종료일자': 'last',
    '자료수': 'sum'
}).reset_index()

In [19]:
for idx, stat_series in stat_group.iterrows():
    stat, period, start, end, count = stat_series
    logger.info(f"Upload {stat} starts! ({idx+1}/{stat_group.shape[0]} items...)")

    db_count = postgres_conn.get_count(schema_name = conf['schema_name'],
             table_name = conf['data']['stat_search']['table_name'],
             where = [f"통계표코드 = '{stat}'", f"주기 = '{period}'"])

    if db_count == count:
        logger.info(f"All data is uploaded in DB ({db_count}/{count}). Skip API call.")
        continue
    
    stat_search = bok.get_statistic_search(stat, 
                                         period = period, 
                                         start = start, 
                                         end = end, 
                                         _ts = os.environ['_ts'], rename = conf['data']['stat_search']['rename'])

    stat_search['주기'] = period
    
    stat_search.fillna({col: '' for col in conf['data']['stat_search']['dup_cols']} | {'값': np.nan}, inplace = True)
    
    upload_stat_search = ext_notin_db(db_conn = postgres_conn,
            schema = conf['schema_name'],
            table = conf['data']['stat_search']['table_name'],
            data = stat_search,
            subset = conf['data']['stat_search']['dup_cols'])

    # 한은에서 중복 데이터 조회 오류 수정 시 제거할 코드
    upload_stat_search.drop_duplicates(conf['data']['stat_search']['dup_cols'], inplace = True)


    upload_rows = postgres_conn.upsert(data = upload_stat_search,
                     schema_name = conf['schema_name'],
                     table_name = conf['data']['stat_search']['table_name'])
    
    print(upload_rows)


# logger.info(f"Insert rows: {upload_rows}.")

2024-02-10 08:33:48,774 (3058550100.py 3) INFO ::: Upload 101Y001 starts! (1/946 items...)
2024-02-10 08:33:48,782 (3058550100.py 10) INFO ::: All data is uploaded in DB (263/263). Skip API call.
2024-02-10 08:33:48,782 (3058550100.py 3) INFO ::: Upload 101Y001 starts! (2/946 items...)
2024-02-10 08:33:48,784 (3058550100.py 10) INFO ::: All data is uploaded in DB (3287/3287). Skip API call.
2024-02-10 08:33:48,784 (3058550100.py 3) INFO ::: Upload 101Y002 starts! (3/946 items...)
2024-02-10 08:33:48,785 (3058550100.py 10) INFO ::: All data is uploaded in DB (810/810). Skip API call.
2024-02-10 08:33:48,785 (3058550100.py 3) INFO ::: Upload 101Y002 starts! (4/946 items...)
2024-02-10 08:33:48,797 (3058550100.py 10) INFO ::: All data is uploaded in DB (9811/9811). Skip API call.
2024-02-10 08:33:48,797 (3058550100.py 3) INFO ::: Upload 101Y002 starts! (5/946 items...)
2024-02-10 08:33:48,798 (3058550100.py 10) INFO ::: All data is uploaded in DB (3265/3265). Skip API call.
2024-02-10 08:

0


2024-02-10 08:34:05,007 (api.py 92) INFO ::: Read 3182/3182 rows....
2024-02-10 08:34:23,256 (utils.py 153) INFO ::: Upload data starts: 0 rows.
2024-02-10 08:34:23,258 (3058550100.py 3) INFO ::: Upload 111Y001 starts! (83/946 items...)
2024-02-10 08:34:23,259 (3058550100.py 10) INFO ::: All data is uploaded in DB (116/116). Skip API call.
2024-02-10 08:34:23,260 (3058550100.py 3) INFO ::: Upload 111Y001 starts! (84/946 items...)
2024-02-10 08:34:23,261 (3058550100.py 10) INFO ::: All data is uploaded in DB (1438/1438). Skip API call.
2024-02-10 08:34:23,261 (3058550100.py 3) INFO ::: Upload 111Y001 starts! (85/946 items...)


0


2024-02-10 08:34:37,516 (utils.py 153) INFO ::: Upload data starts: 0 rows.
2024-02-10 08:34:37,518 (3058550100.py 3) INFO ::: Upload 111Y002 starts! (86/946 items...)
2024-02-10 08:34:37,520 (3058550100.py 10) INFO ::: All data is uploaded in DB (436/436). Skip API call.
2024-02-10 08:34:37,520 (3058550100.py 3) INFO ::: Upload 111Y002 starts! (87/946 items...)
2024-02-10 08:34:37,521 (3058550100.py 10) INFO ::: All data is uploaded in DB (5267/5267). Skip API call.
2024-02-10 08:34:37,521 (3058550100.py 3) INFO ::: Upload 111Y002 starts! (88/946 items...)
2024-02-10 08:34:37,522 (3058550100.py 10) INFO ::: All data is uploaded in DB (1747/1747). Skip API call.
2024-02-10 08:34:37,523 (3058550100.py 3) INFO ::: Upload 111Y003 starts! (89/946 items...)
2024-02-10 08:34:37,523 (3058550100.py 10) INFO ::: All data is uploaded in DB (116/116). Skip API call.
2024-02-10 08:34:37,524 (3058550100.py 3) INFO ::: Upload 111Y003 starts! (90/946 items...)
2024-02-10 08:34:37,525 (3058550100.py 1

0


2024-02-10 08:34:51,688 (utils.py 153) INFO ::: Upload data starts: 0 rows.
2024-02-10 08:34:51,690 (3058550100.py 3) INFO ::: Upload 111Y004 starts! (92/946 items...)
2024-02-10 08:34:51,691 (3058550100.py 10) INFO ::: All data is uploaded in DB (421/421). Skip API call.
2024-02-10 08:34:51,691 (3058550100.py 3) INFO ::: Upload 111Y004 starts! (93/946 items...)
2024-02-10 08:34:51,692 (3058550100.py 10) INFO ::: All data is uploaded in DB (5195/5195). Skip API call.
2024-02-10 08:34:51,693 (3058550100.py 3) INFO ::: Upload 111Y004 starts! (94/946 items...)
2024-02-10 08:34:51,694 (3058550100.py 10) INFO ::: All data is uploaded in DB (1723/1723). Skip API call.
2024-02-10 08:34:51,694 (3058550100.py 3) INFO ::: Upload 111Y005 starts! (95/946 items...)
2024-02-10 08:34:51,695 (3058550100.py 10) INFO ::: All data is uploaded in DB (436/436). Skip API call.
2024-02-10 08:34:51,695 (3058550100.py 3) INFO ::: Upload 111Y005 starts! (96/946 items...)
2024-02-10 08:34:51,696 (3058550100.py 1

0


2024-02-10 08:35:05,570 (utils.py 153) INFO ::: Upload data starts: 0 rows.
2024-02-10 08:35:05,572 (3058550100.py 3) INFO ::: Upload 112Y002 starts! (113/946 items...)
2024-02-10 08:35:05,573 (3058550100.py 10) INFO ::: All data is uploaded in DB (280/280). Skip API call.
2024-02-10 08:35:05,574 (3058550100.py 3) INFO ::: Upload 112Y002 starts! (114/946 items...)
2024-02-10 08:35:05,575 (3058550100.py 10) INFO ::: All data is uploaded in DB (3470/3470). Skip API call.
2024-02-10 08:35:05,575 (3058550100.py 3) INFO ::: Upload 112Y002 starts! (115/946 items...)
2024-02-10 08:35:05,576 (3058550100.py 10) INFO ::: All data is uploaded in DB (1150/1150). Skip API call.
2024-02-10 08:35:05,576 (3058550100.py 3) INFO ::: Upload 121Y002 starts! (116/946 items...)
2024-02-10 08:35:05,577 (3058550100.py 10) INFO ::: All data is uploaded in DB (721/721). Skip API call.
2024-02-10 08:35:05,577 (3058550100.py 3) INFO ::: Upload 121Y002 starts! (117/946 items...)
2024-02-10 08:35:05,578 (3058550100

0


2024-02-10 08:35:05,969 (api.py 92) INFO ::: Read 332/332 rows....
2024-02-10 08:35:23,755 (utils.py 153) INFO ::: Upload data starts: 132 rows.
2024-02-10 08:35:23,777 (3058550100.py 3) INFO ::: Upload 131Y010 starts! (144/946 items...)


132


2024-02-10 08:35:24,132 (api.py 92) INFO ::: Read 1328/1328 rows....


KeyboardInterrupt: 

# 5. 100대 통계지표

In [44]:
stat_key = bok.get_statistic_key(_ts = os.environ['_ts'], rename = conf['data']['stat_key']['rename'])

upload_stat_key = ext_notin_db(db_conn = postgres_conn,
            schema = conf['schema_name'],
            table = conf['data']['stat_key']['table_name'],
            data = stat_key,
            subset = conf['data']['stat_key']['dup_cols'])

upload_rows += postgres_conn.upsert(data = upload_stat_key,
                     schema_name = conf['schema_name'],
                     table_name = conf['data']['stat_key']['table_name'])

# 6. 통계메타DB

In [4]:
stat_meta = bok.get_statistic_meta(rename = conf['data']['stat_meta']['rename'])
stat_meta['메타데이터'] = stat_meta['메타데이터'].map(lambda x: x[:10] if isinstance(x, str) else x)

2024-02-11 16:59:43,056 (api.py 109) INFO ::: Get statistic meta data (1/20)
2024-02-11 16:59:43,152 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:43,754 (api.py 109) INFO ::: Get statistic meta data (2/20)
2024-02-11 16:59:43,848 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:44,451 (api.py 109) INFO ::: Get statistic meta data (3/20)
2024-02-11 16:59:44,539 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:45,142 (api.py 109) INFO ::: Get statistic meta data (4/20)
2024-02-11 16:59:45,226 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:45,829 (api.py 109) INFO ::: Get statistic meta data (5/20)
2024-02-11 16:59:45,916 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:46,518 (api.py 109) INFO ::: Get statistic meta data (6/20)
2024-02-11 16:59:46,607 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:47,210 (api.py 109) INFO ::: Get statistic meta data (7/20)
2024-02-11 16:59:47,306 (api.py 149) INFO ::: Read 15

In [51]:
postgres_conn.get_data(schema_name = conf['schema_name'],
            table_name = conf['data']['stat_meta']['table_name'],
            columns = conf['data']['stat_meta']['dup_cols'])

Unnamed: 0,레벨,데이터명,통계항목코드,메타데이터
0,2,경제심리지수,0000000098,
1,2,경제심리지수,0000000099,
2,1,경제심리지수,0000000100,
3,2,경제심리지수,0000000101,
4,2,경제심리지수,0000000102,
...,...,...,...,...
3116,2,뉴스심리지수,N08,
3117,3,뉴스심리지수,N083,
3118,3,뉴스심리지수,N081,
3119,3,뉴스심리지수,N084,


In [5]:
stat_meta = bok.get_statistic_meta(rename = conf['data']['stat_meta']['rename'])
stat_meta['메타데이터_key'] = stat_meta['메타데이터'].map(lambda x: x[:30] if isinstance(x, str) else '')
stat_meta = stat_meta.drop_duplicates(conf['data']['stat_meta']['dup_cols'])

upload_stat_meta = postgres_conn.ext_notin_db(
            schema_name = conf['schema_name'],
            table_name = conf['data']['stat_meta']['table_name'],
            data = stat_meta,
            subset = conf['data']['stat_meta']['dup_cols'])

upload_stat_meta['레벨'] = upload_stat_meta['레벨'].astype(str)

postgres_conn.insert_db(data = upload_stat_meta,
                     schema_name = conf['schema_name'],
                     table_name = conf['data']['stat_meta']['table_name'])

2024-02-11 16:59:57,114 (api.py 109) INFO ::: Get statistic meta data (1/20)
2024-02-11 16:59:57,210 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:57,812 (api.py 109) INFO ::: Get statistic meta data (2/20)
2024-02-11 16:59:57,909 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:58,512 (api.py 109) INFO ::: Get statistic meta data (3/20)
2024-02-11 16:59:58,598 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:59,200 (api.py 109) INFO ::: Get statistic meta data (4/20)
2024-02-11 16:59:59,290 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 16:59:59,893 (api.py 109) INFO ::: Get statistic meta data (5/20)
2024-02-11 16:59:59,975 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 17:00:00,577 (api.py 109) INFO ::: Get statistic meta data (6/20)
2024-02-11 17:00:00,696 (api.py 149) INFO ::: Read 157/157 rows....
2024-02-11 17:00:01,298 (api.py 109) INFO ::: Get statistic meta data (7/20)
2024-02-11 17:00:01,447 (api.py 149) INFO ::: Read 15

In [14]:
import requests, json
import pandas as pd
params = {
        "header": {
            
        },
"data": {
}
}

response = requests.get('http://www.bok.or.kr/portal/singl/siteConectStats/rcord.do?menuNo=200775&conectUrlad=/portal/stats/statsPublictSchdul/listCldr.do?menuNo=200775')
words = json.loads(response.text)
words
# web_data = pd.DataFrame(words['data']['statDescList']).rename(columns = rename)
# web_data = web_data[[col for col in rename.values() if col in web_data.columns]]


# data = pd.DataFrame()

# for idx, row in web_data.iterrows():
#     logger.info(f"Get statistic meta data ({idx + 1}/{web_data.shape[0]})")
#     api_data = self._get_api_results("StatisticMeta", row['데이터명'], rename = rename, _ts = _ts)
#     api_data[row.index] = row.tolist()
#     data = pd.concat([data, api_data])

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [15]:
response.text

''

# 6. stat_date

In [35]:
data.unique()[:-1]

array(['2015.1.25 08:00', '2015.3.5 08:00', '2015.4.25 08:00',
       '2015.6.5 08:00', '2015.7.25 08:00', '2015.9.5 08:00',
       '2015.10.24 08:00', '2015.12.18 08:00', '2015.12.5 08:00',
       '2015.1.25 06:00', '2015.2.21 06:00', '2015.3.27 06:00',
       '2015.4.25 06:00', '2015.5.22 06:00', '2015.6.27 06:00',
       '2015.7.25 06:00', '2015.8.21 06:00', '2015.9.27 06:00',
       '2015.10.24 06:00', '2015.11.27 06:00', '2015.12.27 06:00',
       '2015.1.23 06:00', '2015.3.22 06:00', '2015.4.23 06:00',
       '2015.6.21 06:00', '2015.7.23 06:00', '2015.9.24 06:00',
       '2015.10.22 06:00', '2015.11.20 06:00', '2015.12.20 06:00',
       '2015.1.17 12:00', '2015.2.16 12:00', '2015.3.15 12:00',
       '2015.4.16 12:00', '2015.5.16 12:00', '2015.6.14 12:00',
       '2015.7.15 12:00', '2015.8.13 12:00', '2015.9.13 12:00',
       '2015.10.16 12:00', '2015.11.14 12:00', '2015.12.16 12:00',
       '2015.1.31 17:00', '2015.2.29 17:00', '2015.3.29 17:00',
       '2015.4.30 17:00', '2015.

In [38]:
pd.Series(data.unique()[:-1])

0       2015.1.25 08:00
1        2015.3.5 08:00
2       2015.4.25 08:00
3        2015.6.5 08:00
4       2015.7.25 08:00
             ...       
101    2015.12.31 00:00
102     2015.2.27 12:00
103     2015.5.22 12:00
104     2015.8.21 12:00
105    2015.11.20 12:00
Length: 106, dtype: object

In [53]:
data.unique()[-16]

'2015.02.29 00:00'

In [52]:
pd.to_datetime(data.unique()[-16:-15], format = '%Y.%m.%d %H:%M')

ValueError: day is out of range for month, at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [54]:
url

'http://www.bok.or.kr/portal/stats/sddsTrgetStats/listStats.do?menuNo=200777'

In [73]:
categories = conf['data']['stat_date']['url_info']

for name, url in categories.items():
    logger.info(f"upload stat_date for {name} starts!")
    max_date = postgres_conn.get_maxmin_col(conf['schema_name'], conf['data']['stat_date']['table_name'], column = '공표일',
                            where = [f"카테고리 = '{name}'"])[0]

    start_year = conf['data']['stat_date']['start_year'] if max_date is None else max(max_date.year, conf['data']['stat_date']['start_year'])
    data = ''
    
    for year in range(start_year, datetime.now().year+1):
        if year != 2011:
            continue
        logger.info(f"processing year... {year}")
        data = get_statdate_table(f"{url}&year={year}", year)
        data['category'] = name
    break

2024-04-09 16:17:40,681 (335566890.py 4) INFO ::: upload stat_date for 통계종류별 공표일정 starts!
2024-04-09 16:17:40,682 (335566890.py 14) INFO ::: processing year... 2011


In [75]:
data

Unnamed: 0,index,static,info,info_processing,category
0,2011.01,통화 및 유동성,1.12 12:00 (2010.11),[1.12 12:00 (2010.11)],통계종류별 공표일정
1,2011.02,통화 및 유동성,2.10 12:00 (2010.12),[2.10 12:00 (2010.12)],통계종류별 공표일정
2,2011.03,통화 및 유동성,3.9 12:00 (2011.1),[3.9 12:00 (2011.1)],통계종류별 공표일정
3,2011.04,통화 및 유동성,4.11 12:00 (2011.2),[4.11 12:00 (2011.2)],통계종류별 공표일정
4,2011.05,통화 및 유동성,5.12 12:00 (2011.3),[5.12 12:00 (2011.3)],통계종류별 공표일정
...,...,...,...,...,...
199,2011.08,소비자동향조사,8.25 06:00 (2011.8),[8.25 06:00 (2011.8)],통계종류별 공표일정
200,2011.09,소비자동향조사,9.27 06:00 (2011.9),[9.27 06:00 (2011.9)],통계종류별 공표일정
201,2011.10,소비자동향조사,10.27 06:00 (2011.10),[10.27 06:00 (2011.10)],통계종류별 공표일정
202,2011.11,소비자동향조사,11.24 06:00 (2011.11),[11.24 06:00 (2011.11)],통계종류별 공표일정


In [62]:
data[data['date'].isnull()]

Unnamed: 0,index,static,info,date,category
159,2011.04,산업연관표,,,통계종류별 공표일정
173,2011.06,고용표,,,통계종류별 공표일정


In [76]:
p = '[{\'name\': \'모빌리티서비스\', \'description\': "모빌리티(Mobility)는 사람들의 이동을 편리하게 하는 데 기여하는 각종 서비스나 이동수단을 폭넓게 말하는 개념이다. 최근에는 버스, 택시, 철도, 공유차량 등 다양한 이동수단을 제조 단계에서부터 서비스와 결합시키는 \'서비스로서의 모빌리티(마스/MaaS, Mobility as a Service)’가 모빌리티서비스에서 주목되고 있다. 교통 산업, 운송업, 자동차 산업, 자율주행기술, 전기차 산업, 공유 모빌리티 산업과 관련이 있다.", \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'화물운송\', \'description\': \'안전운임제의 적용 여부, 유가 상승, 화물운송 입찰제도에 대한 화물주와 차주 간의 이견 발생 등의 이슈가 있으며 물류 운송 및 배송업과 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'국제물류\', \'description\': \'국제물류는 해운운임 상승과 선박 지연, 우크라이나 전쟁으로 인한 노선 중단 및 국제 유가 상승, 코로나19로 인한 중국의 항만 봉쇄 등의 이슈에 영향을 받으며 물류, 유통, 항만 등과 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'해운항만물류\', \'description\': \'코로나 팬데믹 시기의 항만 적체 현상과 글로벌 물류 공급망 이슈, 유가 및 물류비 상승, 물류 적체와 실질 선복 공급량의 감소 등의 이슈가 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'코로나19\', \'description\': \'2019년 처음 발생하여 보고된 급성 호흡기 전염병으로서 전 세계에 빠른 속도로 확산되어 수많은 확진자와 사망자를 발생시켰다. 높은 감염력과 치사율로 인해 비대면 문화를 확산시켰으며 제약 바이오 및 의료기기 등과 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'해운운임\', \'description\': \'화물 등을 운송하는 데 드는 비용으로서 컨테이너 해운시장에서의 공급망 붕괴, 중국 상해항 봉쇄, IMO 환경 규제로 인한 실지 선복 증가의 제약 등의 이슈가 있다. 해상물류 및 해운업, 조선업 분야와 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}]'

In [80]:
p

'[{\'name\': \'모빌리티서비스\', \'description\': "모빌리티(Mobility)는 사람들의 이동을 편리하게 하는 데 기여하는 각종 서비스나 이동수단을 폭넓게 말하는 개념이다. 최근에는 버스, 택시, 철도, 공유차량 등 다양한 이동수단을 제조 단계에서부터 서비스와 결합시키는 \'서비스로서의 모빌리티(마스/MaaS, Mobility as a Service)’가 모빌리티서비스에서 주목되고 있다. 교통 산업, 운송업, 자동차 산업, 자율주행기술, 전기차 산업, 공유 모빌리티 산업과 관련이 있다.", \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'화물운송\', \'description\': \'안전운임제의 적용 여부, 유가 상승, 화물운송 입찰제도에 대한 화물주와 차주 간의 이견 발생 등의 이슈가 있으며 물류 운송 및 배송업과 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'국제물류\', \'description\': \'국제물류는 해운운임 상승과 선박 지연, 우크라이나 전쟁으로 인한 노선 중단 및 국제 유가 상승, 코로나19로 인한 중국의 항만 봉쇄 등의 이슈에 영향을 받으며 물류, 유통, 항만 등과 관련 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\': \'해운항만물류\', \'description\': \'코로나 팬데믹 시기의 항만 적체 현상과 글로벌 물류 공급망 이슈, 유가 및 물류비 상승, 물류 적체와 실질 선복 공급량의 감소 등의 이슈가 있다.\', \'type\': {\'name\': \'keyword\', \'country\': \'KR\', \'code\': None}}, {\'name\':

In [89]:
json.dumps([{'a': 'b'}, {"cc": 'sdfs'}])

'[{"a": "b"}, {"cc": "sdfs"}]'

In [84]:
import json
json.loads(f"{p}")

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 3 (char 2)

# TEST

In [114]:
stat_list[stat_list['통계표코드'] == '402Y014']

Unnamed: 0,통계표코드,주기
332,402Y014,M


In [88]:
p = bok.get_statistic_search(stat, 
                                         period = period, 
                                         start = period_maker(conf['api_start_date'], period), 
                                         end = period_maker(os.environ['_ts'], period), 
                                         _ts = os.environ['_ts'], rename = conf['data']['stat_search']['rename'])



2024-02-06 01:38:19,580 (api.py 92) INFO ::: Read 100000/295265 rows....
2024-02-06 01:38:30,439 (api.py 92) INFO ::: Read 200000/295265 rows....
2024-02-06 01:38:40,221 (api.py 92) INFO ::: Read 295265/295265 rows....


In [89]:
p[p.duplicated(key, keep = False)].sort_values(key)

Unnamed: 0,통계표코드,통계명,통계항목코드1,통계항목명1,통계항목코드2,통계항목명2,통계항목코드3,통계항목명3,통계항목코드4,통계항목명4,단위,가중치,시점,값,_ts


In [106]:
p = bok.get_statistic_search('404Y014', 
                                         period = 'M', 
                                         start = period_maker(conf['api_start_date'], 'M'), 
                                         end = period_maker(os.environ['_ts'], 'M'), 
                                         _ts = os.environ['_ts'], rename = conf['data']['stat_search']['rename'])

2024-02-06 01:44:19,060 (api.py 92) INFO ::: Read 100000/295265 rows....
2024-02-06 01:44:29,230 (api.py 92) INFO ::: Read 200000/295265 rows....
2024-02-06 01:44:37,935 (api.py 92) INFO ::: Read 295265/295265 rows....
