# Set Environment

In [None]:
# Work from the dataGenerator folder: later cells reference x13as/ relative to the current directory.
# NOTE(review): the path is relative, so re-running this cell after the first successful run will fail.
%cd drive/MyDrive/Projects/DLNowcasting/dataGenerator

/content/drive/MyDrive/Projects/DLNowcasting/dataGenerator


In [None]:
# Only the execute bit is needed to run the bundled x13as binary; avoid making it world-writable (was 777).
!chmod +x x13as/Linux/x13as

In [None]:
# %pip installs into the running kernel's environment; the version is pinned to the one this notebook was run with.
%pip install xmltodict==0.13.0

Collecting xmltodict
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.13.0


In [None]:
# !pip install pmdarima --user

In [None]:
######################################################################
##                                                                  ##
##   Created By Beomseok Seo 2023.01.30                             ##
##   Modified By Beomseok Seo 2023.03.01 to distribute to public    ##
##   Modified By CHB 2023.03.06                                     ##
##                                                                  ##
##   When you modify the code, please keep the creator information  ##
##   and add the modifier information below!                        ##
##                                                                  ##
##                                                                  ##
##   This code works with statAPI.py                                ##
##                                                                  ##
######################################################################

# KOSIS 통계표 검색 : https://kosis.kr/openapi/devGuide/devGuide_0203List.jsp
# ECOS 통계표 검색 : https://ecos.bok.or.kr/api/#/DevGuide/StatisticalCodeSearch

In [None]:
import os
import platform
import copy
import random
import pandas as pd
import numpy as np
import seaborn as sns
# import pmdarima as pm
import matplotlib.pylab as plt
from dateutil.relativedelta import relativedelta
from datetime import datetime, date
from statAPI import getECOS, getKOSIS
from typing import Dict, Iterable, Tuple, List
from statsmodels.tsa.x13 import x13_arima_analysis
from sklearn.ensemble import RandomForestRegressor as RFR
from  xgboost import XGBRegressor as XGBR
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

In [None]:
from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tools.tools import add_constant
from statsmodels.tsa.arima.model import ARIMA

from tqdm import tqdm_notebook
from itertools import product

In [None]:
# Export X13PATH as <current working directory>/x13as/<platform.system()>, i.e. the OS-specific
# folder holding the bundled x13as binary. statsmodels' x13_arima_analysis consults X13PATH when
# no explicit binary path is passed.
os.environ['X13PATH'] = os.path.join(os.path.abspath(os.curdir),'x13as', platform.system())

In [None]:
os.environ['X13PATH']

'/content/drive/MyDrive/Projects/DLNowcasting/dataGenerator/x13as/Linux'

In [None]:
# Raise the display limits so wide and long frames are shown in full, without ellipsis truncation.
# Use the exact option keys: the previous 'display.max.columns' was not a real key and only
# resolved because pandas treats the option name as a regex pattern.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 2000)

In [None]:
def seed_everything(seed: int = 42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and writes
    the seed into the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value applied to every generator (defaults to 42).
    """
    # Setting PYTHONHASHSEED here only affects processes started afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)
    # PyTorch seeding is kept commented out:
    # torch.manual_seed(seed)
    # torch.cuda.manual_seed(seed)  # type: ignore
    # torch.backends.cudnn.deterministic = True  # type: ignore
    # torch.backends.cudnn.benchmark = True  # type: ignore

In [None]:
# Fix the random seeds (seed 0) before the data-generation cells below run.
seed_everything(0)

# Generate Data

## statAPI

In [None]:
def getGeneratoer5YInterval(index: Iterable[str]) -> Iterable[Tuple[str]]:
  """Yield (first, last) label pairs for consecutive 60-entry slices of ``index``.

  ``index`` is walked in steps of 12 * 5 entries; each step yields the label
  at the start of the slice and the label at its end. The final slice is
  cut short at the last entry of ``index``. An empty ``index`` yields nothing.

  Args:
      index: Ordered labels. It must support ``len()`` and integer indexing.

  Yields:
      Tuples ``(start, end)`` of labels delimiting each slice.
  """
  total = len(index)
  for head in range(0, total, 12 * 5):
    # Clamp the slice end to the last available position.
    tail = min(head + 12 * 5, total) - 1
    yield (index[head], index[tail])

In [None]:
def convertStockToFlow(dat: pd.DataFrame) -> pd.DataFrame:
  """Return a copy of ``dat`` with non-'01' rows replaced by row-to-row differences.

  Rows whose index label ends in '01' keep their original values; every other
  row is replaced by its difference from the preceding row. The input frame
  is not modified.

  NOTE(review): presumably the index holds 'YYYYMM' strings and the data are
  within-year cumulative totals, so this produces monthly increments. Confirm
  against the callers.

  Args:
      dat: Frame indexed by string labels.

  Returns:
      A new frame of the same shape as ``dat``.
  """
  flow = copy.deepcopy(dat)
  # True for every row except those labelled as the first month ('...01').
  not_first_month = [not label.endswith('01') for label in flow.index]
  step_change = flow.diff()
  flow[not_first_month] = step_change[not_first_month]
  return flow

In [None]:
def moveAverage12Month(series: pd.Series) -> pd.Series:
  """Return the trailing 12-observation rolling mean of ``series``.

  The first 11 positions of the result are NaN because a full window of
  12 observations is not yet available there.

  Args:
      series: Values to smooth.

  Returns:
      A series of the same length holding the rolling means.
  """
  twelve_obs_window = series.rolling(window=12)
  return twelve_obs_window.mean()

In [None]:
def generateRawData(
    start='1990-01-01',
    end=datetime.today().strftime('%Y-%m-%d')) -> pd.DataFrame:

    # frame
    date_range = pd.date_range(
        start=start,
        end=datetime.strptime(end, '%Y-%m-%d') + relativedelta(months=1),
        freq='M')
    macro_dat = pd.DataFrame(index=date_range.strftime('%Y%m'))

    # GDP(SA)
    # GDP(SA)
    df_ecos_q = getECOS('200Y004','Q','1990Q1','2009Q4','1400')
    df_ecos_q = pd.concat([df_ecos_q, getECOS('200Y004','Q','2010Q1','2023Q4','1400')])
    df_ecos_q.index = [str(int(x[:4])*100+int(x[-1])*3) for x in df_ecos_q.index]
    df_ecos_q.columns = ['GDP(SA)']
    macro_dat['GDP(SA)'] = df_ecos_q

    # 통화금융

    ## 협의통화(평잔,SA)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('101Y018','M',start, end, 'BBLS00')])
    df_ecos.columns = ['협의통화(평잔,SA)']
    macro_dat['협의통화(평잔,SA)'] = df_ecos

    ## 광의통화(평잔,SA)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('101Y003','M',start, end, 'BBHS00')])
    df_ecos.columns = ['광의통화(평잔,SA)']
    macro_dat['광의통화(평잔,SA)'] = df_ecos

    ## 금융기관유동성(평잔,SA)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('111Y003','M',start, end, 'LAS0000')])
    df_ecos.columns = ['금융기관유동성(평잔,SA)']
    macro_dat['금융기관유동성(평잔,SA)'] = df_ecos

    ## 예금은행총수신(평잔)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('104Y014','M',start, end, 'BCA8')])
    df_ecos.columns = ['예금은행총수신(평잔)']
    macro_dat['예금은행총수신(평잔)'] = df_ecos

    ## 예금은행대출금(말잔)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('104Y016','M',start, end, 'BDCA1')])
    df_ecos.columns = ['예금은행대출금(말잔)']
    macro_dat['예금은행대출금(말잔)'] = df_ecos

    ## 무담보콜금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '1020000')])
    df_ecos.columns = ['무담보콜금리']
    macro_dat['무담보콜금리'] = df_ecos

    ## 국고채1년물금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '5030000')])
    df_ecos.columns = ['국고채1년물금리']
    macro_dat['국고채1년물금리'] = df_ecos

    ## 국고채3년물금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '5020000')])
    df_ecos.columns = ['국고채3년물금리']
    macro_dat['국고채3년물금리'] = df_ecos

    ## 국고채5년물금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '5040000')])
    df_ecos.columns = ['국고채5년물금리']
    macro_dat['국고채5년물금리'] = df_ecos

    ## 통안증권1년물금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '6010100')])
    df_ecos.columns = ['통안증권1년물금리']
    macro_dat['통안증권1년물금리'] = df_ecos

    ## 통안증권2년물금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '6010200')])
    df_ecos.columns = ['통안증권2년물금리']
    macro_dat['통안증권2년물금리'] = df_ecos

    ## 회사채3년AA금리
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('721Y001','M',start, end, '7020000')])
    df_ecos.columns = ['회사채3년AA금리']
    macro_dat['회사채3년AA금리'] = df_ecos

    ## 국고채1년3년금리차
    macro_dat['국고채3년1년금리차'] = macro_dat['국고채3년물금리'] -  macro_dat['국고채1년물금리']

    ## 국고채5년1년금리차
    macro_dat['국고채5년1년금리차'] = macro_dat['국고채5년물금리'] -  macro_dat['국고채1년물금리']

    ## 통안증권2년1년금리차
    macro_dat['통안증권2년1년금리차'] = macro_dat['통안증권2년물금리'] - macro_dat['통안증권1년물금리']

    ## 회사채3년AA국채3년금리차
    macro_dat['회사채3년AA국채3년금리차'] =  macro_dat['회사채3년AA금리'] - macro_dat['국고채5년물금리']

    # 자산가격

    ## KOSPI(평균)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('901Y014','M',start, end, '1080000')])
    df_ecos.columns = ['KOSPI(평균)']
    macro_dat['KOSPI(평균)'] = df_ecos

    ## 주택매매가격지수
    df_kosis = getKOSIS('DT_40803_N0001','M',macro_dat.index[0],macro_dat.index[-1],'sales+',orgId='408',obj1='00+',obj2='a0+',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['주택매매가격지수']
    macro_dat['주택매매가격지수'] = df_kosis

    ## 주택전세가격지수
    df_kosis = getKOSIS('DT_40803_N0002','M',macro_dat.index[0],macro_dat.index[-1],'sales+',orgId='408',obj1='00+',obj2='a0+',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['주택전세가격지수']
    macro_dat['주택전세가격지수'] = df_kosis

    # 물가

    ## 생산자물가지수
    df_kosis = getKOSIS('DT_404Y014','M',macro_dat.index[0],macro_dat.index[-1],'13103134604999+',orgId='301',obj1='13102134604ACC_CD.*AA',obj2='',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['생산자물가지수']
    macro_dat['생산자물가지수'] = df_kosis

    ## 소비자물가지수
    df_kosis = getKOSIS('DT_1J20003','M',macro_dat.index[0],macro_dat.index[-1],'T+',orgId='101',obj1='T10',obj2='',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비자물가지수']
    macro_dat['소비자물가지수'] = df_kosis

    print('haha')

    # 소비자물가지수(농산물석유류제외)
    df_kosis = getKOSIS('DT_1J20007','M',macro_dat.index[0],macro_dat.index[-1],item='T+',orgId='101',obj1='QB+',title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비자물가지수(농산물석유류제외)']
    macro_dat['소비자물가지수(농산물석유류제외)'] = df_kosis

    # 소비자물가지수(식료품 에너지 제외)
    df_kosis = getKOSIS('DT_1J20009','M',macro_dat.index[0],macro_dat.index[-1],item='T+',orgId='101',obj1='DB+',title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비자물가지수(식료품에너지제외)']
    macro_dat['소비자물가지수(식료품에너지제외)'] = df_kosis


    # 수입물가지수
    df_kosis = getKOSIS('DT_401Y015','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134643999+',
                        orgId='301',
                        obj1='13102134643ACC_CD.*AA',
                        obj2='13102134643CRR_CTRT_CD.D+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수입물가지수']
    macro_dat['수입물가지수'] = df_kosis

    '''
    ## 국제원자재

    # WTI원유수입단가
    df_kosis = getKOSIS('DT_AA12','M',macro_dat.index[0],macro_dat.index[-1],
                        item='1639213103390016T10+',
                        orgId='392',
                        obj1='1539213102390016A.33+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['WTI원유수입단가']
    macro_dat['WTI원유수입단가'] = df_kosis

    # 두바이원유수입단가
    df_kosis = getKOSIS('DT_AA12','M',macro_dat.index[0],macro_dat.index[-1],
                        item='1639213103390016T10+',
                        orgId='392',
                        obj1='1539213102390016A.31+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['두바이원유수입단가']
    macro_dat['두바이원유수입단가'] = df_kosis

    # 브렌트원유수입단가
    df_kosis = getKOSIS('DT_AA12','M',macro_dat.index[0],macro_dat.index[-1],
                        item='1639213103390016T10+',
                        orgId='392',
                        obj1='1539213102390016A.32+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['브렌트원유수입단가']
    macro_dat['브렌트원유수입단가'] = df_kosis
    '''

    # 대외거래

    ## 경상수지(SA)
    df_kosis = getKOSIS('DT_301Y017','M',macro_dat.index[0],macro_dat.index[-1],'13103134664999+',orgId='301',obj1='13102134664ACC_CD.SA000',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['경상수지(SA)']
    macro_dat['경상수지(SA)'] = df_kosis

    ## 수출액
    df_kosis = getKOSIS('DT_134001_001','M',macro_dat.index[0],macro_dat.index[-1],'T002+',orgId='134',obj1='DATA',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수출액']
    macro_dat['수출액'] = df_kosis

    ## 수입액
    df_kosis = getKOSIS('DT_134001_001','M',macro_dat.index[0],macro_dat.index[-1],'T004+',orgId='134',obj1='DATA',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수입액']
    macro_dat['수입액'] = df_kosis

    ## 무역수지
    df_kosis = getKOSIS('DT_134001_001','M',macro_dat.index[0],macro_dat.index[-1],'T005+',orgId='134',obj1='DATA',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['무역수지']
    macro_dat['무역수지'] = df_kosis

    ## 수출물량지수
    df_kosis = getKOSIS('DT_403Y002','M',macro_dat.index[0],macro_dat.index[-1],'13103134523999+',orgId='301',obj1='13102134523ACC_CD.*AA',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수출물량지수']
    macro_dat['수출물량지수'] = df_kosis

    ## 수입물량지수
    df_kosis = getKOSIS('DT_403Y004','M',macro_dat.index[0],macro_dat.index[-1],'13103134626999+',orgId='301',obj1='13102134626ACC_CD.*AA',title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수입물량지수']
    macro_dat['수입물량지수'] = df_kosis

    # 환율

    ## 원달러환율(평균)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('731Y004','M',start, end, '0000001', '0000100')])
    df_ecos.columns = ['원달러환율(평균)']
    macro_dat['원달러환율(평균)'] = df_ecos

    ## 원엔환율(평균)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('731Y004','M',start, end, '0000002', '0000100')])
    df_ecos.columns = ['원엔환율(평균)']
    macro_dat['원엔환율(평균)'] = df_ecos

    ## 원유로환율(평균)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('731Y004','M',start, end, '0000003', '0000100')])
    df_ecos.columns = ['원유로환율(평균)']
    macro_dat['원유로환율(평균)'] = df_ecos

    ## 원파운드환율(평균)
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('731Y004','M',start, end, '0000012', '0000100')])
    df_ecos.columns = ['원파운드환율(평균)']
    macro_dat['원파운드환율(평균)'] = df_ecos

    # 기업심리

    ## 전산업매출실적BSI
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.99988',
                        obj2='13102134673BSI_CD.AB+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업매출실적BSI']
    macro_dat['전산업매출실적BSI'] = df_kosis

    ## 전산업업황실적BSI
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.99988',
                        obj2='13102134673BSI_CD.AA+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업업황실적BSI']
    macro_dat['전산업업황실적BSI'] = df_kosis

    ## 전산업채산성실적BSI
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.99988',
                        obj2='13102134673BSI_CD.AE+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업채산성실적BSI']
    macro_dat['전산업채산성실적BSI'] = df_kosis

    ## 전산업자금사정실적BSI
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.99988',
                        obj2='13102134673BSI_CD.AO+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업자금사정실적BSI']
    macro_dat['전산업자금사정실적BSI'] = df_kosis

    ## 전산업인력사정실적BSI
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.99988',
                        obj2='13102134673BSI_CD.AJ+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업인력사정실적BSI']
    macro_dat['전산업인력사정실적BSI'] = df_kosis

    ## 제조업가동률실적BSI
    df_kosis = getKOSIS('DT_512Y007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134491999+',
                        orgId='301',
                        obj1='13102134491BSI_CD.AK+',
                        obj2='13102134491BUSINESS_TYPE_CD.C0000+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업가동률실적BSI']
    macro_dat['제조업가동률실적BSI'] = df_kosis

    ## 제조업수출실적BSI
    df_kosis = getKOSIS('DT_512Y007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134491999+',
                        orgId='301',
                        obj1='13102134491BSI_CD.AM+',
                        obj2='13102134491BUSINESS_TYPE_CD.C0000+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업수출실적BSI']
    macro_dat['제조업수출실적BSI'] = df_kosis

    ## 제조업내수판매실적BSI
    df_kosis = getKOSIS('DT_512Y007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134491999+',
                        orgId='301',
                        obj1='13102134491BSI_CD.AL+',
                        obj2='13102134491BUSINESS_TYPE_CD.C0000+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업내수판매실적BSI']
    macro_dat['제조업내수판매실적BSI'] = df_kosis

    ## 제조업신규수주실적BSI
    df_kosis = getKOSIS('DT_512Y007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134491999+',
                        orgId='301',
                        obj1='13102134491BSI_CD.AL+',
                        obj2='13102134491BUSINESS_TYPE_CD.C0000+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업신규수주실적BSI']
    macro_dat['제조업신규수주실적BSI'] = df_kosis

    ## 제조업업황실적BSI(SA)
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.C0000+',
                        obj2='13102134673BSI_CD.AS+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업업황실적BSI(SA)']
    macro_dat['제조업업황실적BSI(SA)'] = df_kosis

    ## 비제조업업황실적BSI(SA)
    df_kosis = getKOSIS('DT_512Y013','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134673999+',
                        orgId='301',
                        obj1='13102134673BUSINESS_TYPE_CD.Y9900+',
                        obj2='13102134673BSI_CD.AS+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['비제조업업황실적BSI(SA)']
    macro_dat['비제조업업황실적BSI(SA)'] = df_kosis

    ## 제조업업황전망BSI(SA)
    df_kosis = getKOSIS('DT_512Y014','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134488999+',
                        orgId='301',
                        obj1='13102134488BUSINESS_TYPE_CD.C0000+',
                        obj2='13102134488BSI_CD.BS+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업업황전망BSI(SA)']
    macro_dat['제조업업황전망BSI(SA)'] = df_kosis

    ## 비제조업업황전망BSI(SA)
    df_kosis = getKOSIS('DT_512Y014','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134488999+',
                        orgId='301',
                        obj1='13102134488BUSINESS_TYPE_CD.Y9900+',
                        obj2='13102134488BSI_CD.BS+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['비제조업업황전망BSI(SA)']
    macro_dat['비제조업업황전망BSI(SA)'] = df_kosis

    ## 소상공인경기체감
    df_kosis = getKOSIS('DT_S0001N_001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='s0+',
                        orgId='142',
                        obj1='00+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소상공인경기체감']
    macro_dat['소상공인경기체감'] = df_kosis

    ## 소상공인경기전망
    df_kosis = getKOSIS('DT_S0001N_001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='s1+',
                        orgId='142',
                        obj1='00+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소상공인경기전망']
    macro_dat['소상공인경기전망'] = df_kosis

    # 경제심리

    ## NSI
    df_ecos = pd.DataFrame()
    for start, end in getGeneratoer5YInterval(macro_dat.index):
      df_ecos = pd.concat([df_ecos, getECOS('521Y001','M',start, end, 'A001')])
    df_ecos.columns = ['NSI']
    macro_dat['NSI'] = df_ecos

    ## 경제심리지수(순환변동치)
    df_kosis = getKOSIS('DT_513Y001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134473999+',
                        orgId='301',
                        obj1='13102134473ACC_CD.E2000+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['경제심리지수(순환변동치)']
    macro_dat['경제심리지수(순환변동치)'] = df_kosis

    # 산업활동


    ## 경기선행종합지수
    df_kosis = getKOSIS('DT_1C8015','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='A00+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['경기선행종합지수']
    macro_dat['경기선행종합지수'] = df_kosis

    ## 경기동행종합지수
    df_kosis = getKOSIS('DT_1C8015','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='B00+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['경기동행종합지수']
    macro_dat['경기동행종합지수'] = df_kosis

    ## 선행지수순환변동치
    df_kosis = getKOSIS('DT_1C8015','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='A03+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['선행지수순환변동치']
    macro_dat['선행지수순환변동치'] = df_kosis

    ## 동행지수순환변동치
    df_kosis = getKOSIS('DT_1C8015','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='B03+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['동행지수순환변동치']
    macro_dat['동행지수순환변동치'] = df_kosis

    ## 설비투자지수(SA)
    df_kosis = getKOSIS('DT_1F70011','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T5+',
                        orgId='101',
                        obj1='C',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['설비투자지수(SA)']
    macro_dat['설비투자지수(SA)'] = df_kosis

    ## 전산업생산지수(SA)
    df_kosis = getKOSIS('DT_1JH20202','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='1',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업생산지수(SA)']
    macro_dat['전산업생산지수(SA)'] = df_kosis

    ## 광공업생산지수(SA)
    df_kosis = getKOSIS('DT_1JH20202','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='1B+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['광공업생산지수(SA)']
    macro_dat['광공업생산지수(SA)'] = df_kosis

    ## 제조업생산지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='C+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업생산지수(SA)']
    macro_dat['제조업생산지수(SA)'] = df_kosis

    ## 화학제조업생산지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='C20+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['화학제조업생산지수(SA)']
    macro_dat['화학제조업생산지수(SA)'] = df_kosis

    ## 전기장비제조업생산지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='C28+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전기장비제조업생산지수(SA)']
    macro_dat['전기장비제조업생산지수(SA)'] = df_kosis

    ## 자동차트레일러제조업생산지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='C30+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자동차트레일러제조업생산지수(SA)']
    macro_dat['자동차트레일러제조업생산지수(SA)'] = df_kosis

    ## 제품출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T11+',
                        orgId='101',
                        obj1='00+',
                        obj2='0+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제품출하지수(SA)']
    macro_dat['제품출하지수(SA)'] = df_kosis

    ## 광업제조업제품출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='A+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['광업제조업제품출하지수(SA)']
    macro_dat['광업제조업제품출하지수(SA)'] = df_kosis

    ## 제조업제품출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='C+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업제품출하지수(SA)']
    macro_dat['제조업제품출하지수(SA)'] = df_kosis

    ## 화학제조업출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='C20+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['화학제조업출하지수(SA)']
    macro_dat['화학제조업출하지수(SA)'] = df_kosis

    ## 전기장비제조업출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='C28+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전기장비제조업출하지수(SA)']
    macro_dat['전기장비제조업출하지수(SA)'] = df_kosis

    ## 자동차트레일러제조업출하지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='C30+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자동차트레일러제조업출하지수(SA)']
    macro_dat['자동차트레일러제조업출하지수(SA)'] = df_kosis

    ## 제품재고지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='0+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제품재고지수(SA)']
    macro_dat['제품재고지수(SA)'] = df_kosis

    ## 제조업재고지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='C+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['제조업재고지수(SA)']
    macro_dat['제조업재고지수(SA)'] = df_kosis

    ## 화학제조업재고지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='C20+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['화학제조업재고지수(SA)']
    macro_dat['화학제조업재고지수(SA)'] = df_kosis

    ## 전기장비제조업재고지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='C28+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전기장비제조업재고지수(SA)']
    macro_dat['전기장비제조업재고지수(SA)'] = df_kosis

    ## 자동차트레일러제조업재고지수(SA)
    df_kosis = getKOSIS('DT_1F02001','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='C30+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자동차트레일러제조업재고지수(SA)']
    macro_dat['자동차트레일러제조업재고지수(SA)'] = df_kosis

    ## 자본재생산지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='1+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자본재생산지수(SA)']
    macro_dat['자본재생산지수(SA)'] = df_kosis

    ## 중간재생산지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='2+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['중간재생산지수(SA)']
    macro_dat['중간재생산지수(SA)'] = df_kosis

    ## 소비재생산지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T20+',
                        orgId='101',
                        obj1='00+',
                        obj2='3+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비재생산지수(SA)']
    macro_dat['소비재생산지수(SA)'] = df_kosis

    ## 자본재출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='1+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자본재출하지수(SA)']
    macro_dat['자본재출하지수(SA)'] = df_kosis

    ## 중간재출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='2+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['중간재출하지수(SA)']
    macro_dat['중간재출하지수(SA)'] = df_kosis

    ## 소비재출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T21+',
                        orgId='101',
                        obj1='00+',
                        obj2='3+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비재출하지수(SA)']
    macro_dat['소비재출하지수(SA)'] = df_kosis

    ## 자본재재고지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='1+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자본재재고지수(SA)']
    macro_dat['자본재재고지수(SA)'] = df_kosis

    ## 중간재재고지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='2+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['중간재재고지수(SA)']
    macro_dat['중간재재고지수(SA)'] = df_kosis

    ## 소비재재고지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T22+',
                        orgId='101',
                        obj1='00+',
                        obj2='3+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비재재고지수(SA)']
    macro_dat['소비재재고지수(SA)'] = df_kosis

    ## 자본재내수출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T23+',
                        orgId='101',
                        obj1='00+',
                        obj2='1+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['자본재내수출하지수(SA)']
    macro_dat['자본재내수출하지수(SA)'] = df_kosis

    ## 중간재내수출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T23+',
                        orgId='101',
                        obj1='00+',
                        obj2='2+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['중간재내수출하지수(SA)']
    macro_dat['중간재내수출하지수(SA)'] = df_kosis

    ## 소비재내수출하지수(SA)
    df_kosis = getKOSIS('DT_1F02003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T23+',
                        orgId='101',
                        obj1='00+',
                        obj2='3+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비재내수출하지수(SA)']
    macro_dat['소비재내수출하지수(SA)'] = df_kosis

    # 서비스업활동

    ## 전산업생산지수서비스업(SA)
    df_kosis = getKOSIS('DT_1JH20202','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T1+',
                        orgId='101',
                        obj1='1C+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['전산업생산지수서비스업(SA)']
    macro_dat['전산업생산지수서비스업(SA)'] = df_kosis



    ## 소매판매액지수(SA)
    df_kosis = getKOSIS('DT_1K41012','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T3+',
                        orgId='101',
                        obj1='G0',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소매판매액지수(SA)']
    macro_dat['소매판매액지수(SA)'] = df_kosis

    # 건설업활동

    ## 건설수주액(SA)
    df_kosis = getKOSIS('DT_1G1B045','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='0',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['건설수주액(SA)']
    macro_dat['건설수주액(SA)'] = df_kosis

    ## 공공발주건설수주액
    df_kosis = getKOSIS('DT_1G1B002','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='1+',
                        obj2='0+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['공공발주건설수주액']
    macro_dat['공공발주건설수주액'] = df_kosis

    ## 민간발주건설수주액
    df_kosis = getKOSIS('DT_1G1B002','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='2+',
                        obj2='0+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['민간발주건설수주액']
    macro_dat['민간발주건설수주액'] = df_kosis

    ## 건설기성액(SA)
    df_kosis = getKOSIS('DT_1G18004','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='0+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['건설기성액(SA)']
    macro_dat['건설기성액(SA)'] = df_kosis

    ## 공공발주건설기성액(SA)
    df_kosis = getKOSIS('DT_1G18003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='1+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['공공발주건설기성액(SA)']
    macro_dat['공공발주건설기성액(SA)'] = df_kosis

    ## 민간발주건설기성액(SA)
    df_kosis = getKOSIS('DT_1G18003','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T10+',
                        orgId='101',
                        obj1='2+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['민간발주건설기성액(SA)']
    macro_dat['민간발주건설기성액(SA)'] = df_kosis

    # 고용노동

    ## 취업자수(SA)
    df_kosis = getKOSIS('DT_1DA9001S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T30+',
                        orgId='101',
                        obj1='00+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['취업자수(SA)']
    macro_dat['취업자수(SA)'] = df_kosis

    ## 실업자수(SA)
    df_kosis = getKOSIS('DT_1DA9001S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T40+',
                        orgId='101',
                        obj1='00+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['실업자수(SA)']
    macro_dat['실업자수(SA)'] = df_kosis

    ## 상용근로자수(SA)
    df_kosis = getKOSIS('DT_1DA9006S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T30+',
                        orgId='101',
                        obj1='30+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['상용근로자수(SA)']
    macro_dat['상용근로자수(SA)'] = df_kosis

    ## 임시근로자수(SA)
    df_kosis = getKOSIS('DT_1DA9006S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T30+',
                        orgId='101',
                        obj1='35+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['임시근로자수(SA)']
    macro_dat['임시근로자수(SA)'] = df_kosis


    ## 고용률(SA)
    df_kosis = getKOSIS('DT_1DA9001S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T90+',
                        orgId='101',
                        obj1='00+',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['고용률(SA)']
    macro_dat['고용률(SA)'] = df_kosis

    ## 실업률(SA)
    df_kosis = getKOSIS('DT_1DA9001S','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T80+',
                        orgId='101',
                        obj1='00+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['실업률(SA)']
    macro_dat['실업률(SA)'] = df_kosis

    # 소비자동향조사

    ## 소비자심리지수
    df_kosis = getKOSIS('DT_511Y002','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134688999+',
                        orgId='301',
                        obj1='13102134688CSI_CD.FME+',
                        obj2='13102134688CSI_CLF_CD.99988',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['소비자심리지수']
    macro_dat['소비자심리지수'] = df_kosis

    ## 현재경기판단CSI
    df_kosis = getKOSIS('DT_511Y002','M',macro_dat.index[0],macro_dat.index[-1],
                        item='13103134688999+',
                        orgId='301',
                        obj1='13102134688CSI_CD.FMAB+',
                        obj2='13102134688CSI_CLF_CD.99988',
                        title='')
    # df_kosis = (df_kosis/df_kosis.shift(12)*100-100)['200501':]
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['현재경기판단CSI']
    macro_dat['현재경기판단CSI'] = df_kosis

    # 재정
    ## 재정총수입
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD110+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정총수입']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정총수입'] = df_kosis

    ## 재정경상수입
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD111+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정경상수입']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정경상수입'] = df_kosis

    ## 재정자본수입
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD112+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정자본수입']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정자본수입'] = df_kosis

    ## 재정총지출및순융자
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD120+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정총지출및순융자']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정총지출및순융자'] = df_kosis

    ## 재정총지출
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD121+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정총지출']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정총지출'] = df_kosis

    ## 재정경상지출
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD122+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정경상지출']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정경상지출'] = df_kosis

    ## 재정자본지출
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD123+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정자본지출']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정자본지출'] = df_kosis

    ## 재정순융자
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD124+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['재정순융자']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['재정순융자'] = df_kosis

    ## 통합재정수지
    df_kosis = getKOSIS('DT_102N_AD01','M',macro_dat.index[0],macro_dat.index[-1],
                        item='16102AD1+',
                        orgId='102',
                        obj1='15102AD130+',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['통합재정수지']
    df_kosis = convertStockToFlow(df_kosis)
    macro_dat['통합재정수지'] = df_kosis


    # 에너지
    ## 에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A02+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['에너지최종소비']
    macro_dat['에너지최종소비'] = df_kosis

    ## 에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A02+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['에너지최종소비']
    macro_dat['에너지최종소비'] = df_kosis

    ## 산업부문에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A0201+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['산업부문에너지최종소비']
    macro_dat['산업부문에너지최종소비'] = df_kosis

    ## 수송부문에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A0202+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['수송부문에너지최종소비']
    macro_dat['수송부문에너지최종소비'] = df_kosis

    ## 가정부문에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A0203+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['가정부문에너지최종소비']
    macro_dat['가정부문에너지최종소비'] = df_kosis

    ## 상업공공부문에너지최종소비
    df_kosis = getKOSIS('DT_339001_007','M',macro_dat.index[0],macro_dat.index[-1],
                        item='T001+',
                        orgId='339',
                        obj1='A0204+',
                        obj2='',
                        title='')
    df_kosis.index = df_kosis.index.map(str)
    df_kosis.columns = ['상업공공부문에너지최종소비']
    macro_dat['상업공공부문에너지최종소비'] = df_kosis

    return macro_dat

In [None]:
# Build the raw macro data frame (generateRawData returns the assembled `macro_dat` frame).
df = generateRawData()

In [None]:
df.shape

In [None]:
df

In [None]:
# Save the raw API data to an Excel file whose name carries today's date (YYYYMMDD).
df.to_excel(f'data_api{datetime.today().strftime("%Y%m%d")}.xlsx')

In [None]:
# Colab-only: upload a local file into the runtime's working directory.
from google.colab import files
files.upload()
# Ending the cell with None keeps the return value of files.upload() from being displayed.
None

Saving data_api20240429.xlsx to data_api20240429.xlsx


In [None]:
# Reload the API data from the Excel file stamped with today's date; the first column is the index.
# NOTE(review): the filename is derived from the run date, so this only finds a file
# that was saved or uploaded under today's date.
data = pd.read_excel(f'data_api{datetime.today().strftime("%Y%m%d")}.xlsx', index_col=0)

In [None]:
# Convert the index to a monthly PeriodIndex.
data.index = pd.PeriodIndex(data.index, freq='M')

## Load External Data

In [None]:
# Colab-only: upload the manually collected (non-API) workbook into the runtime.
from google.colab import files
files.upload()
# Ending the cell with None keeps the return value of files.upload() from being displayed.
None

Saving nonAPIData20240425.xlsx to nonAPIData20240425.xlsx


In [None]:
# Read sheet 'data' of the non-API workbook, skipping its first 12 rows; the first column is the index.
# NOTE(review): the workbook filename (and its date stamp) is hard-coded.
dataExternal = pd.read_excel('nonAPIData20240425.xlsx', sheet_name='data', skiprows=12, index_col=0)

In [None]:
# Convert the index to a monthly PeriodIndex so it lines up with `data`.
dataExternal.index = pd.PeriodIndex(dataExternal.index, freq='M')

In [None]:
dataExternal.columns

Index(['부문별전력사용량-월-총사용량', '부문별전력사용량-월-가정용', '부문별전력사용량-월-공공용',
       '부문별전력사용량-월-서비스업', '부문별전력사용량-월-제조업',
       'Quarterly National Accounts-Canada-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly',
       'Quarterly National Accounts-France-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly',
       'Quarterly National Accounts-Germany-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly',
       'Quarterly National Accounts-Italy-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly',
       'Quarterly National Accounts-Japan-Gross domestic p

In [None]:
# Maps the long column headers of the external workbook to the short variable names
# used in the rest of the notebook.
renameDict = {
    '부문별전력사용량-월-총사용량':'총전력사용량',
    '부문별전력사용량-월-가정용':'가정용전력사용량',
    '부문별전력사용량-월-공공용':'공공용전력사용량',
    '부문별전력사용량-월-서비스업':'서비스업전력사용량',
    '부문별전력사용량-월-제조업':'제조업전력사용량',
    'Quarterly National Accounts-Canada-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'캐나다GDP(SA)',
    'Quarterly National Accounts-France-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'프랑스GDP(SA)',
    'Quarterly National Accounts-Germany-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'독일GDP(SA)',
    'Quarterly National Accounts-Italy-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'이탈리아GDP(SA)',
    'Quarterly National Accounts-Japan-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'일본GDP(SA)',
    'Quarterly National Accounts-United Kingdom-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'영국GDP(SA)',
    'Quarterly National Accounts-United States-Gross domestic product - expenditure approach-National currency, chained volume estimates, national reference year, quarterly levels, seasonally adjusted-Quarterly':'미국GDP(SA)',
    'Key Short-Term Economic Indicators-Industrial production, s.a.-OECD - Total-Level, ratio or index-Monthly':'OECD산업생산지수(SA)',
    'Key Short-Term Economic Indicators-Total manufacturing, s.a.-OECD - Total-Level, ratio or index-Monthly':'OECD제조업지수(SA)',
    'Key Short-Term Economic Indicators-Construction, s.a.-OECD - Total-Level, ratio or index-Monthly':'OECD건설업지수(SA)',
    'Key Short-Term Economic Indicators-Leading indicator, amplitude adjusted-OECD - Total-Level, ratio or index-Monthly':'OECD경기선행지수(SA)',
    'Merchandise world trade, fixed base 2010=100-World-Volumes, seasonally adjusted-trade-2010=100-Monthly':'세계무역량지수(SA)',
    '유가-일-WTI(뉴욕, 현물)':'WTI유가현물',
    '유가-일-Dubai(현물)':'Dubai유가현물',
    '유가-일-Brent(현물)':'Brent유가현물',
    '상품/원자재-일-대두(선물)':'대두선물가격',
    '상품/원자재-일-금(현물)':'금현물가격',
    '상품/원자재-일-니켈(현물)':'니켈현물가격',
    '상품/원자재-일-아연(현물)':'아연현물가격',
    '상품/원자재-일-옥수수':'옥수수선물가격',
    '상품/원자재-일-소맥':'소맥선물가격',
    '상품/원자재-일-동(현물)':'동현물가격',
    '상품/원자재-일-알루미늄(현물)':'알루미늄현물가격',
    '상품/원자재-일-원면':'원면선물가격',
}

In [None]:
# Apply the short variable names; columns not listed in renameDict keep their original header.
dataExternal = dataExternal.rename(columns=renameDict)

In [None]:
# Shift every column whose name contains 'GDP' down by two rows (two months on the monthly index).
# In the displayed output below, the shifted values land on March/June/September rows.
# NOTE(review): presumably this moves each quarterly figure to the last month of its quarter — confirm.
# NOTE(review): this cell is not idempotent; re-running it shifts the series by another two months.
for col in dataExternal.columns:
  if 'GDP' in col:
    dataExternal[col] = dataExternal[col].shift(2)

In [None]:
dataExternal

Unnamed: 0_level_0,총전력사용량,가정용전력사용량,공공용전력사용량,서비스업전력사용량,제조업전력사용량,캐나다GDP(SA),프랑스GDP(SA),독일GDP(SA),이탈리아GDP(SA),일본GDP(SA),영국GDP(SA),미국GDP(SA),OECD산업생산지수(SA),OECD제조업지수(SA),OECD건설업지수(SA),OECD경기선행지수(SA),세계무역량지수(SA),WTI유가현물,Dubai유가현물,Brent유가현물,대두선물가격,금현물가격,니켈현물가격,아연현물가격,옥수수선물가격,소맥선물가격,동현물가격,알루미늄현물가격,원면선물가격
기간 1947-01-01 ~ 2024-04-01 (M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1947-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-03,,,,,,,,,,,,545670.3,,,,,,,,,,,,,,,,,
1947-04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-06,,,,,,,,,,,,544223.0,,,,,,,,,,,,,,,,,
1947-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-08,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1947-09,,,,,,,,,,,,543108.0,,,,,,,,,,,,,,,,,
1947-10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Merge Data

In [None]:
# Join the external columns onto the API data by index (DataFrame.join keeps the rows of `data`).
# NOTE(review): overwrites `data`; re-running this cell joins the same columns a second time.
data = data.join(dataExternal)

## Extend Data

In [None]:
def extend(df, time, topic, gr, gr_type='mom'):
    """Return a copy of ``df`` with ``topic`` at row ``time`` set from a growth rate.

    The written value is ``base * (1 + gr / 100)``, where ``base`` is the value
    of ``topic`` one month before ``time`` (``gr_type='mom'``) or twelve months
    before ``time`` (``gr_type='yoy'``).

    Parameters
    ----------
    df : pd.DataFrame
        Source frame. It is deep-copied; the caller's frame is not modified.
    time : str
        Row label to write, in ``'%Y%m'`` form (e.g. ``'202403'``).
    topic : str
        Name of the column to extend.
    gr : float
        Growth rate in percent.
    gr_type : {'mom', 'yoy'}, default 'mom'
        Which earlier row supplies the base value.

    Returns
    -------
    pd.DataFrame
        The modified copy.

    Raises
    ------
    ValueError
        If ``gr_type`` is not ``'mom'`` or ``'yoy'`` (previously such a call
        silently returned an unmodified copy), or if ``time`` does not match
        ``'%Y%m'``.
    """
    monthsBack = {'mom': 1, 'yoy': 12}
    if gr_type not in monthsBack:
        raise ValueError(f"gr_type must be 'mom' or 'yoy', got {gr_type!r}")

    df = copy.deepcopy(df)

    # Count months since year 0 so that stepping back across a year boundary
    # is plain integer arithmetic.
    parsed = datetime.strptime(time, '%Y%m')
    baseMonthNumber = parsed.year * 12 + (parsed.month - 1) - monthsBack[gr_type]
    baseTime = f'{baseMonthNumber // 12:04d}{baseMonthNumber % 12 + 1:02d}'

    # A missing base row propagates the lookup error from .loc.
    df.loc[time, topic] = df.loc[baseTime][topic] * (1 + gr / 100)
    return df

In [None]:

# Fill in exports (수출액) and imports (수입액) for 2024-03 and 2024-04 from
# year-over-year growth rates (percent) applied to the value twelve months earlier.
# NOTE(review): the growth rates are hard-coded; presumably taken from preliminary releases — confirm the source.
data = extend(data, '202403', '수출액', 3.1, 'yoy')
data = extend(data, '202403', '수입액', -12.3, 'yoy')
data = extend(data, '202404', '수출액', 13.3, 'yoy')
data = extend(data, '202404', '수입액', 3.4, 'yoy')

In [None]:


# Fill in 2024-01 .. 2024-03 for six activity indicators from month-over-month growth rates (percent).
# Order matters: each month's value is computed from the previous month's value,
# so 2024-02 builds on the 2024-01 value written just above, and so on.
# NOTE(review): the growth rates are hard-coded; confirm their source before reuse.
data = extend(data, '202401', '소매판매액지수(SA)', 1.0, 'mom')
data = extend(data, '202401', '설비투자지수(SA)', -5.8, 'mom')
data = extend(data, '202401', '건설기성액(SA)', 12.7, 'mom')
data = extend(data, '202401', '제조업생산지수(SA)', -1.5, 'mom')
data = extend(data, '202401', '전산업생산지수서비스업(SA)', -0.3, 'mom')
data = extend(data, '202401', '전산업생산지수(SA)', 0.3, 'mom')

data = extend(data, '202402', '소매판매액지수(SA)', -3.0, 'mom')
data = extend(data, '202402', '설비투자지수(SA)', 9.6, 'mom')
data = extend(data, '202402', '건설기성액(SA)', -1.0, 'mom')
data = extend(data, '202402', '제조업생산지수(SA)', 3.0, 'mom')
data = extend(data, '202402', '전산업생산지수서비스업(SA)', 0.5, 'mom')
data = extend(data, '202402', '전산업생산지수(SA)', 1.1, 'mom')

data = extend(data, '202403', '소매판매액지수(SA)', 1.6, 'mom')
data = extend(data, '202403', '설비투자지수(SA)', -6.6, 'mom')
data = extend(data, '202403', '건설기성액(SA)', -8.7, 'mom')
data = extend(data, '202403', '제조업생산지수(SA)', -3.5, 'mom')
data = extend(data, '202403', '전산업생산지수서비스업(SA)', -0.8, 'mom')
data = extend(data, '202403', '전산업생산지수(SA)', -2.1, 'mom')

## Back Up Data

In [None]:
# Back up the merged and extended data to an Excel file stamped with today's date (YYYYMMDD).
data.to_excel(f'data{datetime.today().strftime("%Y%m%d")}.xlsx')

In [None]:
# Reload the backup written above. The commented-out line loads a specific dated backup instead.
# NOTE(review): the active line only finds a backup saved under today's date.
# data = pd.read_excel(f'data{20240213}.xlsx', index_col=0)
data = pd.read_excel(f'data{datetime.today().strftime("%Y%m%d")}.xlsx', index_col=0)

In [None]:
# Convert the reloaded index back to a monthly PeriodIndex.
data.index = pd.PeriodIndex(data.index, freq='M')

# Load Meta Data

In [None]:
# Load the variable metadata table, indexed by its 'Variable' column.
# NOTE(review): the metadata filename (and its date stamp) is hard-coded.
metaData = pd.read_excel('meta20230525.xlsx', index_col='Variable')

In [None]:
len(metaData)

145

# Preprocessing Functions

In [None]:
def preprocess(dat: pd.DataFrame) -> pd.DataFrame:
    """Run the preprocessing pipeline: ``log`` -> ``convertToQuarterlyMean`` -> ``diff``.

    Each step receives the previous step's result; the final frame is returned.
    """
    # The x13SA seasonal-adjustment step is currently left out of the pipeline.
    return dat.pipe(log).pipe(convertToQuarterlyMean).pipe(diff)

## SA

In [None]:
def x13SA(dat: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``dat`` with selected columns replaced by their X-13 seasonally adjusted series.

    A column is processed unless its name contains ``'SA'`` or ``'순환변동치'``,
    or equals ``'무담보콜금리'``. Each processed column is renamed to ``'temp'``,
    stripped of NaNs and passed to ``x13_arima_analysis``; the ``seasadj``
    result is written back under the original column name. If anything raises
    for a column, the failure is printed and that column is left as it was.
    """
    adjusted = copy.deepcopy(dat)
    pending = (
        name for name in adjusted.columns
        if not ('SA' in name or '순환변동치' in name or name == '무담보콜금리')
    )
    for name in pending:
        try:
            renamed = adjusted.rename(columns={name: 'temp'})
            analysis = x13_arima_analysis(renamed['temp'].dropna())
            adjusted[name] = analysis.seasadj
        except Exception as err:
            # Best effort: report and carry on with the remaining columns.
            print(f'{name} is failed to SA')
            print(err)
    return adjusted

## Log

In [None]:
def log(dat: pd.DataFrame) -> pd.DataFrame:
  """Return a copy of ``dat`` where eligible columns are replaced by ``100 * ln(value)``.

  Columns whose name contains ``'금리'``, ``'수지'`` or ``'률'`` are left unchanged.
  The input frame is not modified.
  """
  skipMarkers = ('금리', '수지', '률')
  result = dat.copy(deep=True)
  for name in result.columns:
    if any(marker in name for marker in skipMarkers):
      continue
    result[name] = np.log(result[name]) * 100
  return result

## Quarterly mean

In [None]:
def convertToQuarterlyMean(dat: pd.DataFrame) -> pd.DataFrame:
    """Average the rows of ``dat`` within each quarter.

    The index is converted with ``asfreq('Q')`` and used as the grouping key,
    so the result is indexed by quarter.
    """
    quarterOfRow = dat.index.asfreq('Q')
    byQuarter = dat.groupby(quarterOfRow)
    return byQuarter.mean()

## Diff

In [None]:
def diff(dat: pd.DataFrame) -> pd.DataFrame:
    """Return row-over-row differences of ``dat`` without the first row.

    The first row of a difference has no predecessor, so it is dropped.
    """
    changes = dat.diff()
    return changes.iloc[1:]

## Drop starting na

In [None]:
def dropStartingNa(dat: pd.DataFrame) -> pd.DataFrame:
  """Drop the leading rows of ``dat`` that contain at least one NaN.

  The ``'GDP(SA)'`` column, when present, is ignored while checking for NaNs.
  Scanning stops at the first row with no NaN; later rows are kept even if
  they contain NaNs. Rows are removed by index label.
  """
  checked = dat.drop(columns=['GDP(SA)']) if 'GDP(SA)' in dat.columns else dat

  rowHasNa = checked.isna().any(axis=1).to_numpy()
  # Length of the initial run of True values: the position of the first
  # False, or every row when no complete row exists.
  leadingCount = len(rowHasNa) if rowHasNa.all() else int(rowHasNa.argmin())
  return dat.drop(index=dat.index[:leadingCount])

## Set criteria

In [None]:
def rmse(y_pred,y_true, digits=3):
    """Root-mean-squared error between ``y_pred`` and ``y_true``, rounded to ``digits`` decimals."""
    squaredErrors = (y_pred - y_true) ** 2
    meanSquaredError = np.mean(squaredErrors)
    return np.round(np.sqrt(meanSquaredError), digits)
def rmseStd(y_pred,y_true, digits=3):
    """Standard deviation of the squared errors, rounded to ``digits`` decimals.

    NOTE(review): despite the name, no square root is applied to the squared
    errors — confirm this is the intended dispersion measure.
    """
    squaredErrors = (y_pred - y_true) ** 2
    spread = np.std(squaredErrors)
    return np.round(spread, digits)

def mae(y_pred,y_true, digits=3):
    """Mean absolute error between ``y_pred`` and ``y_true``, rounded to ``digits`` decimals."""
    absoluteErrors = np.abs(y_pred - y_true)
    return np.round(np.mean(absoluteErrors), digits)
def maeStd(y_pred,y_true, digits=3):
    """Standard deviation of the absolute errors, rounded to ``digits`` decimals."""
    absoluteErrors = np.abs(y_pred - y_true)
    return np.round(np.std(absoluteErrors), digits)

def maxe(y_pred,y_true, digits=3):
    """Largest absolute error between ``y_pred`` and ``y_true``, rounded to ``digits`` decimals."""
    absoluteErrors = np.abs(y_pred - y_true)
    return np.round(np.max(absoluteErrors), digits)
def maxeStd(y_pred,y_true, digits=3):
    """Standard deviation of the absolute errors, rounded to ``digits`` decimals.

    NOTE(review): this is the same computation as ``maeStd`` — confirm whether
    a different dispersion measure was intended for the max-error metric.
    """
    absoluteErrors = np.abs(y_pred - y_true)
    return np.round(np.std(absoluteErrors), digits)



# Ragged-edge Extrapolation

## Pad na

In [None]:
def padNa(dat: pd.DataFrame) -> pd.DataFrame:
  """Return ``dat`` with every missing value replaced by ``0.0``."""
  zeroFilled = dat.fillna(value=0.0)
  return zeroFilled

## Shift Non Target Variables

In [None]:
def shiftNonTargetVariables(dat: pd.DataFrame, target: str='GDP(SA)') -> pd.DataFrame:
  """Shift each non-target column down so that its last row is not NaN.

  For every column other than ``target``, the number of trailing NaNs is
  counted and the column is shifted down by that many rows, so its most
  recent observation ends up in the last row. The input is not modified.

  Columns that are entirely NaN (including every column of an empty frame)
  are left unchanged; the earlier one-row-at-a-time loop never terminated on
  such a column and raised on an empty frame.

  Parameters
  ----------
  dat : pd.DataFrame
      Frame whose ragged trailing edge should be aligned.
  target : str, default 'GDP(SA)'
      Column that is never shifted.

  Returns
  -------
  pd.DataFrame
      A shifted copy of ``dat``.
  """
  dat = copy.deepcopy(dat)
  for col in dat.columns:
    if col == target:
      continue
    isMissing = dat[col].isna().to_numpy()
    if isMissing.all():
      # Nothing to align on: no observation exists (or the frame is empty).
      continue
    # Position of the first non-NaN when scanning from the end equals the
    # number of trailing NaNs.
    trailingCount = int(isMissing[::-1].argmin())
    if trailingCount:
      dat[col] = dat[col].shift(trailingCount)
  return dat

## ARIMA

In [None]:
def countNasTrailing(series: pd.Series) -> int:
  """Return how many consecutive NaNs sit at the end of ``series``.

  NaNs that are followed by a later non-NaN value are not counted.
  """
  missing = series.isna().to_numpy()
  if missing.all():
    # Every entry is NaN (or the series is empty): the whole length is trailing.
    return int(len(missing))
  # Scanning from the end, the first non-NaN position equals the trailing NaN count.
  return int(missing[::-1].argmin())

In [None]:
def ArimaExrapolationForMonthlySeries_old(series: pd.Series) -> pd.Series:
  """Older variant: fill trailing NaNs of ``series`` with ``pm.auto_arima`` predictions.

  Works on a copy. When there are no trailing NaNs the copy is returned as is.
  Otherwise a non-seasonal model is selected on the non-NaN observations and
  one prediction per trailing NaN is passed to ``fillna``.

  NOTE(review): the ``import pmdarima as pm`` line at the top of the notebook
  is commented out, so ``pm`` appears to be undefined when this runs — confirm
  before calling.
  """
  filled = copy.deepcopy(series)
  horizon = countNasTrailing(filled)
  if horizon != 0:
    observed = filled.dropna()
    model = pm.auto_arima(observed, seasonal=False, m=0)
    forecast = model.predict(horizon)
    filled = filled.fillna(forecast)
  return filled

In [None]:
def ArimaExrapolationForMonthlySeries(series: pd.Series) -> pd.Series:
  """Fill trailing NaNs of ``series`` with ARIMA(1,1,1) forecasts.

  Works on a copy. When there are no trailing NaNs the copy is returned as is.
  Otherwise the model is fitted on the non-NaN observations and one forecast
  per trailing NaN is passed to ``fillna``.
  """
  filled = copy.deepcopy(series)
  horizon = countNasTrailing(filled)
  if horizon != 0:
    observed = filled.dropna()
    fitted = ARIMA(observed, order=(1,1,1)).fit()
    forecast = fitted.forecast(horizon)
    filled = filled.fillna(forecast)
  return filled

In [None]:
def ArimaExrapolationForQuarterlySeries_old(series: pd.Series) -> pd.Series:
  """Older variant: fill missing quarters at the end of ``series`` with ``pm.auto_arima`` predictions.

  Works on a copy. The copy is averaged to quarters with
  ``convertToQuarterlyMean`` and the trailing NaN quarters are counted; when
  there are none the copy is returned as is. Otherwise a non-seasonal model is
  selected on the non-NaN quarters, and the predictions, converted with
  ``asfreq('M')``, are passed to ``fillna`` on the original-frequency copy.

  NOTE(review): the ``import pmdarima as pm`` line at the top of the notebook
  is commented out, so ``pm`` appears to be undefined when this runs — confirm
  before calling.
  """
  filled = copy.deepcopy(series)
  quarterly = convertToQuarterlyMean(filled)
  horizon = countNasTrailing(quarterly)
  if horizon != 0:
    observed = quarterly.dropna()
    model = pm.auto_arima(observed, seasonal=False, m=0)
    forecast = model.predict(horizon)
    filled = filled.fillna(forecast.asfreq('M'))
  return filled

In [None]:
def ArimaExrapolationForQuarterlySeries(series: pd.Series) -> pd.Series:
  """Fill missing quarters at the end of ``series`` with ARIMA(1,1,1) forecasts.

  Works on a copy. The copy is averaged to quarters with
  ``convertToQuarterlyMean`` and the trailing NaN quarters are counted; when
  there are none the copy is returned as is. Otherwise the model is fitted on
  the non-NaN quarters, and the forecasts, converted with ``asfreq('M')``,
  are passed to ``fillna`` on the original-frequency copy.
  """
  filled = copy.deepcopy(series)
  quarterly = convertToQuarterlyMean(filled)
  horizon = countNasTrailing(quarterly)
  if horizon != 0:
    observed = quarterly.dropna()
    fitted = ARIMA(observed, order=(1,1,1)).fit()
    forecast = fitted.forecast(horizon)
    filled = filled.fillna(forecast.asfreq('M'))
  return filled

In [None]:
def ArimaExrapolation(dat: pd.DataFrame, target: str='GDP(SA)') -> pd.DataFrame:
  """Extrapolate the ragged trailing edge of every non-target column with ARIMA.

  Works on a copy of ``dat``. The ``target`` column is left untouched. Columns
  whose name contains ``'GDP'`` go through ``ArimaExrapolationForQuarterlySeries``;
  all other columns go through ``ArimaExrapolationForMonthlySeries``.
  """
  result = copy.deepcopy(dat)
  for name in result.columns:
    if name == target:
      continue
    extrapolate = (
        ArimaExrapolationForQuarterlySeries if 'GDP' in name
        else ArimaExrapolationForMonthlySeries
    )
    result[name] = extrapolate(result[name])
  return result

## K-Nearest Neighbor

In [None]:
def knnExtraplation(dat: pd.DataFrame, target: str='GDP(SA)') -> pd.DataFrame:
  """Fill missing observations with KNN regressors trained on growth rates.

  Columns are split by naming convention: names containing 'GDP' (other than
  `target`) are treated as quarterly, everything else as monthly.

  * Monthly: each column's period-over-period growth is regressed on the
    growth of all monthly columns in the two preceding rows (5 neighbours,
    distance weights).
  * Quarterly: `convertToQuarterlyMean` is applied to the frame without
    `target`; each quarterly column's growth is regressed on the growth of all
    columns in the preceding quarter (3 neighbours, distance weights).

  Missing growth rates are then predicted chronologically, and each prediction
  is turned back into a level as `previous level * (1 + growth)` and written
  into the returned frame.

  Changes relative to the previous implementation:
  * The quarterly prediction step now feeds growth rates to the regressors.
    It used to pass quarterly-mean levels to models that had been fitted on
    growth rates.
  * Values are written with `.loc[row, col]` instead of chained
    `frame[col].loc[row] = ...`, which is not guaranteed to modify the frame.
  * `predict` returns a length-1 array; its scalar element is stored.
  * A `target` whose name does not contain 'GDP' is now accepted.

  Args:
    dat: input frame (rows are assumed to be monthly periods — the code uses
      `period - 1` and `asfreq('M')`); not modified.
    target: column that is neither used as a feature nor extrapolated.

  Returns:
    A deep copy of `dat` with the predicted levels filled in.

  Raises:
    AssertionError: if fewer than two rows remain after `dropStartingNa`.
  """
  dat = copy.deepcopy(dat)

  def _fitKnn(nNeighbors, features, response):
    # The grid holds a single candidate; GridSearchCV is kept so the fitted
    # estimator is of the same kind as before.
    grid = {
        'kneighborsregressor__n_neighbors': [nNeighbors],
        'kneighborsregressor__weights': ['distance']
        }
    return GridSearchCV(
        make_pipeline(StandardScaler(), KNeighborsRegressor()),
        grid
        ).fit(features, response)

  def _growth(levels):
    # Period-over-period growth rate of every column.
    return levels/levels.shift(1) - 1

  regressorDict = dict()

  # Training -- monthly: features are the two previous rows, side by side.
  monthlyVariables = [col for col in dat.columns if col != target and 'GDP' not in col]
  tempDataMonthly = _growth(copy.deepcopy(dat).drop(columns=target)[monthlyVariables])
  tempDataMonthlyNonNa = tempDataMonthly.dropna()
  monthlyFeatures = np.concatenate(
      [tempDataMonthlyNonNa.values[:-2], tempDataMonthlyNonNa.values[1:-1]], axis=1)
  for col in monthlyVariables:
    regressorDict[col] = _fitKnn(5, monthlyFeatures, tempDataMonthlyNonNa[col].values[2:])

  # Training -- quarterly: features are the previous quarter's growth rates.
  quarterlyVariables = [col for col in dat.columns if col != target and 'GDP' in col]
  tempDataQuarterly = _growth(convertToQuarterlyMean(copy.deepcopy(dat).drop(columns=target)))
  tempDataQuarterlyNonNa = tempDataQuarterly.dropna()
  for col in quarterlyVariables:
    regressorDict[col] = _fitKnn(
        3, tempDataQuarterlyNonNa.values[:-1], tempDataQuarterlyNonNa[col].values[1:])

  # Predict -- monthly. Rows are visited in index order so that a predicted
  # growth rate can serve as a feature for the following rows.
  tempDataMonthly = dropStartingNa(tempDataMonthly).copy()
  assert len(tempDataMonthly) > 1
  for period, naCount in zip(tempDataMonthly.index, tempDataMonthly.isna().sum(axis=1)):
    if naCount == 0:
      continue
    for col in monthlyVariables:
      if np.isnan(tempDataMonthly.loc[period, col]):
        history = tempDataMonthly.loc[:period].values
        features = np.concatenate([history[-3], history[-2]]).reshape(1, -1)
        growth = regressorDict[col].predict(features)[0]
        tempDataMonthly.loc[period, col] = growth
        dat.loc[period, col] = dat.loc[period - 1, col] * (1 + growth)

  # Predict -- quarterly. The quarterly frame is rebuilt from `dat` so that it
  # includes the monthly values filled above, and is converted to growth rates
  # so the features match what the regressors were trained on.
  tempDataQuarterly = dropStartingNa(
      _growth(convertToQuarterlyMean(copy.deepcopy(dat).drop(columns=target)))).copy()
  assert len(tempDataQuarterly) > 1
  for quarter, naCount in zip(tempDataQuarterly.index, tempDataQuarterly.isna().sum(axis=1)):
    if naCount == 0:
      continue
    for col in quarterlyVariables:
      if np.isnan(tempDataQuarterly.loc[quarter, col]):
        features = tempDataQuarterly.loc[:quarter].values[-2].reshape(1, -1)
        growth = regressorDict[col].predict(features)[0]
        tempDataQuarterly.loc[quarter, col] = growth
        # Quarterly values are addressed in `dat` via `asfreq('M')` of the quarter.
        dat.loc[quarter.asfreq('M'), col] = dat.loc[(quarter - 1).asfreq('M'), col] * (1 + growth)

  return dat

In [None]:
def knnExtraplation_old2(dat: pd.DataFrame, target: str='GDP(SA)') -> pd.DataFrame:
  """Legacy KNN extrapolation working on period-over-period ratios.

  Regressors are fitted on `value / previous value` (no `- 1`), using the
  previous row as features. Predicted ratios are multiplied with the previous
  level and written into a deep copy of `dat`, which is returned.

  Columns whose name contains 'GDP' (except `target`) are handled as quarterly
  via `convertToQuarterlyMean`; all other columns are handled as monthly.
  """
  dat = copy.deepcopy(dat)

  # Training
  regressorDict = dict()
  # Single-candidate grid for the monthly regressors.
  parameters = {
      'kneighborsregressor__n_neighbors': [5], #range(2,10),
      'kneighborsregressor__weights':['distance']
      }

  # Monthly
  monthlyVariables = list(filter(lambda col: 'GDP' not in col, dat.columns))
  tempDataMonthly = copy.deepcopy(dat).drop(columns=target)[monthlyVariables]
  # Ratio to the previous row.
  tempDataMonthly = tempDataMonthly/tempDataMonthly.shift(1)
  tempDataMonthlyNonNa = tempDataMonthly.dropna()
  for col in monthlyVariables:
    # Features: all monthly ratios of row t-1; response: this column's ratio at row t.
    regressorDict[col] = GridSearchCV(
        make_pipeline(StandardScaler(), KNeighborsRegressor()),
        parameters
        ).fit(tempDataMonthlyNonNa.values[:-1], tempDataMonthlyNonNa[col].values[1:])

  # Single-candidate grid for the quarterly regressors.
  parameters = {
      'kneighborsregressor__n_neighbors': [3], #range(2,10),
      'kneighborsregressor__weights':['distance']
      }
  # Quarterly
  quarterlyVariables = list(filter(lambda col: 'GDP' in col, dat.columns))
  # Raises ValueError when `target` is not among the 'GDP' columns.
  quarterlyVariables.remove(target)
  tempDataQuarterly = copy.deepcopy(dat).drop(columns=target)
  tempDataQuarterly = convertToQuarterlyMean(tempDataQuarterly)
  tempDataQuarterly = tempDataQuarterly/tempDataQuarterly.shift(1)
  tempDataQuarterlyNonNa = tempDataQuarterly.dropna()
  for col in quarterlyVariables:
    regressorDict[col] = GridSearchCV(
        make_pipeline(StandardScaler(), KNeighborsRegressor()),
        parameters
        ).fit(tempDataQuarterlyNonNa.values[:-1], tempDataQuarterlyNonNa[col].values[1:])

  # Predict

  # Monthly
  tempDataMonthly = dropStartingNa(tempDataMonthly)
  assert len(tempDataMonthly) > 0
  # `val` is the number of missing cells in the row; `date` shadows datetime.date locally.
  for date, val in zip(tempDataMonthly.index, tempDataMonthly.isna().apply(sum, axis=1)):
    if val == 0:
      continue
    for col in monthlyVariables:
      if np.isnan(tempDataMonthly[col].loc[date]):
        # Features are the previous row of the ratio frame (index -2 of the slice ending at `date`).
        tempDataMonthly[col].loc[date] = regressorDict[col].predict(tempDataMonthly.loc[:date].values[-2].reshape(1, -1))
        # Level = previous level * predicted ratio.
        dat[col].loc[date] = dat[col].loc[date -1] * tempDataMonthly[col].loc[date]

  # Quarterly
  # NOTE(review): this frame holds quarterly means (no ratio is taken here), while the
  # quarterly regressors above were fitted on ratios -- looks inconsistent; confirm.
  tempDataQuarterly = dropStartingNa(convertToQuarterlyMean(copy.deepcopy(dat).drop(columns=target)))
  assert len(tempDataQuarterly) > 0
  for date, val in zip(tempDataQuarterly.index, tempDataQuarterly.isna().apply(sum, axis=1)):
    if val == 0:
      continue
    for col in quarterlyVariables:
      if np.isnan(tempDataQuarterly[col].loc[date]):
        tempDataQuarterly[col].loc[date] = regressorDict[col].predict(tempDataQuarterly.loc[:date].values[-2].reshape(1, -1))
        # The quarterly value is addressed in `dat` via asfreq('M') of the quarter.
        dat[col].loc[date.asfreq('M')] = dat[col].loc[(date - 1).asfreq('M')] * tempDataQuarterly[col].loc[date]

  return dat

In [None]:
def knnExtraplation_old(dat: pd.DataFrame, target: str='GDP(SA)') -> pd.DataFrame:
  """Legacy KNN extrapolation working directly on levels.

  Regressors are fitted on the raw values, using the previous row as features,
  and each prediction is written as is into a deep copy of `dat`, which is
  returned.

  Columns whose name contains 'GDP' (except `target`) are handled as quarterly
  via `convertToQuarterlyMean`; all other columns are handled as monthly.
  """
  dat = copy.deepcopy(dat)

  # Training
  regressorDict = dict()
  # Single-candidate grid for the monthly regressors.
  parameters = {
      'kneighborsregressor__n_neighbors': [5], #range(2,10),
      'kneighborsregressor__weights':['distance']
      }

  # Monthly
  monthlyVariables = list(filter(lambda col: 'GDP' not in col, dat.columns))
  tempDataMonthly = copy.deepcopy(dat).drop(columns=target)[monthlyVariables]
  tempDataMonthlyNonNa = tempDataMonthly.dropna()
  for col in monthlyVariables:
    # Features: all monthly values of row t-1; response: this column's value at row t.
    regressorDict[col] = GridSearchCV(
        make_pipeline(StandardScaler(), KNeighborsRegressor()),
        parameters
        ).fit(tempDataMonthlyNonNa.values[:-1], tempDataMonthlyNonNa[col].values[1:])

  # Single-candidate grid for the quarterly regressors.
  parameters = {
      'kneighborsregressor__n_neighbors': [3], #range(2,10),
      'kneighborsregressor__weights':['distance']
      }
  # Quarterly
  quarterlyVariables = list(filter(lambda col: 'GDP' in col, dat.columns))
  # Raises ValueError when `target` is not among the 'GDP' columns.
  quarterlyVariables.remove(target)
  tempDataQuarterly = copy.deepcopy(dat).drop(columns=target)
  tempDataQuarterly = convertToQuarterlyMean(tempDataQuarterly)
  tempDataQuarterlyNonNa = tempDataQuarterly.dropna()
  for col in quarterlyVariables:
    regressorDict[col] = GridSearchCV(
        make_pipeline(StandardScaler(), KNeighborsRegressor()),
        parameters
        ).fit(tempDataQuarterlyNonNa.values[:-1], tempDataQuarterlyNonNa[col].values[1:])

  # Predict

  # Monthly
  tempDataMonthly = dropStartingNa(tempDataMonthly)
  assert len(tempDataMonthly) > 0
  # `val` is the number of missing cells in the row; `date` shadows datetime.date locally.
  for date, val in zip(tempDataMonthly.index, tempDataMonthly.isna().apply(sum, axis=1)):
    if val == 0:
      continue
    for col in monthlyVariables:
      if np.isnan(tempDataMonthly[col].loc[date]):
        # Features are the previous row (index -2 of the slice ending at `date`).
        tempDataMonthly[col].loc[date] = regressorDict[col].predict(tempDataMonthly.loc[:date].values[-2].reshape(1, -1))
        dat[col].loc[date] = tempDataMonthly[col].loc[date]

  # Quarterly
  # Rebuilt from `dat` so the monthly values filled above are included.
  tempDataQuarterly = dropStartingNa(convertToQuarterlyMean(copy.deepcopy(dat).drop(columns=target)))
  assert len(tempDataQuarterly) > 0
  for date, val in zip(tempDataQuarterly.index, tempDataQuarterly.isna().apply(sum, axis=1)):
    if val == 0:
      continue
    for col in quarterlyVariables:
      if np.isnan(tempDataQuarterly[col].loc[date]):
        tempDataQuarterly[col].loc[date] = regressorDict[col].predict(tempDataQuarterly.loc[:date].values[-2].reshape(1, -1))
        # The quarterly value is addressed in `dat` via asfreq('M') of the quarter.
        dat[col].loc[date.asfreq('M')] = tempDataQuarterly[col].loc[date]

  return dat

# Select Variables

## Dynamic Selection

### Correlation Based

In [None]:
metaData.Category.unique()

array(['target', '통화금융', '자산가격', '물가', '대외거래', '환율', '기업심리', '경제심리',
       '경기종합', '광공업', '서비스업', '건설업', '고용', '소비자심리', '재정', '에너지', '세계경기',
       '국제원자재'], dtype=object)

In [None]:
def selectVarialbes(dat: pd.DataFrame, metaData: pd.DataFrame, target: str='GDP(SA)') -> List[str]:
  """Pick, per metadata category, the column most correlated with `target`.

  `metaData` is indexed by column name and has a `Category` column. For every
  distinct category other than 'target', the columns of that category are
  correlated with `target` and the one with the largest (signed) correlation
  is selected.

  Args:
    dat: data holding `target` and every column listed in `metaData`.
    metaData: frame whose index holds column names and whose `Category`
      column assigns each of them to a category.
    target: name of the reference column.

  Returns:
    `[target]` followed by one column name per category, in the order in
    which the categories first appear in `metaData`.
  """
  def bestInCategory(category):
    members = metaData[metaData.Category == category].index
    candidates = dat[[target]].join([dat[members]])
    return candidates.corr()[target].drop(target).idxmax()

  categories = [name for name in metaData.Category.unique() if name != 'target']
  return [target] + [bestInCategory(name) for name in categories]

## Pre-selected

### Sentiment Variables

In [None]:
# Pre-selected column names used by the "Sentiment Based" pipelines below
# (each pipeline prepends 'GDP(SA)' to this list before selecting columns).
sentimentBasedVarialbes = ['전산업매출실적BSI', '전산업업황실적BSI', '전산업채산성실적BSI', '전산업자금사정실적BSI',
       '전산업인력사정실적BSI', '제조업업황실적BSI(SA)', '비제조업업황실적BSI(SA)', '제조업업황전망BSI(SA)',
       '비제조업업황전망BSI(SA)', 'NSI', '경제심리지수(순환변동치)', '현재경기판단CSI', '소비자심리지수']

### Core Variables

In [None]:
# Pre-selected column names used by the "Core" pipelines below
# (each pipeline prepends 'GDP(SA)' to this list before selecting columns).
coreVarialbes = ['KOSPI(평균)', '수출액', '원달러환율(평균)', '설비투자지수(SA)', '전산업생산지수(SA)', '소매판매액지수(SA)', '선행지수순환변동치', '실업률(SA)', 'WTI유가현물']

# Split TrainX, TrainY, TestX

In [None]:
def splitAndRollingData(dat: pd.DataFrame, seqLength: int, target: str='GDP(SA)', ahead: int=0) -> Tuple[np.ndarray]:
  """Build rolling-window training arrays and the final test window.

  The target is not used as a feature (no lag term). Labels are the target
  values from row `seqLength - 1` up to, but excluding, the last `1 + ahead`
  rows. There are `len(dat) - seqLength - ahead` training windows, each made
  of `seqLength` consecutive feature rows; the test window is the last
  `seqLength` feature rows.

  NOTE: a later cell of this notebook defines another function with the same
  name, which replaces this one once that cell has run.

  Args:
    dat: frame holding `target` and the feature columns; not modified.
    seqLength: number of rows per window.
    target: name of the label column.
    ahead: number of extra trailing rows kept out of training.

  Returns:
    `(trainX, trainY, testX)`; `testX` has a leading axis of length 1.
  """
  frame = copy.deepcopy(dat)
  labels = frame[target].iloc[seqLength - 1:-1 - ahead].values
  features = frame.drop(columns=target)

  windowCount = len(features) - seqLength - ahead
  windows = [
      copy.deepcopy(features.iloc[first:first + seqLength]).values
      for first in range(windowCount)
  ]

  lastWindow = copy.deepcopy(features.iloc[-seqLength:]).values
  return (np.array(windows), labels, lastWindow[np.newaxis, ...])


In [None]:
def splitAndRollingData(dat: pd.DataFrame, seqLength: int, target: str='GDP(SA)', ahead: int=0) -> Tuple[np.ndarray]:
  """Build rolling-window training arrays using every observed target value.

  The target is not used as a feature (no lag term). Labels are the non-NaN
  target values from position `seqLength - 1` onwards. One training window of
  `seqLength` consecutive feature rows is produced per label, starting at
  feature rows 0, 1, 2, ...; the test window is the last `seqLength` feature
  rows.

  Windows and labels are paired by position, so they line up row-for-row only
  when the non-NaN target values sit in the leading rows of `dat`.

  Args:
    dat: frame holding `target` and the feature columns; not modified.
    seqLength: number of rows per window.
    target: name of the label column.
    ahead: accepted for call compatibility; not read by this implementation.

  Returns:
    `(trainX, trainY, testX)`; `testX` has a leading axis of length 1.
  """
  frame = copy.deepcopy(dat)
  observed = frame[target].dropna()
  labels = observed.iloc[seqLength - 1:].values
  features = frame.drop(columns=target)

  windows = [
      copy.deepcopy(features.iloc[first:first + seqLength]).values
      for first in range(len(labels))
  ]

  lastWindow = copy.deepcopy(features.iloc[-seqLength:]).values
  return (np.array(windows), labels, lastWindow[np.newaxis, ...])


# Models

## LQR

In [None]:
# Quantile-regression settings. `vcov`, `kernel`, `bandwidth` and `p_tol` are
# read by LQRPredict at call time. `q` is not read by LQRPredict (it passes the
# literal 0.5) and is rebound to a range in the OptimARIMA cell below.
q = 0.5
vcov = 'robust'
kernel = 'epa'
bandwidth = 'hsheather'
p_tol = 1e-01  # Impact!!


def LQRPredict(trainX, trainY, testX):
    """Fit a median (q=0.5) quantile regression and predict for `testX`.

    Each sample of `trainX` / `testX` is flattened to one row before it is
    handed to `QuantReg`.

    Args:
        trainX: training windows; first axis indexes samples.
        trainY: training labels, one per training window.
        testX: windows to predict for; first axis indexes samples.

    Returns:
        The predictions for the flattened `testX`.
    """
    flatTrain = trainX.reshape(len(trainX), -1)
    fitted = QuantReg(trainY, flatTrain).fit(
        q=0.5, vcov=vcov, kernel=kernel, bandwidth=bandwidth, p_tol=p_tol)
    flatTest = testX.reshape(len(testX), -1)
    return fitted.predict(flatTest)

## OptimARIMA

In [None]:
def optimize_mARIMA(parameters_list, endg, exog):
    """
        Fit one ARIMA model per candidate order and rank the orders by AIC.

        parameters_list - list of (p, d, q) tuples
        endg - the endogenous series
        exog - the exogenous variables

        Returns a DataFrame with one row per candidate, sorted by ascending
        AIC (lower is better). The column labelled '(p,q)' holds the full
        order tuple; the column 'AIC' holds the fitted model's AIC.
    """
    scored = []
    for order in tqdm_notebook(parameters_list):
        fitted = ARIMA(endg, exog, order=(order[0], order[1], order[2])).fit()
        scored.append([order, fitted.aic])

    ranking = pd.DataFrame(scored)
    ranking.columns = ['(p,q)', 'AIC']
    return ranking.sort_values(by='AIC', ascending=True).reset_index(drop=True)

# Candidate ARIMA orders: p in 0..3, d in 0..1, q in 0..1 (16 combinations).
# Note that `q` here rebinds the module-level `q` set in the LQR cell above.
p = range(0, 4, 1)
d = range(0, 2, 1)
q = range(0, 2, 1)
# `product` is presumably itertools.product (imported outside this section) -- TODO confirm.
parameters = product(p, d, q)
parameters_list = list(parameters)

# Silences every warning for the rest of the session, not only those raised
# while fitting the ARIMA candidates.
import warnings
warnings.filterwarnings("ignore")

def OAMPredict(trainX, trainY, testX):
    """Forecast one step with the AIC-best ARIMA model with exogenous inputs.

    Each sample of `trainX` / `testX` is flattened to one row. Every order in
    the module-level `parameters_list` is scored with `optimize_mARIMA`; the
    order with the lowest AIC is printed, refitted on the training data and
    used for a one-step forecast with `testX` as exogenous input.

    Args:
        trainX: training windows; first axis indexes samples.
        trainY: training labels, one per training window.
        testX: window(s) supplying the exogenous values for the forecast.

    Returns:
        The one-step-ahead forecast.
    """
    trainX = trainX.reshape(len(trainX), -1)
    testX = testX.reshape(len(testX), -1)
    result_df = optimize_mARIMA(parameters_list, trainY, trainX)
    # Purely positional lookup of the best order. `result_df.iloc[0][0]` went
    # through Series.__getitem__ with an integer key on a string-labelled
    # Series, which pandas has deprecated.
    best_order = result_df.iloc[0, 0]
    print(best_order)
    return ARIMA(trainY, exog=trainX, order=best_order).fit().forecast(1,exog=testX)

## SVM

In [None]:
def SVRPredict(trainX, trainY, testX):
  """Fit an RBF support-vector regressor and predict for `testX`.

  Each sample of `trainX` / `testX` is flattened to one row before fitting
  and predicting.

  Returns:
    The predictions, one per sample of `testX`.
  """
  regressor = SVR(kernel='rbf', gamma=1e-8, C=4e5, epsilon=1e-12, tol=1e-4)
  fitted = regressor.fit(trainX.reshape(len(trainX), -1), trainY)
  flatTest = testX.reshape(len(testX), -1)
  return fitted.predict(flatTest)

## Random Forest

In [None]:
def randomForestPredict(trainX, trainY, testX):
  """Fit a 100-tree random forest (random_state=0) and predict for `testX`.

  Each sample of `trainX` / `testX` is flattened to one row before fitting
  and predicting.

  Returns:
    The predictions, one per sample of `testX`.
  """
  forest = RFR(n_estimators=100, random_state=0)
  fitted = forest.fit(trainX.reshape(len(trainX), -1), trainY)
  flatTest = testX.reshape(len(testX), -1)
  return fitted.predict(flatTest)

## XGBoost

In [None]:
# XGBoost settings, read by XGBoostPredict at call time.
subsample = 0.5
reg_alpha = 1.5
reg_lambda = 1.0
gamma = 2.0
learning_rate = 0.3
def XGBoostPredict(trainX, trainY, testX):
  """Fit a gradient-boosted tree regressor and predict for `testX`.

  Each sample of `trainX` / `testX` is flattened to one row before fitting
  and predicting. The model uses 50 trees of depth 3 plus the module-level
  `learning_rate`, `subsample`, `reg_alpha`, `reg_lambda` and `gamma`.

  The seed keyword was previously misspelled (`randome_state`), so it was not
  applied as `random_state`; it is now passed under the correct name.

  Returns:
    The predictions, one per sample of `testX`.
  """
  model = XGBR(
      n_estimators=50,
      learning_rate=learning_rate,
      max_depth=3,
      subsample=subsample,
      random_state=0,
      reg_alpha=reg_alpha,
      reg_lambda=reg_lambda,
      gamma=gamma)
  return model.fit(trainX.reshape(len(trainX), -1), trainY).predict(testX.reshape(len(testX), -1))

# Forecast

In [None]:
# Result table: one row per horizon, one column per model; the pipeline cells
# below fill it in cell by cell.
forcasts = pd.DataFrame(index=['0Q Ahead', '1Q Ahead'], columns=['lqr_nowcasting', 'oam_nowcasting', 'svr_nowcasting', 'rf_nowcasting', 'xgb_nowcasting'])

## SA

In [None]:
data = x13SA(data)

In [None]:
data.to_excel(f'data(SA){datetime.today().strftime("%Y%m%d")}.xlsx')

In [None]:
data = pd.read_excel(f'data(SA){datetime.today().strftime("%Y%m%d")}.xlsx', index_col=0)
# data = pd.read_excel(f'data(SA){20240315}.xlsx', index_col=0)

In [None]:
data.index = pd.PeriodIndex(data.index, freq='M')

## Manipulate

In [None]:
# data = data.reindex(pd.period_range(start='1990-01', end='2024-6', freq='M'))

In [None]:
# data = extend(data, '202312', '수출액', 2.1, 'yoy')
# data = extend(data, '202312', '수입액', -11.1, 'yoy')

In [None]:
# data.loc['202307', '소매판매액지수(SA)'] = np.nan
# data.loc['202307', '설비투자지수(SA)'] = np.nan
# data.loc['202307', '제조업생산지수(SA)'] = np.nan
# data.loc['202307', '전산업생산지수서비스업(SA)'] = np.nan
# data.loc['202308', 'WTI유가현물'] = np.nan

In [None]:
# data.loc['202307', '소비자물가지수'] = np.nan
# data.loc['202307', '소비자물가지수(농산물석유류제외)'] = np.nan
# data.loc['202307', '소비자물가지수(식료품에너지제외)'] = np.nan

## 1Q Ahead

In [None]:
data = data.reindex(pd.period_range(start='1990-01', end='2024-9', freq='M'))

### Core + ARIMA + (LQR + OptimARIMA)

In [None]:
seqLength = 1

In [None]:
# 1Q-ahead pipeline: core variables -> ARIMA extrapolation -> LQR and OptimARIMA.
# Reads the notebook globals `data`, `coreVarialbes`, `seqLength`; writes `forcasts`.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +coreVarialbes]

# extrapolation
tempData = ArimaExrapolation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
# NOTE: of the two splitAndRollingData definitions above, the later one does not read `ahead`.
trainX, trainY, testX = splitAndRollingData(tempData, seqLength, ahead=1)
print(trainX.shape, trainY.shape, testX.shape)

# predict
test_y = LQRPredict(trainX, trainY, testX)
forcasts.loc['1Q Ahead', 'lqr_nowcasting'] = test_y[0]

test_y = OAMPredict(trainX, trainY, testX)
forcasts.loc['1Q Ahead', 'oam_nowcasting'] = test_y[0]

(96, 1, 9) (96,) (1, 1, 9)


  0%|          | 0/16 [00:00<?, ?it/s]

(0, 0, 0)


### Correlation Based Variable Selection + KNN + RF

In [None]:
seqLength = 1

In [None]:
# 1Q-ahead pipeline: correlation-based selection -> KNN extrapolation -> random forest.
# Reads the notebook globals `data`, `metaData`, `seqLength`; leaves the
# prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables (correlations are computed on the preprocessed data)
selectedVariables = selectVarialbes(preprocess(tempData), metaData)
tempData = tempData[selectedVariables]

# extrapolation
tempData = knnExtraplation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
# NOTE: of the two splitAndRollingData definitions above, the later one does not read `ahead`.
trainX, trainY, testX = splitAndRollingData(tempData, seqLength, ahead=1)
print(trainX.shape, trainY.shape, testX.shape)

# predict
#test_y = SVRPredict(trainX, trainY, testX)
test_y = randomForestPredict(trainX, trainY, testX)
#test_y = XGBoostPredict(trainX, trainY, testX)

(58, 1, 17) (58,) (1, 1, 17)


In [None]:
forcasts.loc['1Q Ahead', 'rf_nowcasting'] = test_y[0]

In [None]:
selectedVariables

['GDP(SA)',
 '협의통화(평잔,SA)',
 'KOSPI(평균)',
 '수입물가지수',
 '수입물량지수',
 '원파운드환율(평균)',
 '제조업내수판매실적BSI',
 '경제심리지수(순환변동치)',
 '경기동행종합지수',
 '제조업제품출하지수(SA)',
 '소매판매액지수(SA)',
 '건설기성액(SA)',
 '임시근로자수(SA)',
 '소비자심리지수',
 '통합재정수지',
 '제조업전력사용량',
 '세계무역량지수(SA)',
 'Dubai유가현물']

### Sentiment Based Variable Selection + ARIMA + XGB

In [None]:
seqLength = 1

In [None]:
# 1Q-ahead pipeline: sentiment variables -> ARIMA extrapolation -> XGBoost.
# Reads the notebook globals `data`, `sentimentBasedVarialbes`, `seqLength`;
# leaves the prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +sentimentBasedVarialbes]

# extrapolation
tempData = ArimaExrapolation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
# NOTE: of the two splitAndRollingData definitions above, the later one does not read `ahead`.
trainX, trainY, testX = splitAndRollingData(tempData, seqLength, ahead=1)
print(trainX.shape, trainY.shape, testX.shape)

# predict
#test_y = SVRPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'svr_nowcasting'] = test_y[0]

#test_y = randomForestPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'rf_nowcasting'] = test_y[0]

test_y = XGBoostPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'xgb_nowcasting'] = test_y[0]

(62, 1, 13) (62,) (1, 1, 13)


In [None]:
forcasts.loc['1Q Ahead', 'xgb_nowcasting'] = test_y[0]

### Core Variable Selection + KNN + SVR

In [None]:
seqLength = 1

In [None]:
# 1Q-ahead pipeline: core variables -> KNN extrapolation -> SVR.
# Reads the notebook globals `data`, `coreVarialbes`, `seqLength`; leaves the
# prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +coreVarialbes]

# extrapolation
tempData = knnExtraplation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
# NOTE: of the two splitAndRollingData definitions above, the later one does not read `ahead`.
trainX, trainY, testX = splitAndRollingData(tempData, seqLength, ahead=1)
print(trainX.shape, trainY.shape, testX.shape)

# predict
test_y = SVRPredict(trainX, trainY, testX)
#test_y = randomForestPredict(trainX, trainY, testX)
#test_y = XGBoostPredict(trainX, trainY, testX)

(96, 1, 9) (96,) (1, 1, 9)


In [None]:
forcasts.loc['1Q Ahead', 'svr_nowcasting'] = test_y[0]

## 0Q Ahead

In [None]:
data = data.reindex(pd.period_range(start='1990-01', end='2024-6', freq='M'))

### Core + ARIMA + (LQR + OptimARIMA)

In [None]:
seqLength = 1

In [None]:
# 0Q-ahead pipeline: core variables -> ARIMA extrapolation -> LQR and OptimARIMA.
# Reads the notebook globals `data`, `coreVarialbes`, `seqLength`; writes `forcasts`.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +coreVarialbes]

# extrapolation
tempData = ArimaExrapolation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
trainX, trainY, testX = splitAndRollingData(tempData, seqLength)
print(trainX.shape, trainY.shape, testX.shape)

# predict
test_y = LQRPredict(trainX, trainY, testX)
forcasts.loc['0Q Ahead', 'lqr_nowcasting'] = test_y[0]

test_y = OAMPredict(trainX, trainY, testX)
forcasts.loc['0Q Ahead', 'oam_nowcasting'] = test_y[0]

(96, 1, 9) (96,) (1, 1, 9)


  0%|          | 0/16 [00:00<?, ?it/s]

(0, 0, 0)


In [None]:
forcasts

Unnamed: 0,lqr_nowcasting,oam_nowcasting,svr_nowcasting,rf_nowcasting,xgb_nowcasting
0Q Ahead,-0.395558,0.309678,,,
1Q Ahead,-0.056849,0.422019,0.819967,0.658617,0.785686


### Correlation Based Variable Selection + KNN + RF

In [None]:
seqLength = 1

In [None]:
# 0Q-ahead pipeline: correlation-based selection -> KNN extrapolation -> random forest.
# Reads the notebook globals `data`, `metaData`, `seqLength`; leaves the
# prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables (correlations are computed on the preprocessed data)
selectedVariables = selectVarialbes(preprocess(tempData), metaData)
tempData = tempData[selectedVariables]

# extrapolation
tempData = knnExtraplation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
trainX, trainY, testX = splitAndRollingData(tempData, seqLength)
print(trainX.shape, trainY.shape, testX.shape)

# predict
#test_y = SVRPredict(trainX, trainY, testX)
test_y = randomForestPredict(trainX, trainY, testX)
#test_y = XGBoostPredict(trainX, trainY, testX)

(58, 1, 17) (58,) (1, 1, 17)


In [None]:
forcasts.loc['0Q Ahead', 'rf_nowcasting'] = test_y[0]

In [None]:
selectedVariables

['GDP(SA)',
 '협의통화(평잔,SA)',
 'KOSPI(평균)',
 '수입물가지수',
 '수입물량지수',
 '원파운드환율(평균)',
 '제조업내수판매실적BSI',
 '경제심리지수(순환변동치)',
 '경기동행종합지수',
 '제조업제품출하지수(SA)',
 '소매판매액지수(SA)',
 '건설기성액(SA)',
 '임시근로자수(SA)',
 '소비자심리지수',
 '통합재정수지',
 '제조업전력사용량',
 '세계무역량지수(SA)',
 'Dubai유가현물']

### Sentiment Based Variable Selection + ARIMA + XGB

In [None]:
seqLength = 1

In [None]:
# 0Q-ahead pipeline: sentiment variables -> ARIMA extrapolation -> XGBoost.
# Reads the notebook globals `data`, `sentimentBasedVarialbes`, `seqLength`;
# leaves the prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +sentimentBasedVarialbes]

# extrapolation
tempData = ArimaExrapolation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
trainX, trainY, testX = splitAndRollingData(tempData, seqLength)
print(trainX.shape, trainY.shape, testX.shape)

# predict
#test_y = SVRPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'svr_nowcasting'] = test_y[0]

#test_y = randomForestPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'rf_nowcasting'] = test_y[0]

test_y = XGBoostPredict(trainX, trainY, testX)
#forcasts.loc[testDate, 'xgb_nowcasting'] = test_y[0]

(62, 1, 13) (62,) (1, 1, 13)


In [None]:
forcasts.loc['0Q Ahead', 'xgb_nowcasting'] = test_y[0]

### Core Variable Selection + KNN + SVR

In [None]:
seqLength = 1

In [None]:
# 0Q-ahead pipeline: core variables -> KNN extrapolation -> SVR.
# Reads the notebook globals `data`, `coreVarialbes`, `seqLength`; leaves the
# prediction in `test_y` for the next cell to store.
tempData = copy.deepcopy(data)

# select variables
tempData = tempData[['GDP(SA)'] +coreVarialbes]

# extrapolation
tempData = knnExtraplation(tempData)

# preprocess (`preprocess` is defined outside this section)
tempData = preprocess(tempData)

# drop na
tempData = dropStartingNa(tempData)

# split dataset
trainX, trainY, testX = splitAndRollingData(tempData, seqLength)
print(trainX.shape, trainY.shape, testX.shape)

# predict
test_y = SVRPredict(trainX, trainY, testX)
#test_y = randomForestPredict(trainX, trainY, testX)
#test_y = XGBoostPredict(trainX, trainY, testX)

(96, 1, 9) (96,) (1, 1, 9)


In [None]:
forcasts.loc['0Q Ahead', 'svr_nowcasting'] = test_y[0]

## Result

In [None]:
forcasts

Unnamed: 0,lqr_nowcasting,oam_nowcasting,svr_nowcasting,rf_nowcasting,xgb_nowcasting
0Q Ahead,-0.395558,0.309678,0.94721,0.634819,0.785686
1Q Ahead,-0.056849,0.422019,0.819967,0.658617,0.785686


In [None]:
forcasts.to_excel(f'result{datetime.today().strftime("%Y%m%d")}.xlsx')