# `indexPrice`

- `KRX_KOSPI200_indexPrice`
  - 개요
    - 2011-01-03~2022-09-30
    - KRX KOSPI200 지수 가격 데이터
  - Data Source
    - [KRX_정보데이터시스템](http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201010105)
- `KRX_KOSPI200ESG_indexPrice`
  - 개요
    - 2012-01-03~2022-09-30
      - 2011/01/03 부터 조회하였으나 2011년 데이터 없음.
    - KRX KOSPI200ESG 지수 가격 데이터
  - Data Source
    - [KRX_정보데이터시스템](http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201010105&idxCd=1&idxCd2=180)

# import

In [1]:
import os
import sys
import time
import pickle
import warnings
import urllib.request
from glob import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import koreanize_matplotlib
import requests
from bs4 import BeautifulSoup as bs

import FinanceDataReader as fdr
from tqdm import tqdm


# Suppress every Python warning for the rest of the notebook.
warnings.filterwarnings('ignore')
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None
# pd.options.display.float_format = '{:.4f}'.format
plt.style.use("ggplot")
# Ask the inline backend for high-resolution ('retina') figures.
%config InlineBackend.figure_format = 'retina'


# Make the project-local helper module importable; this must run before the import below.
sys.path.append("../import")
import module as m

# Data directory prefix taken from the helper module (the recorded output shows "../data/").
data_path = m.data_path

# kip : KOSPI 200 stock Index Price (raw CSV input)
fp_kip = f"{data_path}KRX_KOSPI200_indexPrice_raw.csv"
# keip : KOSPI 200 ESG stock Index Price (raw CSV input)
fp_keip = f"{data_path}KRX_KOSPI200ESG_indexPrice_raw.csv"

# ip0 : Index Price, both indices combined along axis=0 (output path)
fp_ip0 = f"{data_path}indexPrice0.parquet"
# ip1 : Index Price, both indices combined along axis=1 (output path)
fp_ip1 = f"{data_path}indexPrice1.parquet"

data_path : ../data/
fp
{'esgRating': '../data/esgRating.parquet',
 'finaStat': '../data/finaStat.parquet',
 'stockPrice': '../data/stockPrice.parquet',
 'stockPrice_year': '../data/stockPrice_year.parquet'}


# `indexPrice`

## DataLoad

In [2]:
# Load the KOSPI200 raw file through the project helper and print a summary of the frame.
# NOTE(review): the recorded output shows the price columns as float16 after this load
# (e.g. 종가 appears as 281.25 where the plain read_csv output shows 281.36), and df_kip is
# re-read with pd.read_csv in the following cell, so this load looks superseded.
# TODO confirm and consider dropping this cell.
df_kip = m.DataLoad(fp_kip)
m.Check_df(df_kip)

Mem. usage decreased to  0.09 Mb (60.0% reduction)


[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(2895, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2895 entries, 0 to 2894
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      2895 non-null   object 
 1   종가      2895 non-null   float16
 2   대비      2895 non-null   float16
 3   등락률     2895 non-null   float16
 4   시가      2895 non-null   float16
 5   고가      2895 non-null   float16
 6   저가      2895 non-null   float16
 7   거래량     2895 non-null   float32
 8   거래대금    2895 non-null   float32
 9   상장시가총액  2895 non-null   float32
dtypes: float16(6), float32(3), object(1)
memory usage: 90.6+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2022/09/30,281.25,-1.290039,-0.459961,281.75,284.25,278.5,143738.0,6233068.0,1485352000.0
1,2022/09/29,282.75,-0.540039,-0.189941,286.5,287.75,282.5,128427.0,5584454.0,1496674000.0
2,2022/09/28,283.25,-7.011719,-2.419922,287.75,289.75,280.75,160839.0,6903850.0,1495226000.0
3,2022/09/27,290.25,0.429932,0.150024,290.25,290.75,286.75,152310.0,6310896.0,1532257000.0
4,2022/09/26,289.75,-7.96875,-2.679688,293.75,294.0,289.0,177692.0,7088012.0,1530092000.0




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(2895, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2895 entries, 0 to 2894
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      2895 non-null   object 
 1   종가      2895 non-null   float16
 2   대비      2895 non-null   float16
 3   등락률     2895 non-null   float16
 4   시가      2895 non-null   float16
 5   고가      2895 non-null   float16
 6   저가      2895 non-null   float16
 7   거래량     2895 non-null   float32
 8   거래대금    2895 non-null   float32
 9   상장시가총액  2895 non-null   float32
dtypes: float16(6), float32(3), object(1)
memory usage: 90.6+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2022/09/30,281.25,-1.290039,-0.459961,281.75,284.25,278.5,143738.0,6233068.0,1485352000.0
1,2022/09/29,282.75,-0.540039,-0.189941,286.5,287.75,282.5,128427.0,5584454.0,1496674000.0
2,2022/09/28,283.25,-7.011719,-2.419922,287.75,289.75,280.75,160839.0,6903850.0,1495226000.0
3,2022/09/27,290.25,0.429932,0.150024,290.25,290.75,286.75,152310.0,6310896.0,1532257000.0
4,2022/09/26,289.75,-7.96875,-2.679688,293.75,294.0,289.0,177692.0,7088012.0,1530092000.0




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


In [2]:
# Read the KOSPI200 raw CSV directly with pandas (file is cp949-encoded),
# then print the project helper's summary of the frame.
df_kip = pd.read_csv(
    fp_kip,
    index_col=False,
    encoding="cp949",
)
m.Check_df(df_kip)



[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(2895, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2895 entries, 0 to 2894
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      2895 non-null   object 
 1   종가      2895 non-null   float64
 2   대비      2895 non-null   float64
 3   등락률     2895 non-null   float64
 4   시가      2895 non-null   float64
 5   고가      2895 non-null   float64
 6   저가      2895 non-null   float64
 7   거래량     2895 non-null   float64
 8   거래대금    2895 non-null   float64
 9   상장시가총액  2895 non-null   float64
dtypes: float64(9), object(1)
memory usage: 226.3+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2022/09/30,281.36,-1.29,-0.46,281.75,284.35,278.39,143738.0,6233068.0,1485352000.0
1,2022/09/29,282.65,-0.54,-0.19,286.6,287.81,282.56,128427.0,5584454.0,1496674000.0
2,2022/09/28,283.19,-7.01,-2.42,287.66,289.75,280.8,160839.0,6903850.0,1495226000.0
3,2022/09/27,290.2,0.43,0.15,290.27,290.75,286.76,152310.0,6310896.0,1532256000.0
4,2022/09/26,289.77,-7.97,-2.68,293.65,293.96,289.01,177692.0,7088012.0,1530092000.0




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


In [3]:
# Read the KOSPI200 ESG raw CSV directly with pandas (file is cp949-encoded),
# then print the project helper's summary of the frame.
df_keip = pd.read_csv(
    fp_keip,
    index_col=False,
    encoding="cp949",
)
m.Check_df(df_keip)



[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(2647, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2647 entries, 0 to 2646
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      2647 non-null   object 
 1   종가      2647 non-null   float64
 2   대비      2646 non-null   float64
 3   등락률     2646 non-null   float64
 4   시가      930 non-null    float64
 5   고가      930 non-null    float64
 6   저가      930 non-null    float64
 7   거래량     930 non-null    float64
 8   거래대금    930 non-null    float64
 9   상장시가총액  930 non-null    float64
dtypes: float64(9), object(1)
memory usage: 206.9+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2022/09/30,307.1,-1.6,-0.52,307.79,310.54,304.05,99037.0,4650248.0,1094917000.0
1,2022/09/29,308.7,-0.87,-0.28,313.24,314.51,308.6,85232.0,3926563.0,1100187000.0
2,2022/09/28,309.57,-7.93,-2.5,314.57,316.48,307.0,110849.0,5046265.0,1100697000.0
3,2022/09/27,317.5,0.28,0.09,317.76,318.11,313.82,109559.0,4700819.0,1128366000.0
4,2022/09/26,317.22,-8.63,-2.65,321.24,321.59,316.5,126013.0,5101049.0,1126643000.0




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


## 전처리 : datetime

In [4]:
# Target column order: date first, then open/high/low/close, then the remaining columns.
list_colOrder = [
    '연_월_일',
    '시가', '고가', '저가', '종가',
    "종가_대비", "등락률",
    '거래량', '거래대금', '상장시가총액',
]
# Rename 일자 -> 연_월_일 and 대비 -> 종가_대비, then select the columns in the order above.
df_kip = df_kip.rename(columns={"일자": "연_월_일", "대비": "종가_대비"})[list_colOrder]
df_kip.head(2)

Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액
0,2022/09/30,281.75,284.35,278.39,281.36,-1.29,-0.46,143738.0,6233068.0,1485352000.0
1,2022/09/29,286.6,287.81,282.56,282.65,-0.54,-0.19,128427.0,5584454.0,1496674000.0


In [5]:
# Run the project date helper on the 연_월_일 column of df_kip, with inplace=True.
# NOTE(review): judging by the recorded output, the helper rewrites 연_월_일 as YYYY-MM-DD and
# adds 연 / 분기 / 월 / 일 plus combined columns (연_분기, 연_월, 분기_월, 월_일, 연_분기_월).
# TODO confirm against the helper module. Only df_kip is processed here; df_keip is not.
m.DerivedCol_Date(df_kip, col_YMD="연_월_일", inplace=True)

True


Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액,연,분기,월,일,연_분기,연_월,분기_월,월_일,연_분기_월
0,2022-09-30,281.75,284.35,278.39,281.36,-1.29,-0.46,143738.0,6233068.0,1.485352e+09,2022,3,9,30,2022-3,2022-9,3-9,9-30,2022-3-9
1,2022-09-29,286.60,287.81,282.56,282.65,-0.54,-0.19,128427.0,5584454.0,1.496674e+09,2022,3,9,29,2022-3,2022-9,3-9,9-29,2022-3-9
2,2022-09-28,287.66,289.75,280.80,283.19,-7.01,-2.42,160839.0,6903850.0,1.495226e+09,2022,3,9,28,2022-3,2022-9,3-9,9-28,2022-3-9
3,2022-09-27,290.27,290.75,286.76,290.20,0.43,0.15,152310.0,6310896.0,1.532256e+09,2022,3,9,27,2022-3,2022-9,3-9,9-27,2022-3-9
4,2022-09-26,293.65,293.96,289.01,289.77,-7.97,-2.68,177692.0,7088012.0,1.530092e+09,2022,3,9,26,2022-3,2022-9,3-9,9-26,2022-3-9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2890,2011-01-07,274.00,275.61,273.24,275.61,0.89,0.32,113105.0,6131209.0,1.013763e+09,2011,1,1,7,2011-1,2011-1,1-1,1-7,2011-1-1
2891,2011-01-06,276.91,277.13,273.08,274.72,-0.63,-0.23,143227.0,6960295.0,1.009445e+09,2011,1,1,6,2011-1,2011-1,1-1,1-6,2011-1-1
2892,2011-01-05,275.37,275.94,274.57,275.35,-0.43,-0.16,127214.0,6860693.0,1.011270e+09,2011,1,1,5,2011-1,2011-1,1-1,1-5,2011-1-1
2893,2011-01-04,274.29,275.78,273.69,275.78,1.97,0.72,113809.0,6121661.0,1.012703e+09,2011,1,1,4,2011-1,2011-1,1-1,1-4,2011-1-1


## 전처리 : MinMaxScaling

# df_ip0 : concat axis=0

In [None]:
# Build df_ip0: both indices stacked row-wise (axis=0), tagged with the index name.
#
# df_kip has already been renamed (일자 -> 연_월_일, 대비 -> 종가_대비) and passed through
# m.DerivedCol_Date in the datetime preprocessing cells, but df_keip still has the raw
# column names. Concatenating them as-is would yield two half-empty date columns, so the
# ESG copy is brought onto the same schema first.
rename_map = {"일자": "연_월_일", "대비": "종가_대비"}

# rename() returns a new frame, so the source frames are left untouched.
# For df_kip the mapping is a no-op when the columns were already renamed.
df_kip_ = df_kip.rename(columns=rename_map)
df_keip_ = df_keip.rename(columns=rename_map)

# Same helper call that was applied to df_kip, so both frames carry the same date
# columns. NOTE(review): the helper is opaque from this notebook; this assumes it behaves
# on the ESG frame exactly as it did on df_kip — confirm.
m.DerivedCol_Date(df_keip_, col_YMD="연_월_일", inplace=True)

df_kip_["주가지수명"] = "KRX_KOSPI200"
df_keip_["주가지수명"] = "KRX_KOSPI200ESG"

# sort_values() is not in-place: keep its result. The sort key is the renamed date column.
# ignore_index / reset_index avoid duplicated row labels coming from the two source frames.
df_ip0 = (
    pd.concat([df_kip_, df_keip_], ignore_index=True)
    .sort_values(by=["연_월_일", "주가지수명"])
    .reset_index(drop=True)
)
df_ip0

### (선택) 영속화

In [None]:
# m.DfPrst(df_ip0, fp_ip0)

# df_ip1 : merge axis=1

In [None]:
# Build df_ip1: both indices side by side (axis=1), one row per trading date.
#
# df_kip's date column was renamed to 연_월_일 earlier in the notebook, so merging on "일자"
# raises KeyError. Align the ESG frame's column names with df_kip and merge on the
# renamed key instead.
date_col = "연_월_일"
rename_map = {"일자": date_col, "대비": "종가_대비"}

# rename() returns new frames; df_kip / df_keip themselves are not modified.
df_kip_key = df_kip.rename(columns=rename_map)
df_keip_key = df_keip.rename(columns=rename_map)

# Force an identical datetime dtype on both keys so that "2022/09/30" style strings and
# already-converted dates match each other in the join.
df_kip_key[date_col] = pd.to_datetime(df_kip_key[date_col])
df_keip_key[date_col] = pd.to_datetime(df_keip_key[date_col])

# Both suffixes now start with "_" (the left one used to be "KOSPI200", producing names
# like "종가KOSPI200"). Columns that exist only in df_kip (e.g. derived date columns) stay
# unsuffixed. sort_values() is not in-place, so its result is kept.
df_ip1 = (
    pd.merge(
        df_kip_key,
        df_keip_key,
        how="outer",
        on=date_col,
        suffixes=("_KOSPI200", "_KOSPI200ESG"),
    )
    .sort_values(by=[date_col])
    .reset_index(drop=True)
)
df_ip1

## 전처리

### 영속화

In [None]:
# Persist the axis=1 merged index-price frame to fp_ip1 via the project helper.
# NOTE(review): presumably writes a parquet file (fp_ip1 ends in ".parquet") — confirm in the helper module.
m.DfPrst(df_ip1, fp_ip1)