# `indexPrice`

- `KRX_KOSPI200_indexPrice`
  - 개요
    - 2011-01-03~2018-12-31
    - KRX KOSPI200 지수 가격 데이터
  - Data Source
    - [KRX_정보데이터시스템](http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201010105)
- `KRX_KOSPI200ESG_indexPrice`
  - 개요
    - 2012-01-03~2022-09-30
      - 2011/01/03 부터 조회하였으나 2011년 데이터 없음.
    - KRX KOSPI200ESG 지수 가격 데이터
  - Data Source
    - [KRX_정보데이터시스템](http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201010105&idxCd=1&idxCd2=180)

# import

In [1]:
import sys
import time
from glob import glob

import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup as bs

from tqdm import tqdm

sys.path.append("../import")
import module as m

pd.options.display.max_columns = None

data_path = m.data_path

# kip : Kospi 200 stock Index Price
fp_kip = f"{data_path}KRX_KOSPI200_indexPrice_raw.csv"
# keip : Kospi 200 Esg stock Index Price
fp_keip = f"{data_path}KRX_KOSPI200ESG_indexPrice_raw.csv"
# ip : Index Price merge axis=0
fp_ip0 = f"""{data_path}indexPrice0.parquet'"""
# ip : Index Price merge axis=1
fp_ip1 = f"""{m.fp["indexPrice"]}"""

data_path : ../data/
fp
{'esgRating': '../data/esgRating.parquet',
 'finaStat': '../data/finaStat.parquet',
 'indexPrice': '../data/indexPrice.parquet',
 'stockPrice': '../data/stockPrice.parquet'}


# `indexPrice`

## DataLoad

In [2]:
df_kip = m.DataLoad(fp_kip)

Mem. usage decreased to  0.07 Mb (60.0% reduction)


[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(2220, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2220 entries, 0 to 2219
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      2220 non-null   object 
 1   종가      2220 non-null   float16
 2   대비      2220 non-null   float16
 3   등락률     2220 non-null   float16
 4   시가      2220 non-null   float16
 5   고가      2220 non-null   float16
 6   저가      2220 non-null   float16
 7   거래량     2220 non-null   float32
 8   거래대금    2220 non-null   float32
 9   상장시가총액  2220 non-null   float32
dtypes: float16(6), float32(3), object(1)
memory usage: 69.5+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2018/12/28,262.0,1.410156,0.540039,261.5,262.75,261.25,62701.0,2895636.0,1165450000.0
1,2018/12/27,260.5,-0.409912,-0.160034,261.5,261.75,259.75,84457.0,4081822.0,1158683000.0
2,2018/12/26,261.0,-3.470703,-1.30957,261.0,262.5,259.25,92493.0,4372646.0,1161841000.0
3,2018/12/24,264.5,-0.609863,-0.22998,263.5,265.0,263.0,61962.0,2930704.0,1176009000.0
4,2018/12/21,265.0,0.25,0.090027,263.75,265.0,263.25,92600.0,4430738.0,1179096000.0




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


In [3]:
df_keip = m.DataLoad(fp_keip)

Mem. usage decreased to  0.05 Mb (59.9% reduction)


[1m┌▣ [4mdf.shape[0m ---- ---- ---- ----
(1721, 10)


[1m┌▣ [4mdf.info()[0m ---- ---- ---- ----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1721 entries, 0 to 1720
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   일자      1721 non-null   object 
 1   종가      1721 non-null   float16
 2   대비      1720 non-null   float16
 3   등락률     1720 non-null   float16
 4   시가      4 non-null      float16
 5   고가      4 non-null      float16
 6   저가      4 non-null      float16
 7   거래량     4 non-null      float32
 8   거래대금    4 non-null      float32
 9   상장시가총액  4 non-null      float32
dtypes: float16(6), float32(3), object(1)
memory usage: 53.9+ KB
None


[1m┌▣ [4mdf.head()[0m ---- ---- ---- ----


Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2018/12/28,272.75,1.589844,0.589844,272.25,273.75,272.0,48040.0,1996651.0,922911424.0
1,2018/12/27,271.0,-0.72998,-0.27002,272.0,272.5,270.25,64008.0,2660684.0,917445376.0
2,2018/12/26,271.75,-4.488281,-1.620117,272.75,274.0,270.75,70175.0,2784379.0,920513728.0
3,2018/12/24,276.25,-0.059998,-0.020004,274.75,277.0,274.25,46698.0,2025262.0,934401856.0
4,2018/12/21,276.5,0.150024,0.049988,,,,,,




[1m┌▣ [4mdf.columns.to_list()[0m ---- ---- ---- ----
['일자', '종가', '대비', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '상장시가총액']


## 전처리

In [4]:
# 컬럼명 변경하기
df_kip.rename(columns={"일자": "연_월_일", "대비":"종가_대비"}, inplace=True)
# 컬럼 순서
list_colOrder = ['연_월_일', '시가', '고가', '저가', '종가', "종가_대비","등락률", '거래량','거래대금', '상장시가총액']
df_kip = df_kip[list_colOrder]
df_kip.head(2)

Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액
0,2018/12/28,261.5,262.75,261.25,262.0,1.410156,0.540039,62701.0,2895636.0,1165450000.0
1,2018/12/27,261.5,261.75,259.75,260.5,-0.409912,-0.160034,84457.0,4081822.0,1158683000.0


In [5]:
# 컬럼명 변경하기
df_keip.rename(columns={"일자": "연_월_일", "대비":"종가_대비"}, inplace=True)
# 컬럼 순서
list_colOrder = ['연_월_일', '시가', '고가', '저가', '종가', "종가_대비","등락률", '거래량','거래대금', '상장시가총액']
df_keip = df_keip[list_colOrder]
df_keip.head(2)

Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액
0,2018/12/28,272.25,273.75,272.0,272.75,1.589844,0.589844,48040.0,1996651.0,922911424.0
1,2018/12/27,272.0,272.5,270.25,271.0,-0.72998,-0.27002,64008.0,2660684.0,917445376.0


## 전처리 : MinMaxScaling

# df_ip0 : merge axis=0

In [6]:
df_kip_ = df_kip.copy()
df_keip_ = df_keip.copy()

df_kip_["주가지수명"] = "KRX_KOSPI200"
df_keip_["주가지수명"] = "KRX_KOSPI200ESG"

df_ip0 = pd.concat([df_kip_,  df_keip_])

df_ip0.sort_values(by =["연_월_일", "주가지수명"])
df_ip0

Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액,주가지수명
0,2018/12/28,261.50,262.75,261.25,262.000,1.410156,0.540039,62701.0,2895636.0,1.165450e+09,KRX_KOSPI200
1,2018/12/27,261.50,261.75,259.75,260.500,-0.409912,-0.160034,84457.0,4081822.0,1.158683e+09,KRX_KOSPI200
2,2018/12/26,261.00,262.50,259.25,261.000,-3.470703,-1.309570,92493.0,4372646.0,1.161841e+09,KRX_KOSPI200
3,2018/12/24,263.50,265.00,263.00,264.500,-0.609863,-0.229980,61962.0,2930704.0,1.176009e+09,KRX_KOSPI200
4,2018/12/21,263.75,265.00,263.25,265.000,0.250000,0.090027,92600.0,4430738.0,1.179096e+09,KRX_KOSPI200
...,...,...,...,...,...,...,...,...,...,...,...
1716,2012/01/06,,,,240.500,-2.960938,-1.219727,,,,KRX_KOSPI200ESG
1717,2012/01/05,,,,243.500,-0.549805,-0.229980,,,,KRX_KOSPI200ESG
1718,2012/01/04,,,,244.125,-1.820312,-0.740234,,,,KRX_KOSPI200ESG
1719,2012/01/03,,,,245.875,7.191406,3.009766,,,,KRX_KOSPI200ESG


## 전처리

In [7]:
m.DerivedCol_Date(df_ip0, col_YMD="연_월_일", inplace=True)

inplace : True


Unnamed: 0,연_월_일,시가,고가,저가,종가,종가_대비,등락률,거래량,거래대금,상장시가총액,주가지수명,연,분기,월,연_분기,연_월,분기_월,연_분기_월,일,월_일
0,2018-12-28,261.50,262.75,261.25,262.000,1.410156,0.540039,62701.0,2895636.0,1.165450e+09,KRX_KOSPI200,2018,4,12,2018-4,2018-12,4-12,2018-4-12,28,12-28
1,2018-12-27,261.50,261.75,259.75,260.500,-0.409912,-0.160034,84457.0,4081822.0,1.158683e+09,KRX_KOSPI200,2018,4,12,2018-4,2018-12,4-12,2018-4-12,27,12-27
2,2018-12-26,261.00,262.50,259.25,261.000,-3.470703,-1.309570,92493.0,4372646.0,1.161841e+09,KRX_KOSPI200,2018,4,12,2018-4,2018-12,4-12,2018-4-12,26,12-26
3,2018-12-24,263.50,265.00,263.00,264.500,-0.609863,-0.229980,61962.0,2930704.0,1.176009e+09,KRX_KOSPI200,2018,4,12,2018-4,2018-12,4-12,2018-4-12,24,12-24
4,2018-12-21,263.75,265.00,263.25,265.000,0.250000,0.090027,92600.0,4430738.0,1.179096e+09,KRX_KOSPI200,2018,4,12,2018-4,2018-12,4-12,2018-4-12,21,12-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1716,2012-01-06,,,,240.500,-2.960938,-1.219727,,,,KRX_KOSPI200ESG,2012,1,1,2012-1,2012-1,1-1,2012-1-1,6,1-6
1717,2012-01-05,,,,243.500,-0.549805,-0.229980,,,,KRX_KOSPI200ESG,2012,1,1,2012-1,2012-1,1-1,2012-1-1,5,1-5
1718,2012-01-04,,,,244.125,-1.820312,-0.740234,,,,KRX_KOSPI200ESG,2012,1,1,2012-1,2012-1,1-1,2012-1-1,4,1-4
1719,2012-01-03,,,,245.875,7.191406,3.009766,,,,KRX_KOSPI200ESG,2012,1,1,2012-1,2012-1,1-1,2012-1-1,3,1-3


## 영속화

In [8]:
m.DfPrst(df_ip0, fp_ip0)

[]


# df_ip1 : merge axis=1

In [9]:
df_ip1 = pd.merge(df_kip, df_keip, how="outer", on="연_월_일", suffixes=("KOSPI200", "_KOSPI200ESG"))
df_ip1.sort_values(by =["연_월_일"])
df_ip1

Unnamed: 0,연_월_일,시가KOSPI200,고가KOSPI200,저가KOSPI200,종가KOSPI200,종가_대비KOSPI200,등락률KOSPI200,거래량KOSPI200,거래대금KOSPI200,상장시가총액KOSPI200,시가_KOSPI200ESG,고가_KOSPI200ESG,저가_KOSPI200ESG,종가_KOSPI200ESG,종가_대비_KOSPI200ESG,등락률_KOSPI200ESG,거래량_KOSPI200ESG,거래대금_KOSPI200ESG,상장시가총액_KOSPI200ESG
0,2018/12/28,261.500,262.750,261.250,262.000,1.410156,0.540039,62701.0,2895636.0,1.165450e+09,272.25,273.75,272.00,272.75,1.589844,0.589844,48040.0,1996651.0,922911424.0
1,2018/12/27,261.500,261.750,259.750,260.500,-0.409912,-0.160034,84457.0,4081822.0,1.158683e+09,272.00,272.50,270.25,271.00,-0.729980,-0.270020,64008.0,2660684.0,917445376.0
2,2018/12/26,261.000,262.500,259.250,261.000,-3.470703,-1.309570,92493.0,4372646.0,1.161841e+09,272.75,274.00,270.75,271.75,-4.488281,-1.620117,70175.0,2784379.0,920513728.0
3,2018/12/24,263.500,265.000,263.000,264.500,-0.609863,-0.229980,61962.0,2930704.0,1.176009e+09,274.75,277.00,274.25,276.25,-0.059998,-0.020004,46698.0,2025262.0,934401856.0
4,2018/12/21,263.750,265.000,263.250,265.000,0.250000,0.090027,92600.0,4430738.0,1.179096e+09,,,,276.50,0.150024,0.049988,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2215,2010/01/08,222.625,222.750,219.125,222.625,1.349609,0.609863,157255.0,5940569.0,7.750845e+08,,,,,,,,,
2216,2010/01/07,224.250,224.750,221.250,221.250,-3.359375,-1.500000,183417.0,6344257.0,7.702191e+08,,,,,,,,,
2217,2010/01/06,223.875,225.000,223.500,224.625,1.830078,0.819824,170500.0,5175271.0,7.810067e+08,,,,,,,,,
2218,2010/01/05,224.250,224.375,222.125,222.875,-0.649902,-0.290039,153282.0,5730882.0,7.742376e+08,,,,,,,,,


## 전처리

In [10]:
m.DerivedCol_Date(df_ip1, col_YMD="연_월_일", inplace=True)

inplace : True


Unnamed: 0,연_월_일,시가KOSPI200,고가KOSPI200,저가KOSPI200,종가KOSPI200,종가_대비KOSPI200,등락률KOSPI200,거래량KOSPI200,거래대금KOSPI200,상장시가총액KOSPI200,시가_KOSPI200ESG,고가_KOSPI200ESG,저가_KOSPI200ESG,종가_KOSPI200ESG,종가_대비_KOSPI200ESG,등락률_KOSPI200ESG,거래량_KOSPI200ESG,거래대금_KOSPI200ESG,상장시가총액_KOSPI200ESG,연,분기,월,연_분기,연_월,분기_월,연_분기_월,일,월_일
0,2018-12-28,261.500,262.750,261.250,262.000,1.410156,0.540039,62701.0,2895636.0,1.165450e+09,272.25,273.75,272.00,272.75,1.589844,0.589844,48040.0,1996651.0,922911424.0,2018,4,12,2018-4,2018-12,4-12,2018-4-12,28,12-28
1,2018-12-27,261.500,261.750,259.750,260.500,-0.409912,-0.160034,84457.0,4081822.0,1.158683e+09,272.00,272.50,270.25,271.00,-0.729980,-0.270020,64008.0,2660684.0,917445376.0,2018,4,12,2018-4,2018-12,4-12,2018-4-12,27,12-27
2,2018-12-26,261.000,262.500,259.250,261.000,-3.470703,-1.309570,92493.0,4372646.0,1.161841e+09,272.75,274.00,270.75,271.75,-4.488281,-1.620117,70175.0,2784379.0,920513728.0,2018,4,12,2018-4,2018-12,4-12,2018-4-12,26,12-26
3,2018-12-24,263.500,265.000,263.000,264.500,-0.609863,-0.229980,61962.0,2930704.0,1.176009e+09,274.75,277.00,274.25,276.25,-0.059998,-0.020004,46698.0,2025262.0,934401856.0,2018,4,12,2018-4,2018-12,4-12,2018-4-12,24,12-24
4,2018-12-21,263.750,265.000,263.250,265.000,0.250000,0.090027,92600.0,4430738.0,1.179096e+09,,,,276.50,0.150024,0.049988,,,,2018,4,12,2018-4,2018-12,4-12,2018-4-12,21,12-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2215,2010-01-08,222.625,222.750,219.125,222.625,1.349609,0.609863,157255.0,5940569.0,7.750845e+08,,,,,,,,,,2010,1,1,2010-1,2010-1,1-1,2010-1-1,8,1-8
2216,2010-01-07,224.250,224.750,221.250,221.250,-3.359375,-1.500000,183417.0,6344257.0,7.702191e+08,,,,,,,,,,2010,1,1,2010-1,2010-1,1-1,2010-1-1,7,1-7
2217,2010-01-06,223.875,225.000,223.500,224.625,1.830078,0.819824,170500.0,5175271.0,7.810067e+08,,,,,,,,,,2010,1,1,2010-1,2010-1,1-1,2010-1-1,6,1-6
2218,2010-01-05,224.250,224.375,222.125,222.875,-0.649902,-0.290039,153282.0,5730882.0,7.742376e+08,,,,,,,,,,2010,1,1,2010-1,2010-1,1-1,2010-1-1,5,1-5


## 영속화

In [11]:
m.DfPrst(df_ip1, fp_ip1)

['../data/indexPrice.parquet']
