# Korean stock market EDA

## 0. Import libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# import streamlit as st

In [2]:
import FinanceDataReader as fdr
import quantstats as qs

In [3]:
## custom libs

from korquanttools.pricevolume.loader import KRXPriceDM
from korquanttools.pricevolume.utils import DateUtil
from korquanttools.pricevolume.config import PathConfig

## 1. Import datasets

- Import price-volume data module from KRX using custom lib `korquanttools`
- Load or derive price, volume, market cap, dollar volume, etc. from the dataset

In [4]:
# Analysis window passed to the data module, written as YYYYMMDD integers
# (digit groups separated for readability: year_month_day).

START = 2014_01_01
END = 2022_05_20

In [5]:
# Instantiate the KRX price-volume data module for the START..END window
pricevolume = KRXPriceDM(START, END)
# Show the module's metadata (name, description, period, available data keys)
pricevolume.get_info()


        * DM name: KRX_pricevolume
        * DM description: Basic price-volume data imported from KRX website & NAVER finance. Has KOSPI, KOSDAQ, KONEX stocks.
        * birthday: 20211203
        * DM period: 19990101 ~ 
        * Available data: ['lv1', 'open', 'high', 'low', 'close', 'volume', 'dollarvolume', 'marketcap']
        


### 1-1. KRX raw data

In [7]:
# Fetch closing prices from KRX. This call also builds the cache
# (both lv1 and lv2) so that later reads are faster.
# The result is cast to float in the same step.

close_df = pricevolume.get_data("close").astype(float)
close_df


ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-01,,,,,,,,,,,...,,,,,,,,,,
2014-01-02,4440.0,438.0,111500.0,15500.0,73500.0,32600.0,21750.0,16850.0,182500.0,93300.0,...,,,,,,,,,,
2014-01-03,4540.0,440.0,114000.0,15400.0,71800.0,33400.0,21450.0,16850.0,180500.0,93000.0,...,,,,,,,,,,
2014-01-04,,,,,,,,,,,...,,,,,,,,,,
2014-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-16,11850.0,791.0,15550.0,37350.0,81700.0,62900.0,35850.0,21000.0,57800.0,56800.0,...,40200.0,17250.0,25200.0,2185.0,2195.0,2460.0,2160.0,2205.0,13950.0,
2022-05-17,11900.0,788.0,15450.0,38250.0,81800.0,62900.0,36950.0,21200.0,57800.0,56700.0,...,40200.0,17450.0,25400.0,2185.0,2205.0,2535.0,2155.0,2215.0,14350.0,
2022-05-18,12000.0,783.0,15200.0,39750.0,81200.0,63000.0,36800.0,21150.0,58600.0,57100.0,...,39650.0,17650.0,25450.0,2190.0,2200.0,2540.0,2160.0,2210.0,13700.0,
2022-05-19,11900.0,780.0,15250.0,39000.0,79900.0,62300.0,36100.0,20750.0,58200.0,56600.0,...,38450.0,17650.0,24750.0,2185.0,2185.0,2490.0,2160.0,2215.0,17800.0,


In [8]:
# Load the remaining fields, casting each one to float as it is read.
volume_df = pricevolume.get_data('volume').astype(float)

dollarvolume_df = pricevolume.get_data('dollarvolume').astype(float)

# TODO: marketcap is currently loaded as object dtype, so the explicit
# astype(float) is required here. Fix it.
marketcap_df = pricevolume.get_data('marketcap').astype(float)

#### Caution: Data are unadjusted

- For example, Samsung Electronics (KOSPI:005930) went through a stock split, so its unadjusted close and volume series are not comparable across the split date.

In [9]:
close_df.loc[:, '005930'] # Samsung Electronics.
# Because of the stock split, the price moves from the 1,000,000 KRW range to the 60,000 KRW range.

trdDd
2014-01-01          NaN
2014-01-02    1309000.0
2014-01-03    1296000.0
2014-01-04          NaN
2014-01-05          NaN
                ...    
2022-05-16      66300.0
2022-05-17      67600.0
2022-05-18      68100.0
2022-05-19      67500.0
2022-05-20      68000.0
Name: 005930, Length: 3062, dtype: float64

In [10]:
volume_df.loc[:, '005930'] # Samsung Electronics
# The volume is likewise unadjusted.

trdDd
2014-01-01           NaN
2014-01-02      620500.0
2014-01-03      529932.0
2014-01-04           NaN
2014-01-05           NaN
                 ...    
2022-05-16    11937555.0
2022-05-17    15680447.0
2022-05-18    16486319.0
2022-05-19    17073727.0
2022-05-20    12109671.0
Name: 005930, Length: 3062, dtype: float64

### 1-2. Create derived datasets

In [11]:
# Load the 'lv1' dataset. The displayed output suggests one raw KRX record
# per stock per trading day (long format) — TODO confirm against the loader.
lv1_df = pricevolume.get_data('lv1')
lv1_df

Unnamed: 0,ISU_SRT_CD,ISU_ABBRV,MKT_NM,SECT_TP_NM,TDD_CLSPRC,FLUC_TP_CD,CMPPREVDD_PRC,FLUC_RT,TDD_OPNPRC,TDD_HGPRC,TDD_LWPRC,ACC_TRDVOL,ACC_TRDVAL,MKTCAP,LIST_SHRS,MKT_ID,trdDd
1977,060310,3S,KOSDAQ,벤처기업부,4485,1,45,1.01,4550,4655,4475,402448,1832921960,183401821980,40892268,KSQ,2014-01-02
1978,013340,AJS,KOSDAQ,중견기업부,605,2,-15,-2.42,634,634,605,261969,161351265,23139709065,38247453,KSQ,2014-01-02
1979,068400,AJ렌터카,KOSPI,,12600,1,250,2.02,12450,12850,12350,153578,1933645400,279043380000,22146300,STK,2014-01-02
1980,006840,AK홀딩스,KOSPI,,41000,1,50,0.12,41000,41400,40150,17780,726244500,477550001000,11647561,STK,2014-01-02
1981,054620,AP시스템,KOSDAQ,우량기업부,9610,2,-250,-2.54,9990,10000,9610,707268,6924202690,220663503430,22961863,KSQ,2014-01-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7041090,000547,흥국화재2우B,KOSPI,,28600,2,-450,-1.55,28400,29200,28400,132,3795550,4392960000,153600,STK,2022-05-19
7041091,000545,흥국화재우,KOSPI,,8340,2,-20,-0.24,8320,8340,8120,666,5468290,6405120000,768000,STK,2022-05-19
7041092,003280,흥아해운,KOSPI,,2840,2,-115,-3.89,2880,2925,2825,589661,1686066265,682806713160,240424899,STK,2022-05-19
7041093,037440,희림,KOSDAQ,우량기업부,7390,2,-360,-4.65,7570,7570,7390,261108,1944181740,102887090250,13922475,KSQ,2022-05-19


In [12]:
# Inspect the lv1 column names before building derived datasets from them
lv1_df.columns

Index(['ISU_SRT_CD', 'ISU_ABBRV', 'MKT_NM', 'SECT_TP_NM', 'TDD_CLSPRC',
       'FLUC_TP_CD', 'CMPPREVDD_PRC', 'FLUC_RT', 'TDD_OPNPRC', 'TDD_HGPRC',
       'TDD_LWPRC', 'ACC_TRDVOL', 'ACC_TRDVAL', 'MKTCAP', 'LIST_SHRS',
       'MKT_ID', 'trdDd'],
      dtype='object')

- Stock universe by market grouping (KOSPI, KOSDAQ, KONEX)