# Finance Dashboard 

## 0. Import libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# import streamlit as st

In [2]:
import FinanceDataReader as fdr
import quantstats as qs

In [3]:
## custom libs

from korquanttools.pricevolume.loader import KRXPriceDM
from korquanttools.pricevolume.utils import DateUtil
from korquanttools.pricevolume.config import PathConfig

In [4]:
from tqdm import tqdm

## 1. Import data

- Import price-volume data from KRX using custom lib `korquanttools.pricevolume`

In [5]:
# Global parameters
#
# Date window shared by the KRX loader, the cache file names and the
# FinanceDataReader calls, written as YYYYMMDD integers.
# (A longer window was used previously: START = 20140101.)
START, END = 20210101, 20220520

In [6]:
# Instantiate the KRX price-volume DM for the START..END window.
pricevolume = KRXPriceDM(START, END)

In [7]:
# Show the DM's metadata: name, description, period and the data keys
# that can be passed to get_data().
pricevolume.get_info()


        * DM name: KRX_pricevolume
        * DM description: Basic price-volume data imported from KRX website & NAVER finance. Has KOSPI, KOSDAQ, KONEX stocks.
        * birthday: 20211203
        * DM period: 19990101 ~ 
        * Available data: ['lv1', 'open', 'high', 'low', 'close', 'volume', 'dollarvolume', 'marketcap']
        


In [10]:
# Load the 'close' table from the DM.
# Per the author's note, this downloads price-volume data from KRX and also
# builds the cache (both lv1 and lv2) so later access is faster.

close_df = pricevolume.get_data("close")

In [11]:
# Display the close-price frame: one row per calendar date (trdDd),
# one column per ticker code (ISU_SRT_CD); NaN where no price exists.
close_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-01,,,,,,,,,,,...,,,,,,,,,,
2021-01-02,,,,,,,,,,,...,,,,,,,,,,
2021-01-03,,,,,,,,,,,...,,,,,,,,,,
2021-01-04,19100,1150,12300,14250,73800,49100,32000,20150,79200,72300,...,,,,,,,,,,
2021-01-05,19400,1125,12300,14050,74500,49200,33200,20350,79000,71300,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-16,11850,791,15550,37350,81700,62900,35850,21000,57800,56800,...,40200,17250,25200,2185,2195,2460,2160,2205,13950,
2022-05-17,11900,788,15450,38250,81800,62900,36950,21200,57800,56700,...,40200,17450,25400,2185,2205,2535,2155,2215,14350,
2022-05-18,12000,783,15200,39750,81200,63000,36800,21150,58600,57100,...,39650,17650,25450,2190,2200,2540,2160,2210,13700,
2022-05-19,11900,780,15250,39000,79900,62300,36100,20750,58200,56600,...,38450,17650,24750,2185,2185,2490,2160,2215,17800,


In [12]:
close_df.loc[:, '005930'] # Samsung Electronics.
# Because the stock was split, the price moves from the ~1,000,000 KRW range to the ~60,000 KRW range.
# NOTE(review): the saved output reports dtype object, so these prices are not a numeric dtype yet.

trdDd
2021-01-01      NaN
2021-01-02      NaN
2021-01-03      NaN
2021-01-04    83000
2021-01-05    83900
              ...  
2022-05-16    66300
2022-05-17    67600
2022-05-18    68100
2022-05-19    67500
2022-05-20    68000
Name: 005930, Length: 505, dtype: object

In [None]:
# Load the remaining per-ticker tables from the DM, in the same order as before:
# traded volume, traded value ("dollarvolume") and market capitalisation.
volume_df, dollarvolume_df, marketcap_df = (
    pricevolume.get_data(data_key)
    for data_key in ('volume', 'dollarvolume', 'marketcap')
)

In [None]:
# Display the volume frame (dates x ticker codes).
# NOTE(review): the saved output spans 2014-01-01..2022-05-23+, which does not
# match the current START/END window — presumably left over from an earlier run
# with START = 20140101; re-run the cell to refresh it.
volume_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-01,,,,,,,,,,,...,,,,,,,,,,
2014-01-02,99452,68243,335,115508,12328,63,276618,2400,21453,3,...,,,,,,,,,,
2014-01-03,107190,65432,853,128224,12650,60,253501,255,21211,0,...,,,,,,,,,,
2014-01-04,,,,,,,,,,,...,,,,,,,,,,
2014-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-20,93735,153506,3559,211429,8204,294,266249,731,168303,494,...,12130,2662358,554572,13196,33628,139847,20756,5271,15772956,19492306
2022-05-21,,,,,,,,,,,...,,,,,,,,,,
2022-05-22,,,,,,,,,,,...,,,,,,,,,,
2022-05-23,144887,144368,3951,171256,7435,9,246477,912,178141,561,...,2711,5869598,143136,12276,67016,52004,35792,4661,1994293,11860501


In [None]:
volume_df.loc[:, '005930'] # Samsung Electronics
# Volume is likewise unadjusted volume.

trdDd
2014-01-01         NaN
2014-01-02      620500
2014-01-03      529932
2014-01-04         NaN
2014-01-05         NaN
                ...   
2022-05-20    12109671
2022-05-21         NaN
2022-05-22         NaN
2022-05-23    13684088
2022-05-24         NaN
Name: 005930, Length: 3066, dtype: object

## 2. Create additional DMs


- KOSPI, KOSDAQ

In [8]:
# Load the 'lv1' table from the DM; the following cells read its
# ISU_SRT_CD, MKT_NM and trdDd columns.
lv1_df = pricevolume.get_data('lv1')

In [23]:
# Independent copy of the ticker / market-name / date columns, so that flag
# columns can be added without touching lv1_df.
market_cat_df = lv1_df.loc[:, ['ISU_SRT_CD', 'MKT_NM', 'trdDd']].copy()

In [24]:
# List the distinct market names present in MKT_NM.
market_cat_df['MKT_NM'].unique()

array(['KOSDAQ', 'KOSPI', 'KONEX'], dtype=object)

In [25]:
# Add one boolean membership column per market: is_KOSPI, is_KOSDAQ, is_KONEX.
for market_name in ('KOSPI', 'KOSDAQ', 'KONEX'):
    market_cat_df.loc[:, f'is_{market_name}'] = market_cat_df['MKT_NM'].eq(market_name)

In [28]:
# Reshape each market flag from long to wide form: rows are dates (trdDd),
# columns are ticker codes (ISU_SRT_CD). Date/ticker pairs that have no row in
# market_cat_df come out as NaN.
kospi_univ_df = market_cat_df.pivot(index='trdDd', columns='ISU_SRT_CD', values='is_KOSPI')
kosdaq_univ_df = market_cat_df.pivot(index='trdDd', columns='ISU_SRT_CD', values='is_KOSDAQ')
konex_univ_df = market_cat_df.pivot(index='trdDd', columns='ISU_SRT_CD', values='is_KONEX')

In [31]:
kosdaq_univ_df.loc[:, '221610'] # Jaan Bio; delisting date: 2022-01-05

trdDd
2021-01-02    True
2021-01-03    True
2021-01-04    True
2021-01-05    True
2021-01-06    True
              ... 
2022-05-15     NaN
2022-05-16     NaN
2022-05-17     NaN
2022-05-18     NaN
2022-05-19     NaN
Name: 221610, Length: 503, dtype: object


- Trading-day DM, derived from `pricevolume`


In [13]:
# A date counts as a trading day when at least one ticker has a close price.
# Every other calendar date (an all-NaN row, which includes weekends) is
# flagged in `holidays`.
tradingdays = close_df.notnull().any(axis=1)
holidays = ~tradingdays
tradingdays

trdDd
2021-01-01    False
2021-01-02    False
2021-01-03    False
2021-01-04     True
2021-01-05     True
              ...  
2022-05-16     True
2022-05-17     True
2022-05-18     True
2022-05-19     True
2022-05-20     True
Length: 505, dtype: bool


- Return and adjusted open/high/low/close/volume DMs, built by combining `pricevolume` with `FinanceDataReader`


In [14]:
# Universe mask: True wherever a ticker has a close price on that date.
universe_df = close_df.notna()
universe_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-01,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2021-01-02,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2021-01-03,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2021-01-04,True,True,True,True,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2021-01-05,True,True,True,True,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-16,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,False
2022-05-17,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,False
2022-05-18,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,False
2022-05-19,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,False


In [17]:
# # Takes about 20 mins

# adjOpen_df = close_df.copy()
# adjHigh_df = close_df.copy()
# adjLow_df = close_df.copy()
# adjClose_df = close_df.copy()
# adjVolume_df = close_df.copy()
# return_df = close_df.copy()

# for ii in tqdm(close_df.columns):
#     ii_df = fdr.DataReader(ii, DateUtil.numdate2stddate(START), DateUtil.numdate2stddate(END))

#     adjOpen_df.loc[:, ii] = ii_df['Open']
#     adjHigh_df.loc[:, ii] = ii_df['High']
#     adjLow_df.loc[:, ii] = ii_df['Low']
#     adjClose_df.loc[:, ii] = ii_df['Close']
#     adjVolume_df.loc[:, ii] = ii_df['Volume']
#     return_df.loc[:, ii] = ii_df['Change']
    

In [18]:
# adjOpen_df.to_pickle(PathConfig.cache_path / f"temp_adjOpen_{START}_{END}")
# adjHigh_df.to_pickle(PathConfig.cache_path / f"temp_adjHigh_{START}_{END}")
# adjLow_df.to_pickle(PathConfig.cache_path / f"temp_adjLow_{START}_{END}")
# adjClose_df.to_pickle(PathConfig.cache_path / f"temp_adjClose_{START}_{END}")
# adjVolume_df.to_pickle(PathConfig.cache_path / f"temp_adjVolume_{START}_{END}")
# return_df.to_pickle(PathConfig.cache_path / f"temp_return_{START}_{END}")

In [17]:
# Adjusted OHLCV and daily-return frames.
#
# The frames are read from the pickle cache when it exists. On a cache miss
# (fresh checkout, or a new START/END window) they are rebuilt from
# FinanceDataReader and written back, so the notebook still runs top to bottom
# without manually un-commenting the builder cells.
#
# Cache name -> FinanceDataReader column the frame is filled from.
_FDR_COLUMN_BY_NAME = {
    "adjOpen": "Open",
    "adjHigh": "High",
    "adjLow": "Low",
    "adjClose": "Close",
    "adjVolume": "Volume",
    "return": "Change",
}
_cache_file_by_name = {
    name: PathConfig.cache_path / f"temp_{name}_{START}_{END}"
    for name in _FDR_COLUMN_BY_NAME
}

try:
    # NOTE: read_pickle executes whatever the file contains; only point it at
    # cache files this notebook wrote itself.
    _adj_frames = {
        name: pd.read_pickle(cache_file)
        for name, cache_file in _cache_file_by_name.items()
    }
except FileNotFoundError:
    # At least one cache file is missing: rebuild all six so they stay in sync.
    # This is the slow path (the original note says about 20 minutes).
    # Each frame starts as a copy of close_df to inherit its date index and
    # ticker columns; every column is then overwritten ticker by ticker.
    _adj_frames = {name: close_df.copy() for name in _FDR_COLUMN_BY_NAME}
    _std_start = DateUtil.numdate2stddate(START)
    _std_end = DateUtil.numdate2stddate(END)

    for ticker in tqdm(close_df.columns):
        ticker_df = fdr.DataReader(ticker, _std_start, _std_end)
        for name, fdr_column in _FDR_COLUMN_BY_NAME.items():
            # Assignment aligns on the date index; dates absent from
            # ticker_df become NaN.
            _adj_frames[name].loc[:, ticker] = ticker_df[fdr_column]

    for name, cache_file in _cache_file_by_name.items():
        _adj_frames[name].to_pickle(cache_file)

adjOpen_df = _adj_frames["adjOpen"]
adjHigh_df = _adj_frames["adjHigh"]
adjLow_df = _adj_frames["adjLow"]
adjClose_df = _adj_frames["adjClose"]
adjVolume_df = _adj_frames["adjVolume"]
return_df = _adj_frames["return"]


### (Skip for now) Download additional data for new DMs 

- KRX sector DM downloaded from KRX
    - NOT PIT (Point-In-Time)
- Corporate finance fundamental analysis data from DART (using 3rd party libraries)
    - Later on, when I need it

In [34]:
# Fetch the 'KS11' series over the START..END window via FinanceDataReader.
# NOTE(review): the saved output of this cell is a JSONDecodeError, so `kospi`
# was not populated in the recorded run. Presumably 'KS11' is the KOSPI index —
# confirm the symbol and its data source before relying on this cell.
kospi = fdr.DataReader('KS11', DateUtil.numdate2stddate(START), DateUtil.numdate2stddate(END))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## 3. Make price-volume chart graph

- Simple price-volume chart using both `seaborn` and `streamlit`
- Show biggest winners/losers on mouse hover
    - Make pseudo-KOSPI portfolio mimicking KOSPI index
    - Show Top 3 winners/losers on the graph 

In [None]:
# TODO: draw the price-volume chart with seaborn.
# This cell previously held only an unfinished `sns.` expression, which is a
# SyntaxError and would stop a Restart & Run All at this point.

## 4. Make Finviz-style stock tree map

## 5. Make return correlation clustering DM 

- Also, visualize the result

## 6. Create NLP news DM