# Finance Dashboard 

## 0. Import libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn
# import streamlit as st

In [2]:
import FinanceDataReader as fdr
import quantstats as qs

In [53]:
## custom libs

from korquanttools.pricevolume.loader import KRXPriceDM
from korquanttools.pricevolume.utils import DateUtil
from korquanttools.pricevolume.config import PathConfig

In [54]:
from tqdm import tqdm

## 1. Import data

- Import price-volume data from KRX using custom lib `korquanttools.pricevolume`

In [4]:
# Global parameters: the date window shared by the loader, the
# FinanceDataReader download and the cache file names below.
# Both bounds are plain integers (they look like YYYYMMDD dates).

START, END = 20140101, 20220520

In [5]:
# Create the KRX price-volume DM handle for the global START..END window.
# NOTE(review): KRXPriceDM comes from korquanttools; whether construction itself
# triggers any download or cache access is not visible here — confirm.
pricevolume = KRXPriceDM(START, END)

In [10]:
# Show the DM's metadata; the recorded output lists its name, description,
# birthday, period and the data keys accepted by `get_data`.
pricevolume.get_info()


        * DM name: KRX_pricevolume
        * DM description: Basic price-volume data imported from KRX website & NAVER finance. Has KOSPI, KOSDAQ, KONEX stocks.
        * birthday: 20211203
        * DM period: 19990101 ~ 
        * Available data: ['lv1', 'open', 'high', 'low', 'close', 'adj_close', 'return', 'volume', 'dollarvolume', 'marketcap']
        


In [43]:
# Load the "close" field from the KRX price-volume DM.
# Per the original note, this call downloads the data from KRX and also builds
# a cache (both lv1 and lv2) for faster access on later calls.
# NOTE(review): the download/caching behavior lives inside korquanttools and is
# not visible in this notebook — confirm against the library.

close_df = pricevolume.get_data("close")

In [44]:
# Inspect the close-price frame: in the recorded output, rows are dates (trdDd)
# and columns are ticker codes (ISU_SRT_CD); all-empty rows are non-trading dates.
close_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-01,,,,,,,,,,,...,,,,,,,,,,
2014-01-02,4440,438,111500,15500,73500,32600,21750,16850,182500,93300,...,,,,,,,,,,
2014-01-03,4540,440,114000,15400,71800,33400,21450,16850,180500,93000,...,,,,,,,,,,
2014-01-04,,,,,,,,,,,...,,,,,,,,,,
2014-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-20,11850,780,15050,39100,80800,62800,35550,20900,58500,56900,...,38200,22900,27000,2185,2195,2445,2165,2215,20100,27100
2022-05-21,,,,,,,,,,,...,,,,,,,,,,
2022-05-22,,,,,,,,,,,...,,,,,,,,,,
2022-05-23,12100,779,15000,39750,80800,62600,35350,20900,59600,57200,...,38000,25700,26150,2190,2200,2460,2170,2215,18950,26500


In [45]:
close_df.loc[:, '005930'] # Samsung Electronics.
# Because of a stock split, the price moves from the 1,000,000 KRW range down to the 60,000 KRW range.
# (i.e. these close prices are not split-adjusted.)

trdDd
2014-01-01        NaN
2014-01-02    1309000
2014-01-03    1296000
2014-01-04        NaN
2014-01-05        NaN
               ...   
2022-05-20      68000
2022-05-21        NaN
2022-05-22        NaN
2022-05-23      67900
2022-05-24        NaN
Name: 005930, Length: 3066, dtype: object

In [11]:
# Pull the remaining raw fields from the price-volume DM in one pass;
# the requests are issued in the same order as the names on the left-hand side.
volume_df, dollarvolume_df, marketcap_df = (
    pricevolume.get_data(field_name)
    for field_name in ('volume', 'dollarvolume', 'marketcap')
)

In [13]:
# Inspect the volume frame; the recorded output has the same date-by-ticker
# layout as the close-price frame.
volume_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-01,,,,,,,,,,,...,,,,,,,,,,
2014-01-02,99452,68243,335,115508,12328,63,276618,2400,21453,3,...,,,,,,,,,,
2014-01-03,107190,65432,853,128224,12650,60,253501,255,21211,0,...,,,,,,,,,,
2014-01-04,,,,,,,,,,,...,,,,,,,,,,
2014-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-20,93735,153506,3559,211429,8204,294,266249,731,168303,494,...,12130,2662358,554572,13196,33628,139847,20756,5271,15772956,19492306
2022-05-21,,,,,,,,,,,...,,,,,,,,,,
2022-05-22,,,,,,,,,,,...,,,,,,,,,,
2022-05-23,144887,144368,3951,171256,7435,9,246477,912,178141,561,...,2711,5869598,143136,12276,67016,52004,35792,4661,1994293,11860501


In [15]:
volume_df.loc[:, '005930'] # Samsung Electronics
# The volume is likewise unadjusted (not corrected for the stock split).

trdDd
2014-01-01         NaN
2014-01-02      620500
2014-01-03      529932
2014-01-04         NaN
2014-01-05         NaN
                ...   
2022-05-20    12109671
2022-05-21         NaN
2022-05-22         NaN
2022-05-23    13684088
2022-05-24         NaN
Name: 005930, Length: 3066, dtype: object

## 2. Create additional DMs

- Trading-day DM, derived from the `pricevolume` data


In [None]:
# A date is a trading day when at least one ticker has a close price;
# every other date (all tickers null) is flagged as a holiday.
tradingdays = close_df.notnull().any(axis=1)
holidays = ~tradingdays
tradingdays

trdDd
2014-01-01    False
2014-01-02     True
2014-01-03     True
2014-01-04    False
2014-01-05    False
              ...  
2022-05-20     True
2022-05-21    False
2022-05-22    False
2022-05-23     True
2022-05-24    False
Length: 3066, dtype: bool


- Return and adjusted open/high/low/close/volume DMs, built by combining `pricevolume` with `FinanceDataReader`


In [36]:
# Universe mask: True wherever a ticker has a (non-null) close price on that date.
universe_df = ~close_df.isnull()
universe_df

ISU_SRT_CD,000020,000040,000050,000060,000070,000075,000080,000087,000100,000105,...,37550L,388050,389140,405640,412930,413600,415580,419270,389260,399720
trdDd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-01,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2014-01-02,True,True,True,True,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2014-01-03,True,True,True,True,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2014-01-04,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2014-01-05,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-20,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2022-05-21,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2022-05-22,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2022-05-23,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True


In [55]:
# Fetch the Open/High/Low/Close/Volume/Change series of every ticker in
# `close_df` from FinanceDataReader and store them in the adj*/return frames.
# One request is made per ticker; the recorded run took about 20 minutes
# for 3,028 tickers.

# Convert the numeric START/END bounds once instead of on every iteration.
fdr_start = DateUtil.numdate2stddate(START)
fdr_end = DateUtil.numdate2stddate(END)

# Start from all-NaN float frames shaped like `close_df` instead of copies of it:
# a ticker that is skipped, or a loop interrupted part-way, must never leave raw
# close prices sitting in the adjusted / volume / return frames.
nan_template = pd.DataFrame(np.nan, index=close_df.index, columns=close_df.columns)

adjOpen_df = nan_template.copy()
adjHigh_df = nan_template.copy()
adjLow_df = nan_template.copy()
adjClose_df = nan_template.copy()
adjVolume_df = nan_template.copy()
return_df = nan_template.copy()

# FinanceDataReader column name -> destination frame.
fdr_targets = {
    'Open': adjOpen_df,
    'High': adjHigh_df,
    'Low': adjLow_df,
    'Close': adjClose_df,
    'Volume': adjVolume_df,
    'Change': return_df,
}

# Tickers for which FinanceDataReader returned no rows; their columns stay NaN.
empty_tickers = []

for ii in tqdm(close_df.columns):
    ii_df = fdr.DataReader(ii, fdr_start, fdr_end)

    # Nothing to store for this ticker: record it and keep the long loop alive
    # rather than failing on a missing column.
    if ii_df.empty:
        empty_tickers.append(ii)
        continue

    for fdr_col, target_df in fdr_targets.items():
        # The assignment aligns on the date index, so dates absent from `ii_df`
        # remain NaN.
        # NOTE(review): assumes `ii_df` and `close_df` use the same index type — confirm.
        target_df.loc[:, ii] = ii_df[fdr_col]

100%|██████████| 3028/3028 [19:42<00:00,  2.56it/s]


In [56]:
# Persist each downloaded frame to the cache directory so the slow download
# does not have to be repeated. File names carry the START/END window.
frames_to_cache = (
    (adjOpen_df, f"temp_adjOpen_{START}_{END}"),
    (adjHigh_df, f"temp_adjHigh_{START}_{END}"),
    (adjLow_df, f"temp_adjLow_{START}_{END}"),
    (adjClose_df, f"temp_adjClose_{START}_{END}"),
    (adjVolume_df, f"temp_adjVolume_{START}_{END}"),
    (return_df, f"temp_return_{START}_{END}"),
)

for cached_frame, cache_name in frames_to_cache:
    cached_frame.to_pickle(PathConfig.cache_path / cache_name)

In [57]:
# Reload the cached frames written for this START/END window, in the same
# order as the names on the left-hand side.
adjOpen_df, adjHigh_df, adjLow_df, adjClose_df, adjVolume_df, return_df = (
    pd.read_pickle(PathConfig.cache_path / cache_name)
    for cache_name in (
        f"temp_adjOpen_{START}_{END}",
        f"temp_adjHigh_{START}_{END}",
        f"temp_adjLow_{START}_{END}",
        f"temp_adjClose_{START}_{END}",
        f"temp_adjVolume_{START}_{END}",
        f"temp_return_{START}_{END}",
    )
)


### Download additional data for new DMs

- KRX sector DM downloaded from KRX
- Corporate finance fundamental analysis data from DART (using 3rd party libraries)

## 3. Make price-volume chart graph

- Simple price-volume chart using `seaborn` and `streamlit`
- Show biggest winners/losers on mouse hover
    - Make pseudo-KOSPI portfolio mimicking KOSPI index
    - Show Top 3 winners/losers on the graph 

## 4. Make Finviz-style stock tree map

## 5. Make return correlation clustering DM 

- Also, visualize the result

## 6. Create NLP news DM