# 마이데이터 투자탭 데모 

## Import libraries

In [36]:
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly

import pandas as pd
import numpy as np

from sklearn.preprocessing import RobustScaler, PowerTransformer

import scipy
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.regression.rolling import RollingOLS

import pickle

In [2]:
import FinanceDataReader as fdr
import quantstats as qs

In [3]:
## custom libs

from korquanttools.pricevolume.loader import KRXPriceDM
from korquanttools.pricevolume.utils import DateUtil
from korquanttools.pricevolume.config import PathConfig

## Import dataset

In [4]:
# Global parameters

# Sample period boundaries, written as YYYYMMDD integers.
START, END = 20140101, 20221202

# Window length, counted in trading days.
# Other candidate values left by the author: 60 and 20 trading days.
WINDOW = 252

In [5]:
# Init data module
# Build the KRX price/volume data module for the START..END range and
# show its metadata summary (name, description, available data keys).
pricevolume = KRXPriceDM(START, END)
pricevolume.get_info()


        * DM name: KRX_pricevolume
        * DM description: Basic price-volume data imported from KRX website & NAVER finance. Has KOSPI, KOSDAQ, KONEX stocks.
        * birthday: 20211203
        * DM period: 19990101 ~ 
        * Available data: ['lv1', 'open', 'high', 'low', 'close', 'volume', 'dollarvolume', 'marketcap']
        


### 수익률

In [6]:
## stock return
# The return table is read from a pickle cache whose file name encodes the
# START/END period.
return_cache_file = PathConfig.cache_path / f"temp_return_{START}_{END}"
return_df = pd.read_pickle(return_cache_file)

In [7]:
# A date on which every column of return_df is null is treated as a holiday;
# every other date is a trading day.
all_null_mask = return_df.isnull().all(axis=1)

holidays = all_null_mask.index[all_null_mask]
tradingdays = all_null_mask.index[~all_null_mask]

tradingdays

DatetimeIndex(['2014-01-02', '2014-01-03', '2014-01-06', '2014-01-07',
               '2014-01-08', '2014-01-09', '2014-01-10', '2014-01-13',
               '2014-01-14', '2014-01-15',
               ...
               '2022-11-21', '2022-11-22', '2022-11-23', '2022-11-24',
               '2022-11-25', '2022-11-28', '2022-11-29', '2022-11-30',
               '2022-12-01', '2022-12-02'],
              dtype='datetime64[ns]', name='trdDd', length=2190, freq=None)

In [8]:
# Keep only the trading-day rows; copy so the result is an independent frame.
return_df = return_df.loc[tradingdays].copy()

### 시가총액

In [9]:
# Data for 2nd Factor: SMB (Small Minus Big)
# Fetch the "marketcap" table from the price/volume data module.

marcap_df = pricevolume.get_data("marketcap")

In [10]:
# The table was stored with object dtype; cast it to float64.
marcap_df = marcap_df.astype('float64')

In [11]:
# Restrict market caps to the same trading-day rows used for return_df.
marcap_df = marcap_df.loc[tradingdays].copy()

### Grouping

In [12]:
# Industry name -> industry code mapping, deserialised from a local pickle.
INDUSTRY_NAME2CODE = pickle.loads(Path('INDUSTRY_NAME2CODE.pickle').read_bytes())

In [13]:
# Reverse lookup: industry code -> industry name.
INDUSTRY_CODE2NAME = dict(zip(INDUSTRY_NAME2CODE.values(), INDUSTRY_NAME2CODE.keys()))

In [14]:
# Load the industry table from a local pickle.
# NOTE(review): the dates in this file name are hard-coded and currently
# equal START/END; if those globals change, this file will no longer match
# the return cache period - confirm whether it should be derived from them.
industry_df = pd.read_pickle('krx_industry_df_20140101_20221202.pickle')

In [15]:
# Mapping deserialised from a local pickle (presumably stock id -> name,
# judging by the variable name; confirm against its producer).
sid2name = pickle.loads(Path('sid2name.pkl').read_bytes())

## 업종별 수익률

In [16]:
# Positional row index used with .iloc to pick the snapshot date from the
# return, market-cap and industry tables; -1 selects the last row.
DATE_BEFORE = -1

In [17]:
# Take the row at position DATE_BEFORE from each table.
# NOTE(review): the lookup is positional, so this assumes the three frames
# share the same row order/dates - industry_df is not filtered to
# tradingdays in this notebook; confirm they line up.
last_return_v, last_marcap_v, last_group_v = (
    frame.iloc[DATE_BEFORE] for frame in (return_df, marcap_df, industry_df)
)

In [18]:
# One row per stock: its return, market cap and industry code at the
# snapshot date.
last_lv1_df = pd.DataFrame(
    dict(ret=last_return_v, marcap=last_marcap_v, group_idx=last_group_v)
)

In [19]:
# Total market cap per industry, as a flat two-column frame
# ('group_idx', 'group_marcap_sum') with a default positional index.
group_marcap_weight_df = (
    last_lv1_df.groupby('group_idx')['marcap']
    .sum()
    .rename('group_marcap_sum')
    .reset_index()
)

In [20]:
# Attach each stock's industry-level market-cap total.
# DataFrame.join(on=...) matches the caller's column against the *index* of
# the other object, so the totals must be indexed by 'group_idx' first.
# Joining against the frame's default positional index only lines up when
# the industry codes happen to be exactly 0..n-1.
group_marcap_sum_v = group_marcap_weight_df.set_index('group_idx')['group_marcap_sum']
last_lv1_df = last_lv1_df.join(group_marcap_sum_v, on='group_idx')

# Weight of each stock inside its own industry, and the stock's contribution
# to that industry's market-cap-weighted return.
last_lv1_df['group_marcap_weight'] = last_lv1_df['marcap'] / last_lv1_df['group_marcap_sum']
last_lv1_df['group_weighted_ret'] = last_lv1_df['ret'] * last_lv1_df['group_marcap_weight']


In [21]:
# Aggregate to one row per industry: summing the per-stock weighted returns
# gives the industry's cap-weighted return; summing marcap gives its size.
group_tree_df = (
    last_lv1_df.groupby('group_idx')[['group_weighted_ret', 'marcap']]
    .sum()
    .reset_index()
)
# Direct dict indexing: an unknown industry code raises KeyError.
group_tree_df['group_name'] = [
    INDUSTRY_CODE2NAME[code] for code in group_tree_df['group_idx']
]

In [22]:
# Scale the industry return by 100 (presumably fraction -> percent for
# display; confirm the units of return_df). Re-running this cell scales again.
group_tree_df['group_weighted_ret'] = group_tree_df['group_weighted_ret'].mul(100)

In [23]:
# Display the column labels of the per-industry frame (inspection only).
group_tree_df.columns

Index(['group_idx', 'group_weighted_ret', 'marcap', 'group_name'], dtype='object')

In [25]:
# Natural log of the industry market cap.
group_tree_df['log_marcap'] = np.log(group_tree_df['marcap'])

In [34]:
# Robust-scaled industry market cap. The scaler is fitted on a single-column
# frame because fit_transform is given 2-D input here.
scaler = RobustScaler()
group_tree_df['robust_marcap'] = scaler.fit_transform(group_tree_df[['marcap']])
# Left disabled by the author: shifting 'robust_marcap' up by abs(min).

In [37]:
# Yeo-Johnson power-transformed industry market cap (rebinds `scaler`).
scaler = PowerTransformer(method='yeo-johnson')
group_tree_df['power_marcap'] = scaler.fit_transform(group_tree_df[['marcap']])

In [40]:
# Square root of the industry market cap.
group_tree_df['sqrt_marcap'] = np.sqrt(group_tree_df['marcap'])

In [47]:
chart_name = '업종별 수익률 현황'

# Treemap with one tile per industry under a single root node.
# Tile size is the raw industry market cap ('power_marcap' was left as a
# commented-out alternative by the author); tile colour is the industry
# return on the 'RdBu_r' scale with its midpoint pinned at 0.
treemap_fig = px.treemap(
    data_frame=group_tree_df,
    path=[px.Constant('KOSPI & KOSDAQ'), 'group_name'],
    values='marcap',
    color='group_weighted_ret',
    color_continuous_scale='RdBu_r',
    color_continuous_midpoint=0,
    title=chart_name,
)

treemap_fig.update_layout(margin={'t': 50, 'l': 25, 'b': 25})
treemap_fig.show()
