In [1]:
import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt 
# Use the D2Coding font family for every matplotlib figure in this notebook.
# NOTE(review): presumably chosen so Korean labels render correctly; the font
# has to be installed on the machine running the notebook — confirm.
plt.rcParams['font.family'] = 'D2Coding'
import seaborn as sns 

from tqdm import tqdm
from datetime import datetime
from dateutil.relativedelta import relativedelta

import warnings 

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation/FutureWarning messages that
# would flag version-dependent behaviour; consider narrowing the filter with
# `category=` / `module=` instead of a blanket ignore.
warnings.filterwarnings(action='ignore')

In [2]:
import sys 

# Put the parent directory on the import search path so the `asset` package
# imported just below can be resolved. The path is relative, so it is resolved
# against the kernel's current working directory.
sys.path.append("../")

from asset.common.utils import reset_seeds, load_stock, load_corps

# global variables

In [3]:
import easydict

# Notebook-wide settings gathered in a single attribute-style container.
# The `{stock_code}` placeholder in the stock path is left unformatted here;
# it is meant to be filled in per stock by whoever consumes the template.
args = easydict.EasyDict(
    {
        'SEED': 42,
        'df_corps_path': '../data/meta/stock_corps.csv',
        'save_stock_preprocessing_path': '../data/stock/preprocessing/{stock_code}.csv',
    }
)

In [5]:
# Load the company master table from the CSV path configured in `args`
# (through the project helper `load_corps`) and preview its first two rows.
# NOTE(review): the "(2494, 13)" line in this cell's output is not printed
# here, so it presumably comes from inside `load_corps` — confirm.
df_corps = load_corps(args.df_corps_path)
df_corps.head(2)

(2494, 13)


Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,corp_cls_nm,sector,product,industry_wics_ss_nm,industry_wics_ss_cd,industry_wics_ms_cd,industry_wics_ms_nm,industry_wics_ls_cd,industry_wics_ls_nm
0,956028,엑세스바이오,950130,K,kosdaq,의료용품 및 기타 의약 관련제품 제조업,"말라리아 진단키트(RDT), HIV 진단키트(RDT)",생명과학도구및서비스,352030,3520,제약과생물공학,35,건강관리
1,783246,글로벌에스엠,900070,K,kosdaq,기타 금융업,"자회사 제품 : IT기기용 정밀 화스너부품(스크류, 샤프트, 스프링)",자동차부품,251010,2510,자동차와부품,25,경기관련소비재


# 산업 분야별 대장주 분석 

- 자동차
    - 현대차, 기아, 현대모비스, 피에이치에이, 현대위아, 만도, 구영테크
- 반도체
- 바이오
- 배터리
- 화장품 
- 엔터 
- 여행
- IT 
- AI
- 로봇

## 자동차

In [17]:
# Split the company master table by `sector`, keeping only the identifying
# columns, so the companies of a sector can be looked up by sector name.
#
# Group by the scalar key 'sector' rather than the one-element list
# ['sector']: with a list, pandas >= 2.0 yields 1-tuple keys while older
# releases yield a bare string, so `krx[0]` would silently pick the first
# character of the sector name there. A scalar key yields a plain string on
# every pandas version, so the key can be used directly.
krx_group = {}

for krx, group in df_corps.groupby('sector'):
    krx_group[krx] = group[['corp_name', 'stock_code', 'corp_cls_nm', 'sector']]

In [18]:
# Show the master-table row(s) whose company name is exactly '현대자동차'.
df_corps.loc[df_corps['corp_name'].eq('현대자동차')]

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,corp_cls_nm,sector,product,industry_wics_ss_nm,industry_wics_ss_cd,industry_wics_ms_cd,industry_wics_ms_nm,industry_wics_ls_cd,industry_wics_ls_nm
1050,164742,현대자동차,5380,Y,kospi,자동차용 엔진 및 자동차 제조업,"자동차(승용차,버스,트럭,특장차),자동차부품,자동차전착도료 제조,차량정비사업",자동차,251020,2510,자동차와부품,25,경기관련소비재


In [23]:
# Show the master-table row(s) whose company name is exactly '현대모비스'.
df_corps.loc[df_corps['corp_name'].eq('현대모비스')]

Unnamed: 0,corp_code,corp_name,stock_code,corp_cls,corp_cls_nm,sector,product,industry_wics_ss_nm,industry_wics_ss_cd,industry_wics_ms_cd,industry_wics_ms_nm,industry_wics_ls_cd,industry_wics_ls_nm
1339,164788,현대모비스,12330,Y,kospi,자동차 신품 부품 제조업,"자동차A/S용 부품,컨테이너,특수중기,산업기계제품 생산,판매,부품모듈화,부품수출사업...",자동차부품,251010,2510,자동차와부품,25,경기관련소비재


In [24]:
# Build the automotive universe: the vehicle/engine manufacturing sector
# followed by the new-auto-parts sector, stacked row-wise in that order.
df_car = pd.concat(
    [
        krx_group[sector_name]
        for sector_name in ('자동차용 엔진 및 자동차 제조업', '자동차 신품 부품 제조업')
    ],
    axis=0,
)
print(df_car.shape)
df_car

(100, 4)


Unnamed: 0,corp_name,stock_code,corp_cls_nm,sector
592,동양피스톤,092780,kospi,자동차용 엔진 및 자동차 제조업
1050,현대자동차,005380,kospi,자동차용 엔진 및 자동차 제조업
1556,기아,000270,kospi,자동차용 엔진 및 자동차 제조업
1935,광림,014200,kosdaq,자동차용 엔진 및 자동차 제조업
2455,KG모빌리티,003620,kospi,자동차용 엔진 및 자동차 제조업
...,...,...,...,...
2352,현대공업,170030,kosdaq,자동차 신품 부품 제조업
2366,평화홀딩스,010770,kospi,자동차 신품 부품 제조업
2383,명신산업,009900,kospi,자동차 신품 부품 제조업
2393,평화산업,090080,kospi,자동차 신품 부품 제조업


In [34]:
# Load price data for every distinct stock code in `df_car` through the
# project helper `load_stock`, handing it the per-stock CSV path template from
# `args`. The result is used as a mapping keyed by stock code (its keys are
# displayed on the next line and later cells index it by code).
# NOTE(review): the progress bar and the bare "100" in the output are not
# produced by this cell's own code, so they presumably come from inside
# `load_stock` — confirm.
df_car_dict = load_stock(df_car['stock_code'].unique(), args.save_stock_preprocessing_path)
df_car_dict.keys()

100%|██████████| 100/100 [00:01<00:00, 50.96it/s]

100





dict_keys(['092780', '005380', '000270', '014200', '003620', '015750', '005710', '104040', '200880', '053060', '013720', '011320', '013520', '123040', '018500', '033250', '378850', '024910', '021820', '002920', '075180', '122690', '064960', '123410', '024120', '128540', '019540', '080470', '212560', '053270', '090150', '118990', '005030', '004100', '011210', '265560', '043370', '078590', '046070', '122350', '001420', '085910', '092200', '002880', '065500', '066590', '215360', '310870', '130740', '143210', '000430', '419050', '015230', '113810', '016740', '012330', '012860', '024830', '126640', '010690', '024740', '053700', '038110', '009320', '072470', '214330', '013310', '001380', '012280', '123700', '013870', '105330', '041650', '003570', '010100', '071850', '001620', '031510', '005850', '023000', '204320', '009680', '023810', '308170', '012340', '006660', '010580', '067570', '241690', '024900', '023800', '033530', '234100', '097780', '290120', '170030', '010770', '009900', '090080',

In [37]:
# Worked example on a single stock (code '092780'): keep only the rows
# selected by the '2023' label and turn the index back into a regular column.
df_tmp = (
    df_car_dict['092780']
    .loc['2023']
    .reset_index()
)
df_tmp

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,Day_Range,ta_bol_high,ta_bol_low,...,ta_t_Parabolic SAR,ta_t_STC,ta_m_RSI,ta_m_SRSI,ta_m_TSI,ta_m_AO,ta_m_KAMA,ta_m_ROC,ta_m_PPO,ta_m_PVO
0,2023-01-02,4795,4880,4675,4720,26417,-0.015641,-0.015641,5445.725058,4593.774942,...,5058.294152,0.390625,42.966463,0.188672,-1.193732,-135.250000,5019.351718,-10.266160,-0.203530,-22.106437
1,2023-01-03,4630,4755,4480,4705,48040,-0.003178,0.016199,5438.886863,4555.113137,...,5015.630620,0.195312,42.412577,0.174241,-3.473357,-129.058824,5018.043282,-11.560150,-0.466960,-21.933963
2,2023-01-04,4605,4795,4605,4755,21975,0.010627,0.032573,5425.284318,4529.215682,...,4962.067558,0.097656,44.959615,0.240601,-4.576998,-150.235294,5016.948409,-9.772296,-0.586754,-25.345604
3,2023-01-05,4755,4855,4740,4775,47880,0.004206,0.004206,5412.461282,4508.538718,...,4913.860802,0.048828,45.988667,0.298563,-5.175695,-160.264706,5015.941340,-8.874046,-0.641323,-24.232149
4,2023-01-06,4780,4920,4770,4780,38997,0.001047,0.000000,5408.443017,4491.556983,...,4913.860802,0.024414,46.259178,0.329073,-5.589857,-152.573529,5014.959274,-7.364341,-0.668717,-24.470542
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,2023-08-04,5660,5710,5560,5650,29270,-0.003527,-0.001767,6037.505943,5406.494057,...,5738.684692,0.037038,54.236905,0.363117,16.879808,192.926471,5036.591057,0.000000,1.565031,-16.894036
148,2023-08-07,5610,5640,5470,5630,35856,-0.003540,0.003565,6036.031445,5399.968555,...,5738.684692,0.018519,53.138156,0.473841,15.269555,155.191176,5039.061022,0.177936,1.400530,-18.934947
149,2023-08-08,5580,5660,5520,5580,21995,-0.008881,0.000000,6029.175302,5385.824698,...,5738.684692,0.009259,50.389812,0.318250,13.111632,107.161765,5041.312589,-1.238938,1.185085,-22.565985
150,2023-08-09,5580,5610,5510,5590,20272,0.001792,0.001792,6004.832223,5379.167777,...,5738.684692,0.004630,50.936373,0.349192,11.487879,49.867647,5043.596407,-0.710480,1.016946,-25.900102


In [38]:
# Derive the calendar month (1-12) from the datetime `Date` column.
# This adds the column to `df_tmp` in place, so the frame displayed by the
# previous cell no longer matches the current state of `df_tmp`.
df_tmp['month'] = df_tmp['Date'].dt.month

In [42]:
# Per-month averages of close price, volume and day range for the sample
# stock, with `month` kept as an ordinary column.
(
    df_tmp
    .groupby('month', as_index=False)[['Close', 'Volume', 'Day_Range']]
    .mean()
)

Unnamed: 0,month,Close,Volume,Day_Range
0,1,4928.5,110488.15,0.002905074
1,2,5178.0,45636.55,8.802631e-07
2,3,4850.454545,42579.272727,-0.003963835
3,4,4553.75,105855.25,-0.002110517
4,5,4808.75,91431.6,0.004771318
5,6,5033.571429,41137.380952,0.002071139
6,7,5658.095238,100637.095238,0.003775343
7,8,5653.75,35875.75,-0.001382791


In [57]:
# Build one long table of 2023 monthly averages (Close, Volume, Day_Range)
# for every stock in `df_car_dict`, tagged with its stock code.
#
# Processing stays best-effort: a stock that cannot be summarised is skipped
# and its code recorded in `error_list`. Unlike a bare `except:`, catching
# `Exception` lets KeyboardInterrupt/SystemExit through, and the reason for
# each failure is kept in `error_reasons` so it can be inspected afterwards.
monthly_frames = []   # one aggregated frame per successfully processed stock
error_list = []       # stock codes that failed
error_reasons = {}    # stock code -> repr of the exception that was raised

for k, _df in df_car_dict.items():
    try:
        _df = _df.loc['2023'].reset_index()
        _df['month'] = _df['Date'].dt.month
        _df = (
            _df.groupby(['month'])
            .agg({'Close': 'mean', 'Volume': 'mean', 'Day_Range': 'mean'})
            .reset_index()
        )
        _df['stock_code'] = k
    except Exception as exc:
        error_list.append(k)
        error_reasons[k] = repr(exc)
        continue
    monthly_frames.append(_df)

# Concatenate once instead of growing the frame inside the loop (which copies
# all accumulated rows on every iteration). `pd.concat` rejects an empty list,
# so fall back to an empty frame when nothing could be processed.
df_car_group = pd.concat(monthly_frames, axis=0) if monthly_frames else pd.DataFrame()

In [58]:
# Stock codes that could not be aggregated in the loop above.
error_list

['078590']

In [61]:
# Sanity check: overall shape of the stacked table and how many distinct
# stocks it covers, then a two-row preview.
print(df_car_group.shape, '/', df_car_group['stock_code'].nunique())
df_car_group.head(n=2)

(777, 5) / 99


Unnamed: 0,month,Close,Volume,Day_Range,stock_code
0,1,4928.5,110488.15,0.002905074,92780
1,2,5178.0,45636.55,8.802631e-07,92780


In [None]:
# Attach the company metadata from `df_corps` (name, market, sector, WICS
# industry columns) to each monthly row, keeping every row of `df_car_group`.
# `how` must be the string 'left'; the bare name `left` is undefined and
# raised NameError.
# NOTE(review): the join only matches if `stock_code` has the same dtype and
# zero-padding in both frames (string codes such as '005380') — confirm.
# NOTE(review): this cell overwrites its own input, so running it twice
# merges the metadata in again and produces `_x`/`_y` suffixed columns.
df_car_group = pd.merge(df_car_group, df_corps, on=['stock_code'], how='left')