In [47]:
import plotly.graph_objects as go

from ta.trend import MACD
from ta.momentum import StochasticOscillator

import numpy as np
import pandas as pd
from pykrx import stock
from pykrx import bond
from time import sleep

from datetime import datetime
from datetime import timedelta
import os
import time
from plotly.subplots import make_subplots
import glob

import warnings
warnings.filterwarnings('ignore')

## 데이터 로드

In [2]:
ohlcv_df_raw = pd.read_csv('ohlcv_df_raw.csv', dtype = {'ticker':str})
ticker_list = ohlcv_df_raw['ticker'].unique()

In [48]:
# 경로 변경
os.chdir('/home/shjj08choi4/finance_mlops')
ohlcv_df_raw = pd.read_csv('data_crawler/kor_stock_ohlcv/kor_stock_ohlcv.csv')

ohlcv_df_raw['ticker'] = ohlcv_df_raw['ticker'].astype('str')
ohlcv_df_raw['ticker'] = ohlcv_df_raw['ticker'].str.zfill(6)

ticker_list = ohlcv_df_raw['ticker'].unique()

### 보조지표 계산

In [None]:
df_raw_total = pd.DataFrame()
df_raw_anal_total = pd.DataFrame()

for ticker_nm in ticker_list:
    df_raw = ohlcv_df_raw[ohlcv_df_raw['ticker'] == ticker_nm].reset_index(drop = True)
    
    ######################################################################
    # 보조지표
    ###################################################################### 

    # 이동평균선
    df_raw['MA5'] = df_raw['close'].rolling(window=5).mean()
    df_raw['MA20'] = df_raw['close'].rolling(window=20).mean()
    df_raw['MA60'] = df_raw['close'].rolling(window=60).mean()
    df_raw['MA120'] = df_raw['close'].rolling(window=120).mean()

    # 볼린저밴드
    std = df_raw['close'].rolling(20).std(ddof=0)

    df_raw['upper'] = df_raw['MA20'] + 2 * std
    df_raw['lower'] = df_raw['MA20'] - 2 * std

    # MACD
    # MACD 
    macd = MACD(close=df_raw['close'], 
                window_slow=26,
                window_fast=12, 
                window_sign=9)


    df_raw['MACD_DIFF'] = macd.macd_diff()
    df_raw['MACD'] = macd.macd()
    df_raw['MACD_Signal'] = macd.macd_signal()

    # RSI
    df_raw['변화량'] = df_raw['close'] - df_raw['close'].shift(1)
    df_raw['상승폭'] = np.where(df_raw['변화량']>=0, df_raw['변화량'], 0)
    df_raw['하락폭'] = np.where(df_raw['변화량'] <0, df_raw['변화량'].abs(), 0)

    # welles moving average
    df_raw['AU'] = df_raw['상승폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['AD'] = df_raw['하락폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['RSI'] = df_raw['AU'] / (df_raw['AU'] + df_raw['AD']) * 100

    df_raw['MA5-20'] = df_raw['MA5'] - df_raw['MA20']
    df_raw['MA20-60'] = df_raw['MA20'] - df_raw['MA60']
    df_raw['MA60-120'] = df_raw['MA60'] - df_raw['MA120']
    
    
    ######################################################################
    # 보조지표 분석
    ###################################################################### 
    df_raw_anal = df_raw[['date','ticker', 'close']]
    
    # 골든크로스
    # 골든 크로스 5-20
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_5_20_gold_cross = [idx for idx in range(len(df_raw)) if df_raw["MA5-20"].iloc[idx] > 0 and df_raw["MA5-20"].iloc[idx - 1] <= 0]

    # 데드 크로스 5-20
    # 양수에서 음수로 바뀌는 모든 인덱스 찾기
    idx_5_20_dead_cross = [idx for idx in range(len(df_raw)) if df_raw["MA5-20"].iloc[idx] < 0 and df_raw["MA5-20"].iloc[idx - 1] >= 0]

    # 골든 크로스 20-60
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_20_60_gold_cross = [idx for idx in range(len(df_raw)) if df_raw["MA20-60"].iloc[idx] > 0 and df_raw["MA20-60"].iloc[idx - 1] <= 0]

    # 골든 크로스 20-60
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_20_60_dead_cross = [idx for idx in range(len(df_raw)) if df_raw["MA20-60"].iloc[idx] < 0 and df_raw["MA20-60"].iloc[idx - 1] >= 0]
    

    df_raw_anal.loc[:, '5_20_cross'] = '-'
    df_raw_anal.loc[idx_5_20_gold_cross,'5_20_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[idx_5_20_dead_cross,'5_20_cross'] = '데드크로스(매도)'

    df_raw_anal.loc[:, '20_60_cross'] = '-'
    df_raw_anal.loc[idx_20_60_gold_cross,'20_60_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[idx_20_60_dead_cross,'20_60_cross'] = '데드크로스(매도)'
    
    
    # 정배열 역배열
    ascending_sq  = (df_raw['MA5-20'] > 0) & \
    (df_raw['MA20-60'] > 0) & \
    (df_raw['MA60-120'] > 0) 

    descending_sq  = (df_raw['MA5-20'] < 0) & \
    (df_raw['MA20-60'] < 0) & \
    (df_raw['MA60-120'] < 0) 
    
    df_raw_anal.loc[:,'array'] = '-'
    df_raw_anal.loc[ascending_sq,'array'] = '정배열(매수)'
    df_raw_anal.loc[descending_sq,'array'] = '역배열(매도)'
    
    
    # 볼린저밴드
    down_reg_sq = df_raw['upper'] - df_raw['close'] 
    top_reg_sq  = df_raw['lower'] - df_raw['close'] 

    down_reg = [idx for idx in range(1,len(df_raw)) if down_reg_sq[idx] > 0 and down_reg_sq[idx-1] <= 0]
    top_reg = [idx for idx in range(1,len(df_raw)) if top_reg_sq[idx] < 0 and top_reg_sq[idx-1] >= 0]
    
    df_raw_anal.loc[:,'Bollinger_band'] = '-'
    df_raw_anal.loc[down_reg,'Bollinger_band'] = '하향회귀(매도)'
    df_raw_anal.loc[top_reg,'Bollinger_band'] = '상향회귀(매수)'
    
    
    # MACD
    signal_down_cross = [idx for idx in range(1,len(df_raw)) if df_raw['MACD_DIFF'][idx] < 0 and df_raw['MACD_DIFF'][idx-1] >= 0]
    signal_top_corss = [idx for idx in range(1,len(df_raw)) if df_raw['MACD_DIFF'][idx] > 0 and df_raw['MACD_DIFF'][idx-1] <= 0]

    df_raw_anal.loc[:,'MACD'] = '-'
    df_raw_anal.loc[signal_down_cross,'MACD'] = '하향돌파(매도)'
    df_raw_anal.loc[signal_top_corss,'MACD'] = '상향돌파(매수)'    
    
    # RSI
    down_reg = [idx for idx in range(1,len(df_raw)) if df_raw['RSI'][idx] > 70 and df_raw['RSI'][idx-1] <= 70]
    top_reg = [idx for idx in range(1,len(df_raw)) if df_raw['RSI'][idx] < 30 and df_raw['RSI'][idx-1] >= 30]
    
    
    df_raw_anal.loc[:,'RSI'] = '-'
    df_raw_anal.loc[down_reg,'RSI'] = 'RSI 상단 하향돌파(매도)'
    df_raw_anal.loc[top_reg,'RSI'] = 'RSI 하단 상향 돌파(매수)'


    df_raw_total = pd.concat([df_raw_total, df_raw])
    df_raw_anal_total = pd.concat([df_raw_anal_total, df_raw_anal])
    
    print(ticker_nm)

095570
006840
027410
282330
138930
001460
001465
001040
079160
00104K
000120
011150
011155
001045
097950
097955
000480
000590
012030
016610
005830
000990
139130
001530
000210
001880
000215
375500
37550L
37550K
007340
004840
155660
069730
017940
365550
383220
007700
114090
078930
006360
001250
007070
078935
012630
039570
089470
294870
009540
267250
267270
322000
042670
267260
329180
097230
014790
003580
204320
060980
011200
082740
035000
003560
175330
234080
001060
001067
001065
096760
105560
432320
002380
344820
009070
009440
119650
092220
003620
016380
001390
033180
015590
001940
025000
092230
000040
044450
030200
033780
058850
058860
093050
003550
034220
051900
051905
373220
003555
032640
011070
066570
066575
037560
051910
051915
079550
006260
010120
000680
229640
108320
001120
108670
108675
383800
38380K
023150
035420
181710
400760
005940
005945
338100
034310
030190
008260
004250
004255
456040
010060
178920
005490
010950
010955
034120
005090
001380
004060
001770
002360
009160
123700

012340
214870
270870
144960
085670
064260
340360
039560
154040
032190
068240
020400
008830
048470
004780
017650
007720
389260
317850


In [None]:
df_raw_total = df_raw_total.reset_index(drop = True)

In [None]:
df_raw_total.head()

In [None]:
df_raw_anal_total.head()

### 등락률 계산

In [8]:
diff_date = 60
now = datetime.now()
now = now + timedelta(days=-diff_date)
set_date = now.strftime('%Y-%m-%d')
df1 =  ohlcv_df_raw[ohlcv_df_raw['date'] > set_date].groupby("ticker")['date'].min().reset_index()


In [9]:
diff_date_list = [30, 90, 180, 240, 365]
now = datetime.now()
set_date = now.strftime('%Y-%m-%d')

ohlcv_df_raw['date'] = pd.to_datetime(ohlcv_df_raw['date'])


df1_set =  ohlcv_df_raw[ohlcv_df_raw['date'] > '2023-10-01'].groupby("ticker")['date'].max().reset_index()
# df1_set =  ohlcv_df_raw[ohlcv_df_raw['date'] == '2023-10-06'].groupby("ticker")['date'].max().reset_index()
df1_set = df1_set[['ticker', 'date']]
df1_set = pd.merge(df1_set, ohlcv_df_raw[['ticker', 'date', 'close', 'corp_name']],
              on = ['ticker', 'date'], 
              how = 'left')

    
for diff_date in diff_date_list:
# diff_date = 240
    now = datetime.now()
    now = now + timedelta(days=-diff_date)
    set_date = now.strftime('%Y-%m-%d')
    df1 =  ohlcv_df_raw[ohlcv_df_raw['date'] > set_date].groupby("ticker")['date'].min().reset_index()

    
    df1 = df1[['ticker','date']]
    df1 = pd.merge(df1, ohlcv_df_raw[['ticker', 'date', 'close']],
                  on = ['ticker', 'date'], 
                  how = 'left')
    
    df1.columns = ['ticker', f'date_{diff_date}', f'close_{diff_date}']

    df1_set = pd.merge(df1_set, df1, 
                      on = 'ticker', 
                      how = 'left')

    df1_set[f'per_{diff_date}'] = (df1_set['close'] - df1_set[f'close_{diff_date}']) / df1_set[f'close_{diff_date}']    

In [10]:
df1_set_2 = df1_set[['date','ticker','per_30', 'per_90', 'per_180', 'per_240', 'per_365', 'corp_name']]

In [11]:
df_per_total = pd.DataFrame()
per_set = ['per_30', 'per_90', 'per_180', 'per_240', 'per_365']
ascending_list = [True, False]

for per_value in per_set:
    for ascending_value in ascending_list:
        df_per =  df1_set_2.sort_values(by =per_value, ascending = ascending_value).head()
        df_per['type'] = per_value
        df_per['rank'] = range(1,6)
        df_per_total = pd.concat([df_per_total, df_per])

df_per_total = df_per_total.drop_duplicates()        

## 최종

In [17]:
df_raw_total

Unnamed: 0,date,open,high,low,close,volume,price_change_percentage,ticker,corp_name,market,...,MACD_Signal,변화량,상승폭,하락폭,AU,AD,RSI,MA5-20,MA20-60,MA60-120
0,2018-01-02,51380,51400,50780,51020,169485,0.117739,005930,삼성전자,KOSPI_100,...,,,0.0,0.0,,,,,,
1,2018-01-03,52540,52560,51420,51620,200270,1.176009,005930,삼성전자,KOSPI_100,...,,600.0,600.0,0.0,,,,,,
2,2018-01-04,52120,52180,50640,51080,233909,-1.046106,005930,삼성전자,KOSPI_100,...,,-540.0,0.0,540.0,,,,,,
3,2018-01-05,51300,52120,51200,52120,189623,2.036022,005930,삼성전자,KOSPI_100,...,,1040.0,1040.0,0.0,,,,,,
4,2018-01-08,52400,52520,51500,52020,167673,-0.191865,005930,삼성전자,KOSPI_100,...,,-100.0,0.0,100.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133107,2023-10-25,10270,10420,10100,10310,202518,0.291829,137310,에스디바이오센서,KOSPI_100,...,-467.505334,30.0,30.0,0.0,101.755167,147.638099,40.801088,-353.0,-1225.000000,-1828.750000
133108,2023-10-26,10100,10130,9800,9820,406661,-4.752667,137310,에스디바이오센서,KOSPI_100,...,-465.442540,-490.0,0.0,490.0,94.486941,172.092520,35.444194,-380.5,-1290.333333,-1759.666667
133109,2023-10-27,9900,9970,9640,9690,283074,-1.323829,137310,에스디바이오센서,KOSPI_100,...,-467.424888,-130.0,0.0,130.0,87.737873,169.085912,34.162674,-361.5,-1329.666667,-1712.750000
133110,2023-10-30,9640,9930,9630,9810,200570,1.238390,137310,에스디바이오센서,KOSPI_100,...,-468.858850,120.0,120.0,0.0,90.042311,157.008346,36.446902,-360.5,-1376.166667,-1641.166667


In [15]:
df_raw_anal_total

Unnamed: 0,date,ticker,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
0,2018-01-02,005930,51020,-,-,-,-,-,-
1,2018-01-03,005930,51620,-,-,-,-,-,-
2,2018-01-04,005930,51080,-,-,-,-,-,-
3,2018-01-05,005930,52120,-,-,-,-,-,-
4,2018-01-08,005930,52020,-,-,-,-,-,-
...,...,...,...,...,...,...,...,...,...
559,2023-10-25,137310,10310,-,-,역배열(매도),상향회귀(매수),-,-
560,2023-10-26,137310,9820,-,-,역배열(매도),상향회귀(매수),-,-
561,2023-10-27,137310,9690,-,-,역배열(매도),상향회귀(매수),하향돌파(매도),-
562,2023-10-30,137310,9810,-,-,역배열(매도),상향회귀(매수),-,-


In [18]:
df_per_total.tail()

Unnamed: 0,date,ticker,per_30,per_90,per_180,per_240,per_365,corp_name,type,rank
61,2023-10-31,47050,,-0.311813,0.320158,0.828467,1.02834,포스코인터내셔널,per_365,1
2,2023-10-31,660,,-0.005983,0.02467,0.309685,0.494859,SK하이닉스,per_365,2
34,2023-10-31,12450,,-0.032567,-0.232523,-0.09009,0.438746,한화에어로스페이스,per_365,3
13,2023-10-31,5490,,-0.236549,0.02875,0.063307,0.426343,POSCO홀딩스,per_365,4
60,2023-10-31,42660,,-0.239936,-0.279125,-0.016134,0.405515,한화오션,per_365,5


In [19]:
df_per_total

Unnamed: 0,date,ticker,per_30,per_90,per_180,per_240,per_365,corp_name,type,rank
0,2023-10-31,100,,-0.225371,-0.015437,0.003497,0.008486,유한양행,per_30,1
1,2023-10-31,270,,-0.063337,-0.100585,-0.112009,0.226475,기아,per_30,2
2,2023-10-31,660,,-0.005983,0.02467,0.309685,0.494859,SK하이닉스,per_30,3
3,2023-10-31,720,,-0.087432,-0.181373,-0.182375,-0.04298,현대건설,per_30,4
4,2023-10-31,810,,-0.031835,0.148889,0.1566,0.193995,삼성화재,per_30,5
9,2023-10-31,3670,,-0.359079,-0.367647,-0.304412,0.32493,포스코퓨처엠,per_90,1
61,2023-10-31,47050,,-0.311813,0.320158,0.828467,1.02834,포스코인터내셔널,per_90,2
63,2023-10-31,51900,,-0.305066,-0.361982,-0.489482,-0.552482,LG생활건강,per_90,3
20,2023-10-31,8770,,-0.28473,-0.144231,-0.214376,-0.228005,호텔신라,per_90,4
83,2023-10-31,241560,,-0.261905,-0.327257,-0.243164,0.088483,두산밥캣,per_90,5
