In [2]:
import math
import numpy as np
import pandas as pd
import os
import glob

from pykrx import stock
from pykrx import bond

import time
from time import sleep
from datetime import datetime
from datetime import timedelta

from pyarrow import csv
import pyarrow as pa
import pyarrow.parquet as pq

from ta.trend import MACD
from ta.momentum import StochasticOscillator

import psycopg2 as pg2
from sqlalchemy import create_engine

from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage


import warnings
warnings.filterwarnings('ignore')

# Change the working directory: the relative paths used below
# (key_value/, data_crawler/) are resolved against this directory.
os.chdir('/home/shjj08choi4/finance_mlops')


# Path of the service-account key JSON file: the first match returned by glob
# (raises IndexError when key_value/ contains no *.json file).
key_path = glob.glob("key_value/*.json")[0]

# Create the Credentials object from the key file
credentials = service_account.Credentials.from_service_account_file(key_path)

# BigQuery identifiers
project_id = 'owenchoi-404302'
dataset_id = 'finance_mlops'

# Create the GCP storage client object
storage_client = storage.Client(credentials = credentials,
                         project = credentials.project_id)
bucket_name = 'finance-mlops-proj'    # name of the bucket created for the service account


# Date strings derived from the current time, in the formats used for file names
now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")

# NOTE(review): these two assignments overwrite the values computed from `now`
# just above, so file names built from today_date1 later in the notebook are
# pinned to 2023-12-26 — confirm this fixed date is intentional.
today_date1 = '20231226'
today_date2 = '2023-12-26'

In [3]:
# (GCS blob, local copy) pairs, fetched in this order:
# the OHLCV price history first, then the ticker list.
_csv_downloads = [
    ('data_crawler/kor_stock_ohlcv/kor_stock_ohlcv.csv',
     'data_crawler/kor_stock_ohlcv/kor_stock_ohlcv_cleaning.csv'),
    ('data_crawler/kor_ticker_list/kor_ticker_list.csv',
     'data_crawler/kor_ticker_list/kor_ticker_list_cleaning.csv'),
]

_loaded_frames = []
for source_blob_name, destination_file_name in _csv_downloads:
    bucket = storage_client.bucket(bucket_name)         # connect to the bucket
    blob = bucket.blob(source_blob_name)                # handle on the stored file
    blob.download_to_filename(destination_file_name)    # download it locally
    _loaded_frames.append(pd.read_csv(destination_file_name))

ohlcv_df_raw, kor_ticker_list = _loaded_frames

# Offline alternative: read the crawler's local CSVs instead of downloading
# kor_ticker_list = pd.read_csv('data_crawler/kor_ticker_list/kor_ticker_list.csv')
# ohlcv_df_raw = pd.read_csv('data_crawler/kor_stock_ohlcv/kor_stock_ohlcv.csv')

# Ticker codes as zero-padded 6-character strings
ohlcv_df_raw['ticker'] = ohlcv_df_raw['ticker'].astype('str').str.zfill(6)

# Distinct tickers present in the price history
ticker_list = ohlcv_df_raw['ticker'].unique()

In [4]:


# Debug cap on how many tickers are processed per run.
# Set to None to process every ticker in ticker_list.
MAX_TICKERS = 5


def _crossed_above(series, level=0):
    """Flag rows where `series` is above `level` but was at or below it one row earlier.

    Comparisons against NaN are False, so the first row (no previous value)
    and rows inside an indicator's warm-up window are never flagged.
    """
    previous = series.shift(1)
    return (series > level) & (previous <= level)


def _crossed_below(series, level=0):
    """Flag rows where `series` is below `level` but was at or above it one row earlier.

    Comparisons against NaN are False, so the first row (no previous value)
    and rows inside an indicator's warm-up window are never flagged.
    """
    previous = series.shift(1)
    return (series < level) & (previous >= level)


# Per-ticker results are collected in lists and concatenated once after the
# loop; concatenating inside the loop re-copies all earlier rows every time.
raw_frames = []
anal_frames = []

for ticker_nm in ticker_list[:MAX_TICKERS]:
    # Rows of one ticker, re-indexed 0..n-1.
    # NOTE(review): the rolling/shift logic below assumes the rows are in
    # ascending date order — confirm against the crawler output.
    df_raw = ohlcv_df_raw[ohlcv_df_raw['ticker'] == ticker_nm].reset_index(drop = True)

    ######################################################################
    # Technical indicators
    ######################################################################

    # Simple moving averages of the close
    df_raw['MA5'] = df_raw['close'].rolling(window=5).mean()
    df_raw['MA20'] = df_raw['close'].rolling(window=20).mean()
    df_raw['MA60'] = df_raw['close'].rolling(window=60).mean()
    df_raw['MA120'] = df_raw['close'].rolling(window=120).mean()

    # Bollinger bands: MA20 +/- 2 population standard deviations (ddof=0)
    std = df_raw['close'].rolling(20).std(ddof=0)

    df_raw['upper'] = df_raw['MA20'] + 2 * std
    df_raw['lower'] = df_raw['MA20'] - 2 * std

    # MACD (12/26 with a 9-period signal line)
    macd = MACD(close=df_raw['close'],
                window_slow=26,
                window_fast=12,
                window_sign=9)

    df_raw['MACD_DIFF'] = macd.macd_diff()
    df_raw['MACD'] = macd.macd()
    df_raw['MACD_Signal'] = macd.macd_signal()

    # RSI: day-over-day change split into gains and losses.
    # The first row has no previous close, so its change is NaN and both
    # gain and loss become 0.
    df_raw['변화량'] = df_raw['close'] - df_raw['close'].shift(1)
    df_raw['상승폭'] = np.where(df_raw['변화량']>=0, df_raw['변화량'], 0)
    df_raw['하락폭'] = np.where(df_raw['변화량'] <0, df_raw['변화량'].abs(), 0)

    # Wilder-style smoothing: exponential average with alpha = 1/14
    df_raw['AU'] = df_raw['상승폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['AD'] = df_raw['하락폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['RSI'] = df_raw['AU'] / (df_raw['AU'] + df_raw['AD']) * 100

    # Gaps between neighbouring moving averages (used for crosses and alignment)
    df_raw['MA5-20'] = df_raw['MA5'] - df_raw['MA20']
    df_raw['MA20-60'] = df_raw['MA20'] - df_raw['MA60']
    df_raw['MA60-120'] = df_raw['MA60'] - df_raw['MA120']


    ######################################################################
    # Indicator analysis (one label column per signal, '-' = no signal)
    ######################################################################
    # Explicit copy: the label columns are written into this frame, and writing
    # into a bare column slice of df_raw is chained assignment.
    df_raw_anal = df_raw[['date','ticker', 'close']].copy()

    # Golden / dead cross of MA5 vs MA20: the gap turns positive / negative.
    # The shift-based masks never compare row 0 with the last row, which the
    # previous `iloc[idx - 1]` form did at idx == 0.
    df_raw_anal['5_20_cross'] = '-'
    df_raw_anal.loc[_crossed_above(df_raw['MA5-20']), '5_20_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[_crossed_below(df_raw['MA5-20']), '5_20_cross'] = '데드크로스(매도)'

    # Golden / dead cross of MA20 vs MA60
    df_raw_anal['20_60_cross'] = '-'
    df_raw_anal.loc[_crossed_above(df_raw['MA20-60']), '20_60_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[_crossed_below(df_raw['MA20-60']), '20_60_cross'] = '데드크로스(매도)'


    # Moving-average alignment: all three gaps positive / all three negative
    ascending_sq = (
        (df_raw['MA5-20'] > 0)
        & (df_raw['MA20-60'] > 0)
        & (df_raw['MA60-120'] > 0)
    )
    descending_sq = (
        (df_raw['MA5-20'] < 0)
        & (df_raw['MA20-60'] < 0)
        & (df_raw['MA60-120'] < 0)
    )

    df_raw_anal['array'] = '-'
    df_raw_anal.loc[ascending_sq, 'array'] = '정배열(매수)'
    df_raw_anal.loc[descending_sq, 'array'] = '역배열(매도)'


    # Bollinger bands: the close comes back under the upper band (sell) or
    # back over the lower band (buy).
    gap_to_upper = df_raw['upper'] - df_raw['close']
    gap_to_lower = df_raw['lower'] - df_raw['close']

    df_raw_anal['Bollinger_band'] = '-'
    df_raw_anal.loc[_crossed_above(gap_to_upper), 'Bollinger_band'] = '하향회귀(매도)'
    df_raw_anal.loc[_crossed_below(gap_to_lower), 'Bollinger_band'] = '상향회귀(매수)'


    # MACD histogram turning negative (sell) / positive (buy)
    df_raw_anal['MACD'] = '-'
    df_raw_anal.loc[_crossed_below(df_raw['MACD_DIFF']), 'MACD'] = '하향돌파(매도)'
    df_raw_anal.loc[_crossed_above(df_raw['MACD_DIFF']), 'MACD'] = '상향돌파(매수)'

    # RSI: falling back below 70 (sell) / rising back above 30 (buy), matching
    # what the labels say. The earlier conditions reused the Bollinger
    # comparison operators, but the Bollinger series is (band - close) while
    # RSI is compared directly, so they fired on the opposite crossing.
    df_raw_anal['RSI'] = '-'
    df_raw_anal.loc[_crossed_below(df_raw['RSI'], 70), 'RSI'] = 'RSI 상단 하향돌파(매도)'
    df_raw_anal.loc[_crossed_above(df_raw['RSI'], 30), 'RSI'] = 'RSI 하단 상향 돌파(매수)'


    raw_frames.append(df_raw)
    anal_frames.append(df_raw_anal)

    print(ticker_nm)


# Empty frames when no ticker was processed (pd.concat rejects an empty list).
# As before, only the raw frame gets a fresh 0..n-1 index.
df_raw_total = pd.concat(raw_frames).reset_index(drop = True) if raw_frames else pd.DataFrame()
df_raw_anal_total = pd.concat(anal_frames) if anal_frames else pd.DataFrame()






095570
006840
027410
282330
138930


In [None]:

# Normalise ticker codes to zero-padded 6-character strings on every frame so
# that the merge keys below match.
for _frame in (df_raw_total, df_raw_anal_total, kor_ticker_list):
    _frame['ticker'] = _frame['ticker'].astype('str').str.zfill(6)


# Attach the ticker-list columns (e.g. market) to both frames
df_raw_total_2 = pd.merge(df_raw_total, kor_ticker_list,
        on = 'ticker',
        how = 'left')

df_raw_anal_total_2 = pd.merge(df_raw_anal_total, kor_ticker_list,
        on = 'ticker',
        how = 'left')


# Keep rows dated after 2023-01-01.
# NOTE(review): this is a plain comparison against a string, so it assumes
# `date` holds ISO 'YYYY-MM-DD' text — confirm against the crawler output.
df_raw_total_2 = df_raw_total_2[df_raw_total_2['date'] > '2023-01-01'].reset_index(drop = True)
df_raw_anal_total_2 = df_raw_anal_total_2[df_raw_anal_total_2['date'] > '2023-01-01'].reset_index(drop = True)


# Split each frame by market
df_raw_total_2_kospi = df_raw_total_2[df_raw_total_2['market'] == 'KOSPI'].reset_index(drop = True)
df_raw_total_2_kosdaq = df_raw_total_2[df_raw_total_2['market'] == 'KOSDAQ'].reset_index(drop = True)

df_raw_anal_total_2_kospi = df_raw_anal_total_2[df_raw_anal_total_2['market'] == 'KOSPI'].reset_index(drop = True)
df_raw_anal_total_2_kosdaq = df_raw_anal_total_2[df_raw_anal_total_2['market'] == 'KOSDAQ'].reset_index(drop = True)


# Frames to export, keyed by the stem used in the parquet file name
output_dir = 'data_crawler/cleaning/kor_stock_ohlcv'
frames_to_export = {
    'df_raw_total_2_kospi': df_raw_total_2_kospi,
    'df_raw_total_2_kosdaq': df_raw_total_2_kosdaq,
    'df_raw_anal_total_2_kospi': df_raw_anal_total_2_kospi,
    'df_raw_anal_total_2_kosdaq': df_raw_anal_total_2_kosdaq,
}

# Make sure the local output directory exists before writing into it
os.makedirs(output_dir, exist_ok=True)

# Write every frame to a dated parquet file (all local writes happen before
# any upload starts)
for _stem, _frame in frames_to_export.items():
    table_from_pandas = pa.Table.from_pandas(_frame,preserve_index = False)
    pq.write_table(table_from_pandas, f'{output_dir}/{_stem}_{today_date1}.parquet')


# Upload to Google Cloud Storage. The blob name equals the local path, which
# is relative to the working directory. df_raw_total_2_kosdaq is included
# here: it was written to disk before but never uploaded.
bucket = storage_client.bucket(bucket_name)
for _stem in frames_to_export:
    source_file_name = f'{output_dir}/{_stem}_{today_date1}.parquet'    # local file to upload
    destination_blob_name = source_file_name                            # object name in the bucket
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)

# The upload of kor_index_list_df_{today_date1}.parquet was already disabled
# (commented out) in this cell and stays disabled; no such file is written here.


In [19]:
def rank_per(df, rank_count):
    """Return the biggest gainers and losers of each market.

    The function previously read the notebook-global `df1` and ignored its
    `df` argument; it now ranks the frame that is actually passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'market' and 'price_change_percentage'.
    rank_count : int
        Number of rows to keep per market and per direction.

    Returns
    -------
    pandas.DataFrame
        Rows stacked in this order: KOSPI top gainers, KOSDAQ top gainers,
        KOSPI top losers, KOSDAQ top losers, with a fresh 0..n-1 index.
    """
    parts = []
    # Gainers first (descending sort), then losers (ascending sort)
    for ascending in (False, True):
        for market in ('KOSPI', 'KOSDAQ'):
            ranked = df[df['market'] == market].sort_values(
                by='price_change_percentage', ascending=ascending
            )
            parts.append(ranked.head(rank_count))

    total = pd.concat(parts)
    total = total.reset_index(drop = True)
    return total

### 주가 등락 하루

In [7]:
# Rank with rank_count=5: rank_per keeps head(5) of the descending and of the
# ascending sort for each of KOSPI and KOSDAQ.
# NOTE(review): `df1` is not defined in any visible cell, so this cell relies
# on leftover kernel state and fails on a fresh Restart & Run All.
df2 = rank_per(df1, 5)


In [8]:
# Show only the KOSDAQ rows of the ranking table
df2[df2['market'] == 'KOSDAQ']

Unnamed: 0,date,open,high,low,close,volume,trading_value,price_change_percentage,ticker,corp_name,market
5,2023-07-21,3385,7150,3010,3860,100154716,512206106270,93.0,455910,에스케이증권제9호스팩,KOSDAQ
6,2023-07-21,10200,12870,10120,12870,3260154,39664501300,30.0,226330,신테카바이오,KOSDAQ
7,2023-07-21,850,1086,817,1086,18898109,19434172560,29.9,204840,지엘팜텍,KOSDAQ
8,2023-07-21,1622,2145,1530,2145,14951692,29492994350,29.84,84440,유비온,KOSDAQ
9,2023-07-21,20900,27050,20850,26050,2886493,72432105050,22.88,39200,오스코텍,KOSDAQ
15,2023-07-21,13720,13920,9780,9780,1557558,16557138820,-29.94,80580,오킨스전자,KOSDAQ
16,2023-07-21,4610,4760,4405,4450,2074704,9452997095,-17.59,294140,레몬,KOSDAQ
17,2023-07-21,17840,19800,15670,15800,594055,10298655330,-13.0,322180,티라유텍,KOSDAQ
18,2023-07-21,2570,2605,2240,2240,6328884,15417219195,-12.16,121850,코이즈,KOSDAQ
19,2023-07-21,5810,8300,5700,6070,2723695,19192102980,-10.21,224060,더코디,KOSDAQ


In [11]:
# Display the whole ranking table
df2

Unnamed: 0,date,open,high,low,close,volume,trading_value,price_change_percentage,ticker,corp_name,market
0,2023-07-21,1069,1389,1060,1389,27158165,35382009557,29.93,7460,에이프로젠,KOSPI
1,2023-07-21,339,432,337,397,58559277,23596024910,16.42,3060,에이프로젠바이오로직스,KOSPI
2,2023-07-21,730,913,730,830,3940168,3335433818,13.7,12170,아센디오,KOSPI
3,2023-07-21,111700,128900,110900,122400,9514146,1152769400900,12.81,1570,금양,KOSPI
4,2023-07-21,9960,11680,9810,10900,3565427,39068255630,11.91,79160,CJ CGV,KOSPI
5,2023-07-21,3385,7150,3010,3860,100154716,512206106270,93.0,455910,에스케이증권제9호스팩,KOSDAQ
6,2023-07-21,10200,12870,10120,12870,3260154,39664501300,30.0,226330,신테카바이오,KOSDAQ
7,2023-07-21,850,1086,817,1086,18898109,19434172560,29.9,204840,지엘팜텍,KOSDAQ
8,2023-07-21,1622,2145,1530,2145,14951692,29492994350,29.84,84440,유비온,KOSDAQ
9,2023-07-21,20900,27050,20850,26050,2886493,72432105050,22.88,39200,오스코텍,KOSDAQ


In [60]:
# Reshape the KOSPI rows to wide form: one row per date, one column per
# company name, values = price_change_percentage
df2[df2['market'] == 'KOSPI'].pivot(index = 'date', columns = 'corp_name', values = 'price_change_percentage')

corp_name,CJ CGV,HMM,금양,남선알미우,동양우,씨아이테크,아센디오,에이프로젠,에이프로젠바이오로직스,이엔플러스
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-07-21,11.91,-5.91,12.81,-6.57,-6.29,-8.62,13.7,29.93,16.42,-13.19


In [29]:
# Rows for ticker 005930 only. reset_index() is called without drop=True, so
# the previous row labels are kept as an `index` column.
# NOTE(review): `kor_stock_ohlcv_2` is not defined in any visible cell, so this
# cell relies on leftover kernel state.
kor_stock_ohlcv_005930 = kor_stock_ohlcv_2[kor_stock_ohlcv_2['ticker'] == '005930'].reset_index()

In [30]:
# Display the frame filtered to ticker 005930
kor_stock_ohlcv_005930

Unnamed: 0,index,date,open,high,low,close,volume,trading_value,price_change_percentage,ticker,corp_name,market
0,572165,2018-01-02,51380,51400,50780,51020,169485,432677351468,0.12,005930,삼성전자,KOSPI
1,572166,2018-01-03,52540,52560,51420,51620,200270,518345810160,1.18,005930,삼성전자,KOSPI
2,572167,2018-01-04,52120,52180,50640,51080,233909,600531577700,-1.05,005930,삼성전자,KOSPI
3,572168,2018-01-05,51300,52120,51200,52120,189623,490792925116,2.04,005930,삼성전자,KOSPI
4,572169,2018-01-08,52400,52520,51500,52020,167673,435974098536,-0.19,005930,삼성전자,KOSPI
...,...,...,...,...,...,...,...,...,...,...,...,...
1365,573530,2023-07-17,73200,73500,72800,73300,10060049,736819227400,-0.14,005930,삼성전자,KOSPI
1366,573531,2023-07-18,73200,73500,72000,72000,11697900,847564074902,-1.77,005930,삼성전자,KOSPI
1367,573532,2023-07-19,72700,72800,71300,71700,10896412,782147498048,-0.42,005930,삼성전자,KOSPI
1368,573533,2023-07-20,71100,71500,70800,71000,9732730,692487601800,-0.98,005930,삼성전자,KOSPI


### 주가 등락률 일주일

### 주가 등락 한달

### 주가 등락 1년