In [1]:
import math
import numpy as np
import pandas as pd
import os
import glob

from pykrx import stock
from pykrx import bond

import time
from time import sleep
from datetime import datetime
from datetime import timedelta

from pyarrow import csv
import pyarrow as pa
import pyarrow.parquet as pq

from ta.trend import MACD
from ta.momentum import StochasticOscillator

import psycopg2 as pg2
from sqlalchemy import create_engine

from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage


import warnings
warnings.filterwarnings('ignore')

# 경로 변경
os.chdir('/home/shjj08choi4/finance_mlops')


# 서비스 계정 키 JSON 파일 경로
key_path = glob.glob("key_value/*.json")[0]

# Credentials 객체 생성
credentials = service_account.Credentials.from_service_account_file(key_path)

# 빅쿼리 정보
project_id = 'owenchoi-404302'
dataset_id = 'finance_mlops'


# BigQuery 클라이언트 객체 생성
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)


# GCP 클라이언트 객체 생성
storage_client = storage.Client(credentials = credentials,
                         project = credentials.project_id)
bucket_name = 'finance-mlops-proj'    # 서비스 계정 생성한 bucket 이름 입력

# Postgresql 연결
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username = db_connect_info['username'][0]
password = db_connect_info['password'][0]
host = db_connect_info['host'][0]
database = db_connect_info['database'][0]
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{host}:5432/{database}')



now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")



now = datetime.now()
now = now + timedelta(days=-365 * 2)
set_date_1 = now.strftime('%Y%m%d')
query_date = now.strftime('%Y-%m-%d')

In [51]:
index_code_master = pd.read_csv('data_crawler/index_code_master/index_code_master.csv', dtype = {'ticker':str,
                                                                                                'index_code':str})
t = pq.read_table('data_crawler/cleaning/kor_index_ohlcv/kor_index_ohlcv_cleaning.parquet')
kor_index_ohlcv_cleaning = t.to_pandas()

t = pq.read_table('data_crawler/kor_index_list_df/kor_index_list_df.parquet')
kor_index_list_df = t.to_pandas()


In [64]:

kor_index_ohlcv = kor_index_ohlcv[kor_index_ohlcv['date'] > today_date2]

df = kor_index_ohlcv.groupby(['index_code'])['close'].apply(list).reset_index()


df2 = pd.merge(kor_index_list_df, df, 
        on = 'index_code', 
        how = 'left')
        
df_kospi = df2[df2['market'] == 'KOSPI']    
df_kospi

Unnamed: 0,index_code,index_code_nm,market,close
0,1001,코스피,KOSPI,"[2525.36, 2535.27, 2510.66, 2544.18, 2563.56, ..."
1,1002,코스피 대형주,KOSPI,"[2506.27, 2517.67, 2490.82, 2528.88, 2548.28, ..."
2,1003,코스피 중형주,KOSPI,"[2751.66, 2756.56, 2744.49, 2759.82, 2757.57, ..."
3,1004,코스피 소형주,KOSPI,"[2284.71, 2280.58, 2269.27, 2271.36, 2275.93, ..."
4,1005,음식료품,KOSPI,"[3648.26, 3658.3, 3663.02, 3656.46, 3632.11, 3..."
5,1006,섬유의복,KOSPI,"[284.1, 287.37, 284.42, 285.9, 291.13, 291.54,..."
6,1007,종이목재,KOSPI,"[297.62, 296.05, 294.91, 293.76, 292.45, 294.8..."
7,1008,화학,KOSPI,"[4722.24, 4719.06, 4672.67, 4747.36, 4840.56, ..."
8,1009,의약품,KOSPI,"[12945.78, 13067.44, 12928.58, 12997.72, 12705..."
9,1010,비금속광물,KOSPI,"[3921.46, 3927.78, 3898.01, 3864.13, 3886.77, ..."


In [77]:
index_code_master

Unnamed: 0,ticker,index_code,index_code_nm,market,corp_name
0,005930,1001,코스피,KOSPI,삼성전자
1,000660,1001,코스피,KOSPI,SK하이닉스
2,373220,1001,코스피,KOSPI,LG에너지솔루션
3,207940,1001,코스피,KOSPI,삼성바이오로직스
4,005490,1001,코스피,KOSPI,POSCO홀딩스
...,...,...,...,...,...
13730,122870,2218,코스닥 150 커뮤니케이션서비스,KOSDAQ,와이지엔터테인먼트
13731,119860,2218,코스닥 150 커뮤니케이션서비스,KOSDAQ,커넥트웨이브
13732,078340,2218,코스닥 150 커뮤니케이션서비스,KOSDAQ,컴투스
13733,095660,2218,코스닥 150 커뮤니케이션서비스,KOSDAQ,네오위즈


In [74]:
index_list_df = index_code_master[index_code_master['ticker'] == '005930'].reset_index(drop = True)

index_list_df

Unnamed: 0,ticker,index_code,index_code_nm,market,corp_name
0,5930,1001,코스피,KOSPI,삼성전자
1,5930,1002,코스피 대형주,KOSPI,삼성전자
2,5930,1013,전기전자,KOSPI,삼성전자
3,5930,1027,제조업,KOSPI,삼성전자
4,5930,1028,코스피 200,KOSPI,삼성전자
5,5930,1034,코스피 100,KOSPI,삼성전자
6,5930,1035,코스피 50,KOSPI,삼성전자
7,5930,1155,코스피 200 정보기술,KOSPI,삼성전자
8,5930,1224,코스피 200 비중상한 30%,KOSPI,삼성전자
9,5930,1227,코스피 200 비중상한 25%,KOSPI,삼성전자


In [72]:
# ### 날짜 설정
now = datetime.now()
now = now + timedelta(days=-30)

today_date2 = now.strftime('%Y-%m-%d')

kor_index_ohlcv_cleaning = kor_index_ohlcv_cleaning[kor_index_ohlcv_cleaning['date'] > today_date2]

df = kor_index_ohlcv_cleaning.groupby(['index_code','index_code_nm'])['close'].apply(list).reset_index()

index_list_df = index_code_master[index_code_master['ticker'] == '373220'].reset_index(drop = True)

df_2 = df[df['index_code'].isin(index_list_df['index_code'])].reset_index(drop = True)
df_2
# st.dataframe(
#     df_kospi,
#     column_config={
#         "index_code": "App name",
#         "index_name": "App index_name",
#         "index_market": "App index_market",
#         "url": st.column_config.LinkColumn("App URL"),
#         "close": st.column_config.LineChartColumn(
#             "Views (past 30 days)", 
#         ),
#     },
#     hide_index=True,
# )


Unnamed: 0,index_code,index_code_nm,close
0,1001,코스피,"[2525.36, 2535.27, 2510.66, 2544.18, 2563.56, ..."
1,1002,코스피 대형주,"[2506.27, 2517.67, 2490.82, 2528.88, 2548.28, ..."
2,1013,전기전자,"[26583.46, 26707.18, 26365.21, 26816.47, 27033..."
3,1027,제조업,"[6723.03, 6753.03, 6673.47, 6760.39, 6824.4, 6..."
4,1028,코스피 200,"[337.93, 339.77, 336.68, 341.33, 343.99, 343.5..."
5,1034,코스피 100,"[2545.63, 2559.4, 2536.25, 2572.73, 2595.1, 25..."
6,1035,코스피 50,"[2332.22, 2344.29, 2322.69, 2359.51, 2381.56, ..."
7,1159,코스피 200 산업재,"[608.85, 615.83, 604.4, 621.45, 627.62, 643.01..."
8,1182,코스피 200 초대형제외 지수,"[238.23, 239.38, 237.25, 241.58, 244.01, 244.1..."
9,1224,코스피 200 비중상한 30%,"[337.32, 339.15, 336.07, 340.72, 343.39, 342.9..."


In [59]:
df_2

Unnamed: 0,index_code,index_code_nm,close
0,1001,코스피,"[2225.67, 2218.68, 2255.98, 2264.65, 2289.97, ..."
1,1002,코스피 대형주,"[2199.54, 2192.5, 2234.07, 2242.73, 2269.99, 2..."
2,1013,전기전자,"[21357.68, 21326.61, 22056.14, 22019.57, 22362..."
3,1027,제조업,"[5777.72, 5747.29, 5842.77, 5832.99, 5906.54, ..."
4,1028,코스피 200,"[289.79, 289.58, 295.98, 297.87, 301.53, 309.6..."
5,1034,코스피 100,"[2178.14, 2177.41, 2228.97, 2244.75, 2273.23, ..."
6,1035,코스피 50,"[1981.7, 1983.11, 2033.9, 2049.92, 2076.39, 21..."
7,1159,코스피 200 산업재,"[567.2, 560.27, 565.29, 557.76, 563.71, 580.23..."
8,1182,코스피 200 초대형제외 지수,"[214.93, 214.87, 217.85, 219.19, 221.76, 227.6..."
9,1224,코스피 200 비중상한 30%,"[289.26, 289.05, 295.44, 297.33, 300.98, 309.1..."


In [52]:


df2 = pd.merge(kor_index_list_df, df, 
        on = 'index_code', 
        how = 'left')
df2

ValueError: You are trying to merge on int64 and object columns for key 'index_code'. If you wish to proceed you should use pd.concat

In [38]:
index_list_df = index_code_master[index_code_master['ticker'] == '373220'].reset_index(drop = True)

In [39]:
index_list_df

Unnamed: 0,ticker,index_code,index_code_nm,market,corp_name
0,373220,1001,코스피,KOSPI,LG에너지솔루션
1,373220,1002,코스피 대형주,KOSPI,LG에너지솔루션
2,373220,1013,전기전자,KOSPI,LG에너지솔루션
3,373220,1027,제조업,KOSPI,LG에너지솔루션
4,373220,1028,코스피 200,KOSPI,LG에너지솔루션
5,373220,1034,코스피 100,KOSPI,LG에너지솔루션
6,373220,1035,코스피 50,KOSPI,LG에너지솔루션
7,373220,1159,코스피 200 산업재,KOSPI,LG에너지솔루션
8,373220,1182,코스피 200 초대형제외 지수,KOSPI,LG에너지솔루션
9,373220,1224,코스피 200 비중상한 30%,KOSPI,LG에너지솔루션


In [43]:
kor_index_ohlcv_cleaning_2 = kor_index_ohlcv_cleaning[kor_index_ohlcv_cleaning['index_code'].isin(index_list_df['index_code'])].reset_index(drop = True)


In [44]:
kor_index_ohlcv_cleaning_2

Unnamed: 0,date,open,high,low,close,volume,trading_value,market_cap,index_code,index_code_nm,...,MACD_Signal,변화량,상승폭,하락폭,AU,AD,RSI,MA5-20,MA20-60,MA60-120
0,2023-01-02 00:00:00+00:00,2249.95,2259.88,2222.37,2225.67,346344799,5200137586818,1759241799519040,1001,코스피,...,-19.659065,-10.73,0.00,10.73,6.386393,15.439962,29.260008,-66.4275,-0.274167,-23.953000
1,2023-01-03 00:00:00+00:00,2230.98,2230.98,2180.67,2218.68,410245325,6149082624890,1753771077018843,1001,코스피,...,-23.522861,-6.99,0.00,6.99,5.930222,14.836394,28.556518,-76.0875,-10.070167,-23.175750
2,2023-01-04 00:00:00+00:00,2205.98,2260.06,2198.82,2255.98,412841149,6487597995523,1783808765569816,1001,코스피,...,-26.644235,37.30,37.30,0.00,8.170921,13.776651,37.229270,-84.5905,-17.994333,-21.595750
3,2023-01-05 00:00:00+00:00,2268.20,2281.39,2252.97,2264.65,430977022,7521178466245,1791816776272757,1001,코스피,...,-28.937594,8.67,8.67,0.00,8.206569,12.792605,39.080439,-81.8425,-24.938667,-20.026417
4,2023-01-06 00:00:00+00:00,2253.40,2300.62,2253.27,2289.97,398606581,6764112169245,1811418923114710,1001,코스피,...,-30.121827,25.32,25.32,0.00,9.428957,11.878847,44.251191,-67.0730,-31.112500,-17.638500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3232,2023-12-28 00:00:00+00:00,1319.30,1341.58,1317.49,1341.58,25033603,2913283792218,955323878351500,1894,코스피 200 TOP 10,...,14.598623,27.73,27.73,0.00,8.991678,3.690104,70.902324,39.7910,29.185167,-49.295417
3233,2024-01-02 00:00:00+00:00,1331.45,1347.10,1326.28,1340.57,22964646,2683786714913,962342167077300,1894,코스피 200 TOP 10,...,16.305693,-1.01,0.00,1.01,8.349416,3.498668,70.470601,44.4140,30.809000,-47.523333
3234,2024-01-03 00:00:00+00:00,1323.35,1323.35,1299.94,1299.94,29310048,3220867167424,931966067485700,1894,코스피 200 TOP 10,...,17.301056,-40.63,0.00,40.63,7.753029,6.150906,55.761401,42.0270,32.572000,-46.409417
3235,2024-01-04 00:00:00+00:00,1292.04,1301.42,1286.17,1292.30,21663561,2404747950702,927454180294700,1894,코스피 200 TOP 10,...,17.636831,-7.64,0.00,7.64,7.199241,6.257270,53.500057,38.4715,34.351833,-45.548917


In [35]:
kor_index_ohlcv_cleaning[kor_index_ohlcv_cleaning['index_code'] == '1227']

Unnamed: 0,date,open,high,low,close,volume,trading_value,market_cap,index_code,index_code_nm,...,MACD_Signal,변화량,상승폭,하락폭,AU,AD,RSI,MA5-20,MA20-60,MA60-120
22908,2023-01-02 00:00:00+00:00,289.95,291.04,286.27,286.80,92544049,3817327305505,1535786624843340,1227,코스피 200 비중상한 25%,...,-2.244264,-1.40,0.00,1.40,0.883306,2.044262,30.171999,-8.3430,0.415667,-4.333667
22909,2023-01-03 00:00:00+00:00,287.68,287.89,281.40,286.61,106623597,4524745849460,1530480035423500,1227,코스피 200 비중상한 25%,...,-2.745319,-0.19,0.00,0.19,0.820212,1.911814,30.022119,-9.6495,-0.774667,-4.190083
22910,2023-01-04 00:00:00+00:00,285.29,293.35,284.87,292.69,109947475,5193513528314,1557422148946850,1227,코스피 200 비중상한 25%,...,-3.126749,6.08,6.08,0.00,1.195911,1.775256,40.250554,-10.6200,-1.671333,-3.931583
22911,2023-01-05 00:00:00+00:00,294.33,296.37,292.77,294.56,137287857,6003133980930,1563535980673890,1227,코스피 200 비중상한 25%,...,-3.376243,1.87,1.87,0.00,1.244061,1.648452,43.009684,-9.9970,-2.377500,-3.679000
22912,2023-01-06 00:00:00+00:00,293.32,299.67,293.20,298.16,121021827,5367994107791,1581913625677170,1227,코스피 200 비중상한 25%,...,-3.464833,3.60,3.60,0.00,1.412342,1.530706,47.989098,-7.6615,-3.033667,-3.330500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23152,2023-12-28 00:00:00+00:00,346.40,351.73,345.74,351.73,108348110,7011015334208,1865396196690940,1227,코스피 200 비중상한 25%,...,4.478224,5.72,5.72,0.00,1.805908,0.591479,75.328183,9.1595,11.601167,-4.155500
23153,2024-01-02 00:00:00+00:00,350.14,354.99,349.66,354.04,106708510,7230435484144,1875294389545740,1227,코스피 200 비중상한 25%,...,4.813417,2.31,2.31,0.00,1.841915,0.549230,77.030652,10.2685,11.993333,-3.668083
23154,2024-01-03 00:00:00+00:00,350.39,350.39,345.06,345.06,117690135,7242146169830,1828846321048100,1227,코스피 200 비중상한 25%,...,5.011907,-8.98,0.00,8.98,1.710349,1.151428,59.765278,9.8425,12.252667,-3.267750
23155,2024-01-04 00:00:00+00:00,343.05,344.47,341.02,341.87,103140948,5817672265149,1814393097178020,1227,코스피 200 비중상한 25%,...,5.051992,-3.19,0.00,3.19,1.588181,1.297041,55.045383,8.7815,12.353333,-2.939583
