# 지표 정리

### 기본 세팅

In [1]:
import math
import numpy as np
import pandas as pd
import os
import glob

from pykrx import stock
from pykrx import bond

import time
from time import sleep
from datetime import datetime
from datetime import timedelta

from pyarrow import csv
import pyarrow as pa
import pyarrow.parquet as pq

from ta.trend import MACD
from ta.momentum import StochasticOscillator

import psycopg2 as pg2
from sqlalchemy import create_engine

from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage


import warnings
warnings.filterwarnings('ignore')

# 경로 변경
os.chdir('/home/shjj08choi4/finance_mlops')


# 서비스 계정 키 JSON 파일 경로
key_path = glob.glob("key_value/*.json")[0]

# Credentials 객체 생성
credentials = service_account.Credentials.from_service_account_file(key_path)

# 빅쿼리 정보
project_id = 'owenchoi-404302'
dataset_id = 'finance_mlops'


# BigQuery 클라이언트 객체 생성
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)


# GCP 클라이언트 객체 생성
storage_client = storage.Client(credentials = credentials,
                         project = credentials.project_id)
bucket_name = 'finance-mlops-proj'    # 서비스 계정 생성한 bucket 이름 입력

# Postgresql 연결
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username = db_connect_info['username'][0]
password = db_connect_info['password'][0]
host = db_connect_info['host'][0]
database = db_connect_info['database'][0]
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{host}:5432/{database}')



now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")

## 데이터 

In [2]:
glob.glob("**/*.csv", recursive=True)

['jupyter/ohlcv_df_raw.csv',
 'data_crawler/ohlcv_df_raw.csv',
 'data_crawler/sample.csv',
 'data_crawler/df_raw_total.csv',
 'data_crawler/kor_index_code_fundamental/kor_index_code_fundamental.csv',
 'data_crawler/index_code_master/index_code_master.csv',
 'data_crawler/kor_market_cap/kor_market_cap.csv',
 'data_crawler/snp500_daily/snp500_daily.csv',
 'data_crawler/bitcoin/bitcoin.csv',
 'data_crawler/usd_krw/usd_krw.csv',
 'data_crawler/kor_stock_ohlcv/kor_stock_ohlcv.csv',
 'data_crawler/kor_ticker_list/kor_ticker_list_20240108.csv',
 'data_crawler/kor_ticker_list/kor_ticker_list.csv',
 'data_crawler/kor_index_list_df/kor_index_list_df.csv',
 'data_crawler/kor_index_ohlcv/kor_index_ohlcv.csv',
 'data_crawler/kor_stock_fundamental/kor_stock_fundamental.csv',
 'data_crawler/snp500_ticker_list/snp500_ticker_list.csv',
 'key_value/db_connect_info.csv',
 'key_value/naver_apikey.csv',
 'key_value/chatgpt_apikey.csv']

In [3]:
glob.glob("**/*.parquet", recursive=True)

['jupyter/df.parquet',
 'jupyter/kor_stock_ohlcv.parquet',
 'data_crawler/cleaning/bitcoin/bitcoin_anal_cleaning.parquet',
 'data_crawler/cleaning/bitcoin/bitcoin_cleaning.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kosdaq.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kosdaq.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count_kosdaq.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kospi.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet',
 'data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count_kospi.parquet',
 'data_crawler/cleaning/kor_index_ohlcv/kor_index_ohlcv_cleaning.parquet',
 'data_crawler/cleaning/kor_index_ohlcv/kor_index_ohlcv_anal_cleaning.parquet',
 'data_crawler/kor_index_code_fundamental/kor_index_code_fundamental.parquet',
 'data_crawler/kor_index_list_df/kor_index_list_df.parquet',
 'data_crawler/kor_index_ohlcv/kor_index_ohlcv.parquet']

In [4]:
# 코스피 지표
t = pq.read_table('data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet')
kor_stock_ohlcv_kospi = t.to_pandas()

# 코스피 매수/매도 지표
t = pq.read_table('data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kospi.parquet')
kor_stock_ohlcv_anal_kospi = t.to_pandas()

# 코스피 주식별 매수매도 지표
t = pq.read_table('data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count_kospi.parquet')
buy_sell_count_kospi = t.to_pandas()

# 인덱스 지표
t = pq.read_table('data_crawler/cleaning/kor_index_ohlcv/kor_index_ohlcv_cleaning.parquet')
kor_index_ohlcv_cleaning = t.to_pandas()

# 인덱스 매수/매도 지표
t = pq.read_table('data_crawler/cleaning/kor_index_ohlcv/kor_index_ohlcv_anal_cleaning.parquet')
kor_index_ohlcv_anal_cleaning = t.to_pandas()

# 인덱스 PBR
t = pq.read_table('data_crawler/kor_index_code_fundamental/kor_index_code_fundamental.parquet')
kor_index_code_fundamental = t.to_pandas()


index_code_master = pd.read_csv('data_crawler/index_code_master/index_code_master.csv', dtype = {'ticker':str, 
                                                                                                'index_code':str})

not_sectors = ["1002","1003","1004","1028","1034","1035","1150","1151",
           "1152","1153","1154","1155","1156","1157","1158","1159",
           "1160","1167","1182","1224","1227","1232","1244","1894",
           "2002","2003","2004","2181","2182","2183","2184","2189",
           "2203","2212","2213","2214","2215","2216","2217","2218"]

index_code_master = index_code_master[~index_code_master['index_code'].isin(not_sectors)].reset_index(drop = True)


## 상황 단일주가 (삼성전자

- trading 추가

In [9]:
ticker_nm = '005930'

### 주가 등락률

In [10]:
kor_stock_ohlcv_kospi_sample = kor_stock_ohlcv_kospi[kor_stock_ohlcv_kospi['ticker'] == ticker_nm]

In [31]:
# df1_set =  ohlcv_df_raw[ohlcv_df_raw['date'] > '2023-10-01'].groupby("ticker")['date'].max().reset_index()
df1_set =  kor_stock_ohlcv_kospi.groupby("ticker")['date'].max().reset_index()
df1_set = df1_set[['ticker', 'date']]
df1_set = pd.merge(df1_set, kor_stock_ohlcv_kospi[['ticker', 'date', 'close', 'corp_name']],
              on = ['ticker', 'date'], 
              how = 'left')

diff_date_list = [30, 90, 180, 240, 365]
for diff_date in diff_date_list:
# diff_date = 240
    now = datetime.now()
    now = now + timedelta(days=-diff_date)
    set_date = now.strftime('%Y-%m-%d')
    df1 =  kor_stock_ohlcv_kospi[kor_stock_ohlcv_kospi['date'] > set_date].groupby("ticker")['date'].min().reset_index()

    
    df1 = df1[['ticker','date']]
    df1 = pd.merge(df1, kor_stock_ohlcv_kospi[['ticker', 'date', 'close']],
                  on = ['ticker', 'date'], 
                  how = 'left')
    
    df1.columns = ['ticker', f'date_{diff_date}', f'close_{diff_date}']

    df1_set = pd.merge(df1_set, df1, 
                      on = 'ticker', 
                      how = 'left')

    df1_set[f'per_{diff_date}'] = (df1_set['close'] - df1_set[f'close_{diff_date}']) / df1_set[f'close_{diff_date}'] * 100


df1_set_2 = df1_set[['date','ticker','per_30', 'per_90', 'per_180', 'per_240', 'per_365', 'corp_name']]

df1_set_3 = pd.melt(df1_set_2, 
        id_vars= ['date', 'ticker', 'corp_name'], 
        value_vars=['per_30', 'per_90', 'per_180', 'per_240','per_365'])    
stock_ratio_per = df1_set_3.sort_values(by = ['ticker'])
stock_ratio_per.head()

Unnamed: 0,date,ticker,corp_name,variable,value
0,2024-01-05 00:00:00+00:00,20,동화약품,per_30,6.727829
2859,2024-01-05 00:00:00+00:00,20,동화약품,per_240,16.075388
1906,2024-01-05 00:00:00+00:00,20,동화약품,per_180,1.848249
3812,2024-01-05 00:00:00+00:00,20,동화약품,per_365,12.21865
953,2024-01-05 00:00:00+00:00,20,동화약품,per_90,7.27459


#### 주가 등락률 순위

In [13]:

df_per_total = pd.DataFrame()
per_set = ['per_30', 'per_90', 'per_180', 'per_240', 'per_365']
ascending_list = [True, False]

for per_value in per_set:
    for ascending_value in ascending_list:
        df_per =  df1_set_2.sort_values(by = per_value, ascending = ascending_value).head()
        df_per['type'] = per_value
        df_per['rank'] = range(1,6)
        df_per['음/양'] = ascending_value
        df_per_total = pd.concat([df_per_total, df_per])

df_per_total = df_per_total.drop_duplicates()     
df_per_total = df_per_total.reset_index(drop = True)

In [14]:
df_per_total

Unnamed: 0,date,ticker,per_30,per_90,per_180,per_240,per_365,corp_name,type,rank,음/양
0,2024-01-05 00:00:00+00:00,017860,-34.625,-34.625,-34.625,-34.625,-34.625,DS단석,per_30,1,True
1,2024-01-05 00:00:00+00:00,001685,-30.289193,64.629763,64.393396,52.058433,42.679128,대상우,per_30,2,True
2,2024-01-05 00:00:00+00:00,000240,-29.078014,31.926121,27.659574,11.856823,11.111111,한국앤컴퍼니,per_30,3,True
3,2024-01-05 00:00:00+00:00,033250,-25.782609,-10.157895,72.948328,47.28214,70.870871,체시스,per_30,4,True
4,2024-01-05 00:00:00+00:00,36328K,-24.509804,5.769231,-15.291529,-27.014218,-35.833333,티와이홀딩스우,per_30,5,True
5,2024-01-05 00:00:00+00:00,068290,43.975494,52.597403,5.75,-5.369128,-14.889336,삼성출판사,per_30,1,False
6,2024-01-05 00:00:00+00:00,450080,43.060498,251.398601,251.398601,251.398601,251.398601,에코프로머티,per_30,2,False
7,2024-01-05 00:00:00+00:00,452260,42.88499,37.009346,4.415954,-12.686123,-31.173709,한화갤러리아,per_30,3,False
8,2024-01-05 00:00:00+00:00,009415,41.069627,26.173285,17.281879,21.777003,-4.377565,태영건설우,per_30,4,False
9,2024-01-05 00:00:00+00:00,180640,39.327731,96.445498,77.706324,101.213592,112.83697,한진칼,per_30,5,False


### 주가 보조지표 매수/매도 

In [15]:
# dfdf = kor_stock_ohlcv_anal_kospi[kor_stock_ohlcv_anal_kospi['ticker'] == ticker_nm].tail(1)

In [16]:
date_nm = max(kor_stock_ohlcv_anal_kospi['date'])
dfdf = kor_stock_ohlcv_anal_kospi[kor_stock_ohlcv_anal_kospi['date'] == date_nm]

In [17]:
# pd.wide_to_long(dfdf, stubnames='ht', i=['date', 'ticker','corp_name','market','close'], j='age')
dfdf2 = pd.melt(dfdf, 
        id_vars= ['date', 'ticker', 'corp_name'], 
        value_vars=['5_20_cross', '20_60_cross', 'array', 'Bollinger_band','MACD','RSI'])
stock_indicator = dfdf2[dfdf2['value'] != '-']
stock_indicator

Unnamed: 0,date,ticker,corp_name,variable,value
19,2024-01-05 00:00:00+00:00,096770,SK이노베이션,5_20_cross,데드크로스(매도)
28,2024-01-05 00:00:00+00:00,000810,삼성화재,5_20_cross,데드크로스(매도)
52,2024-01-05 00:00:00+00:00,377300,카카오페이,5_20_cross,데드크로스(매도)
69,2024-01-05 00:00:00+00:00,251270,넷마블,5_20_cross,데드크로스(매도)
72,2024-01-05 00:00:00+00:00,028050,삼성엔지니어링,5_20_cross,데드크로스(매도)
...,...,...,...,...,...
5141,2024-01-05 00:00:00+00:00,072710,농심홀딩스,RSI,RSI 하단 상향 돌파(매수)
5281,2024-01-05 00:00:00+00:00,003555,LG우,RSI,RSI 하단 상향 돌파(매수)
5285,2024-01-05 00:00:00+00:00,100250,진양홀딩스,RSI,RSI 하단 상향 돌파(매수)
5422,2024-01-05 00:00:00+00:00,005870,휴니드,RSI,RSI 상단 하향돌파(매도)


Unnamed: 0,ticker,index_code,index_code_nm,market,corp_name
0,5930,1001,코스피,KOSPI,삼성전자
1,5930,1013,전기전자,KOSPI,삼성전자
2,5930,1027,제조업,KOSPI,삼성전자


### 인덱스 등락률

In [19]:
# kor_index_ohlcv_cleaning_sample = kor_index_ohlcv_cleaning[kor_index_ohlcv_cleaning['index_code'].isin(ticker_index_code_df['index_code'])]

In [35]:
# df1_set =  ohlcv_df_raw[ohlcv_df_raw['date'] > '2023-10-01'].groupby("ticker")['date'].max().reset_index()
df1_set =  kor_index_ohlcv_cleaning.groupby("index_code")['date'].max().reset_index()
df1_set = df1_set[['index_code', 'date']]
df1_set = pd.merge(df1_set, kor_index_ohlcv_cleaning[['index_code', 'date', 'close', 'index_code_nm']],
              on = ['index_code', 'date'], 
              how = 'left')



diff_date_list = [30, 90, 180, 240, 365]
for diff_date in diff_date_list:
# diff_date = 240
    now = datetime.now()
    now = now + timedelta(days=-diff_date)
    set_date = now.strftime('%Y-%m-%d')
    df1 =  kor_index_ohlcv_cleaning[kor_index_ohlcv_cleaning['date'] > set_date].groupby("index_code")['date'].min().reset_index()

    
    df1 = df1[['index_code','date']]
    df1 = pd.merge(df1, kor_index_ohlcv_cleaning[['index_code', 'date', 'close']],
                  on = ['index_code', 'date'], 
                  how = 'left')
    
    df1.columns = ['index_code', f'date_{diff_date}', f'close_{diff_date}']

    df1_set = pd.merge(df1_set, df1, 
                      on = 'index_code', 
                      how = 'left')

    df1_set[f'per_{diff_date}'] = (df1_set['close'] - df1_set[f'close_{diff_date}']) / df1_set[f'close_{diff_date}'] * 100

df1_set_2 = df1_set[['date','index_code','per_30', 'per_90', 'per_180', 'per_240', 'per_365', 'index_code_nm']]


df1_set_3 = pd.melt(df1_set_2, 
        id_vars= ['date', 'index_code', 'index_code_nm'], 
        value_vars=['per_30', 'per_90', 'per_180', 'per_240','per_365'])    
index_ratio_per = df1_set_3.sort_values(by = ['index_code'])
index_ratio_per.head()


Unnamed: 0,date,index_code,index_code_nm,variable,value
0,2024-01-05 00:00:00+00:00,1001,코스피,per_30,1.332453
388,2024-01-05 00:00:00+00:00,1001,코스피,per_365,8.046218
97,2024-01-05 00:00:00+00:00,1001,코스피,per_90,5.822086
291,2024-01-05 00:00:00+00:00,1001,코스피,per_240,2.49185
194,2024-01-05 00:00:00+00:00,1001,코스피,per_180,-1.562428


#### 인덱스 등락률 순위

In [36]:

df_per_total = pd.DataFrame()
per_set = ['per_30', 'per_90', 'per_180', 'per_240', 'per_365']
ascending_list = [True, False]

for per_value in per_set:
    for ascending_value in ascending_list:
        df_per =  df1_set_2.sort_values(by = per_value, ascending = ascending_value).head()
        df_per['type'] = per_value
        df_per['rank'] = range(1,6)
        df_per['음/양'] = ascending_value
        df_per_total = pd.concat([df_per_total, df_per])

df_per_total = df_per_total.drop_duplicates()     
df_per_total = df_per_total.reset_index(drop = True)

### 인덱스 보조지표 매수/매도

In [23]:
# kor_index_ohlcv_anal_cleaning_df = kor_index_ohlcv_anal_cleaning[kor_index_ohlcv_anal_cleaning['index_code'].isin(ticker_index_code_df['index_code'])]


date_nm = max(kor_index_ohlcv_anal_cleaning['date'])
kor_index_ohlcv_anal_cleaning_df_22 = kor_index_ohlcv_anal_cleaning[kor_index_ohlcv_anal_cleaning['date'] == date_nm]

In [25]:
# pd.wide_to_long(dfdf, stubnames='ht', i=['date', 'ticker','corp_name','market','close'], j='age')
kor_index_ohlcv_anal_cleaning_df_2 = pd.melt(kor_index_ohlcv_anal_cleaning_df_22, 
        id_vars= ['date', 'index_code', 'index_code_nm'], 
        value_vars=['5_20_cross', '20_60_cross', 'array', 'Bollinger_band','MACD','RSI'])

index_indicator = kor_index_ohlcv_anal_cleaning_df_2[kor_index_ohlcv_anal_cleaning_df_2['value'] != '-']

In [37]:
index_indicator.head()

Unnamed: 0,date,index_code,index_code_nm,variable,value
15,2024-01-05 00:00:00+00:00,1016,유통업,5_20_cross,데드크로스(매도)
31,2024-01-05 00:00:00+00:00,1154,코스피 200 에너지/화학,5_20_cross,데드크로스(매도)
33,2024-01-05 00:00:00+00:00,1156,코스피 200 금융,5_20_cross,데드크로스(매도)
38,2024-01-05 00:00:00+00:00,1167,코스피 200 중소형주,5_20_cross,데드크로스(매도)
63,2024-01-05 00:00:00+00:00,2072,일반전기전자,5_20_cross,골든크로스(매수)


### 최종

In [68]:
ticker_nm = '005930'
ticker_index_code_df = index_code_master[index_code_master['ticker'] == ticker_nm].reset_index(drop = True)
index_code_list = ticker_index_code_df['index_code']

In [58]:
ticker_index_code_df['index_code'].to_list().extend(ticker_nm)

0    1001
1    1013
2    1027
Name: index_code, dtype: object

In [62]:
df1 = pd.concat([stock_ratio_per,stock_indicator])
df1['type'] = 'stock'
df1.columns = ['date', 'code', 'code_nm', 'variable', 'value', 'type']

In [63]:
df2 = pd.concat([index_ratio_per,index_indicator])
df2['type'] = 'index'
df2.columns = ['date', 'code', 'code_nm', 'variable', 'value', 'type']

In [64]:
df3 = pd.concat([df1,df2]).reset_index(drop = True)

In [65]:
df4 = df3[['code','code_nm', 'variable', 'value', 'type']]

In [74]:
total_df = df4[(df4['code'] == ticker_nm) | (df4['code'].isin(index_code_list))].reset_index(drop = True)

In [75]:
total_df

Unnamed: 0,code,code_nm,variable,value,type
0,5930,삼성전자,per_180,4.502046,stock
1,5930,삼성전자,per_240,15.70997,stock
2,5930,삼성전자,per_90,13.818722,stock
3,5930,삼성전자,per_365,25.986842,stock
4,5930,삼성전자,per_30,4.787962,stock
5,5930,삼성전자,array,정배열(매수),stock
6,1001,코스피,per_30,1.332453,index
7,1001,코스피,per_365,8.046218,index
8,1001,코스피,per_90,5.822086,index
9,1001,코스피,per_240,2.49185,index


## gemini 

In [7]:
import google.generativeai as genai

api_key_df = pd.read_csv('key_value/chatgpt_apikey.csv')
GOOGLE_API_KEY = api_key_df[api_key_df['corp'] == 'google'].reset_index()['api_key'][0]


genai.configure(api_key=GOOGLE_API_KEY)

# Set up the model
generation_config = {
  "temperature": 0.9,
  "top_p": 1,
  "top_k": 1,
  "max_output_tokens": 2048,
}

model = genai.GenerativeModel('gemini-pro',
                             generation_config=generation_config)

In [77]:
total_df.columns()

Unnamed: 0,code,code_nm,variable,value,type
0,5930,삼성전자,per_180,4.502046,stock
1,5930,삼성전자,per_240,15.70997,stock
2,5930,삼성전자,per_90,13.818722,stock
3,5930,삼성전자,per_365,25.986842,stock
4,5930,삼성전자,per_30,4.787962,stock
5,5930,삼성전자,array,정배열(매수),stock
6,1001,코스피,per_30,1.332453,index
7,1001,코스피,per_365,8.046218,index
8,1001,코스피,per_90,5.822086,index
9,1001,코스피,per_240,2.49185,index


In [76]:
# date_nm = df3['date'][0].strftime('%Y-%m-%d')

prompt = f"""
- 날짜:{date_nm} 

- {total_df}


보고서 형태로 설명식으로 요약해줘. 
"""

response = model.generate_content(prompt)
print(response.text)

**삼성전자(005930):**

* 과거 180일 (약 6개월) 최저 PER는 4.502046
* 과거 240일 (약 8개월) 최저 PER는 15.70997
* 과거 90일 (약 3개월) 최저 PER는 13.818722
* 과거 365일 (약 1년) 최저 PER는 25.986842
* 과거 30일 (약 1개월) 최저 PER는 4.787962

삼성전자는 현재 **정배열(매수)** 상태에 있으며, 이는 해당 주식이 현재 매수 포인트에 있다는 것을 나타냅니다.

**코스피(1001):**

* 과거 180일 (약 6개월) 최저 PER는 -1.562428
* 과거 240일 (약 8개월) 최저 PER는 2.49185
* 과거 90일 (약 3개월) 최저 PER는 5.822086
* 과거 365일 (약 1년) 최저 PER는 8.046218
* 과거 30일 (약 1개월) 최저 PER는 1.332453

코스피는 현재 **하향돌파(매도)** 상태에 있으며, 이는 해당 지수가 현재 매도 포인트에 있다는 것을 나타냅니다.

**전기전자(1013):**

* 과거 180일 (약 6개월) 최저 PER는 -2.191732
* 과거 240일 (약 8개월) 최저 PER는 6.94095
* 과거 90일 (약 3개월) 최저 PER는 8.12246
* 과거 365일 (약 1년) 최저 PER는 18.421342
* 과거 30일 (약 1개월) 최저 PER는 2.912949

전기전자는 현재 매수 포인트를 벗어났으나, 최저 PER가 전반적으로 낮은 수준을 유지하고 있습니다.

**제조업(1027):**

* 과거 180일 (약 6개월) 최저 PER는 -3.356877
* 과거 240일 (약 8개월) 최저 PER는 2.615615
* 과거 90일 (약 3개월) 최저 PER는 5.967981
* 과거 365일 (약 1년) 최저 PER는 12.564354
* 과거 30일 (약 1개월) 최저 PER는 2.716707

제조업은 현재 매수 포인트를 벗어났으나, 최저 PER가 전반적으로 낮은 수준을 유지하