In [124]:
import math
import numpy as np
import pandas as pd
import os
import glob

from pykrx import stock
from pykrx import bond

import time
from time import sleep
from datetime import datetime
from datetime import timedelta

from pyarrow import csv
import pyarrow as pa
import pyarrow.parquet as pq

from ta.trend import MACD
from ta.momentum import StochasticOscillator

import psycopg2 as pg2
from sqlalchemy import create_engine

from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go

from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage


import warnings
warnings.filterwarnings('ignore')

# 경로 변경
os.chdir('/home/shjj08choi4/finance_mlops')


# 서비스 계정 키 JSON 파일 경로
key_path = glob.glob("key_value/*.json")[0]

# Credentials 객체 생성
credentials = service_account.Credentials.from_service_account_file(key_path)

# 빅쿼리 정보
project_id = 'owenchoi-404302'
dataset_id = 'finance_mlops'


# BigQuery 클라이언트 객체 생성
client = bigquery.Client(credentials = credentials, 
                         project = credentials.project_id)


# GCP 클라이언트 객체 생성
storage_client = storage.Client(credentials = credentials,
                         project = credentials.project_id)
bucket_name = 'finance-mlops-proj'    # 서비스 계정 생성한 bucket 이름 입력

# Postgresql 연결
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username = db_connect_info['username'][0]
password = db_connect_info['password'][0]
host = db_connect_info['host'][0]
database = db_connect_info['database'][0]
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{host}:5432/{database}')



now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")



now = datetime.now()
now = now + timedelta(days=-365 * 2)
set_date_1 = now.strftime('%Y%m%d')
query_date = now.strftime('%Y-%m-%d')

In [125]:
now = datetime.now()
print(now)

2024-01-08 18:55:39.562120


In [126]:
file_name = 'kor_stock_ohlcv'
if not os.path.exists(f'data_crawler/cleaning/{file_name}'):
    os.makedirs(f'data_crawler/cleaning/{file_name}')

In [133]:
sql = f"""
select 
  `date`,
  open, 
  high, 
  low, 
  close, 
  volume,
  `{project_id}.{dataset_id}.kor_stock_ohlcv`.ticker,
  corp_name, 
  market,
  rank
from `{project_id}.{dataset_id}.kor_stock_ohlcv`
left join  `{project_id}.{dataset_id}.kor_ticker_list`
on `{project_id}.{dataset_id}.kor_stock_ohlcv`.ticker = `{project_id}.{dataset_id}.kor_ticker_list`.ticker
where date > '{query_date}' and market = 'KOSPI'
order by date, rank asc
limit 100
"""

# 데이터 조회 쿼리 실행 결과
query_job = client.query(sql)

# 데이터프레임 변환
ohlcv_df_raw = query_job.to_dataframe()

In [136]:
sql = f"""
select *
from `{project_id}.{dataset_id}.kor_stock_ohlcv`
where date > '{query_date}'
"""

# 데이터 조회 쿼리 실행 결과
query_job = client.query(sql)

# 데이터프레임 변환
ohlcv_df_raw = query_job.to_dataframe()

In [139]:
sql = f"""
select *
from `{project_id}.{dataset_id}.kor_ticker_list`
"""

# 데이터 조회 쿼리 실행 결과
query_job = client.query(sql)

# 데이터프레임 변환
kor_ticker_list = query_job.to_dataframe()

In [142]:
df = pd.merge(ohlcv_df_raw, kor_ticker_list, 
        on = 'ticker', 
        how = 'left')

In [159]:

ohlcv_df_raw[ohlcv_df_raw['ticker'] == '373220']
ohlcv_df_raw
ohlcv_df_raw[ohlcv_df_raw['ticker'] == '095570']

Unnamed: 0,date,open,high,low,close,volume,price_change_percentage,ticker
0,2022-01-20 00:00:00+00:00,5070,5230,5030,5230,66739,2.952756,095570
1,2022-01-21 00:00:00+00:00,5220,5250,5170,5220,42089,-0.191205,095570
2,2022-01-26 00:00:00+00:00,5050,5070,4980,4985,44098,-1.287129,095570
3,2022-01-27 00:00:00+00:00,5030,5030,4820,4820,59264,-3.309930,095570
4,2022-02-04 00:00:00+00:00,5050,5130,4975,5130,46619,1.584158,095570
...,...,...,...,...,...,...,...,...
1149915,2023-01-30 00:00:00+00:00,6200,6250,6100,6200,110771,0.000000,095570
1149916,2023-02-03 00:00:00+00:00,6700,6840,6630,6700,216823,0.000000,095570
1149917,2023-02-23 00:00:00+00:00,5460,5490,5390,5460,221103,0.000000,095570
1149918,2023-07-28 00:00:00+00:00,4230,4270,4140,4230,83551,0.000000,095570


In [156]:
kor_ticker_list[kor_ticker_list['ticker'] == '373220']

Unnamed: 0,ticker,corp_name,market,rank
2,373220,LG에너지솔루션,KOSPI,3


In [145]:
df[df['market'] == 'KOSPI'].sort_values(by = ['date', 'rank'])

Unnamed: 0,date,open,high,low,close,volume,price_change_percentage,ticker,corp_name,market,rank
201911,2022-01-10 00:00:00+00:00,78100,78100,77100,78000,9947422,-0.383142,005930,삼성전자,KOSPI,1
70504,2022-01-10 00:00:00+00:00,126500,127000,123000,124500,3449197,-1.968504,000660,SK하이닉스,KOSPI,2
193250,2022-01-10 00:00:00+00:00,843016,843016,824239,826216,49426,-0.947823,207940,삼성바이오로직스,KOSPI,4
202150,2022-01-10 00:00:00+00:00,71200,71200,70400,70900,577054,-0.700280,005935,삼성전자우,KOSPI,5
66296,2022-01-10 00:00:00+00:00,304000,308500,301000,304000,303091,-0.327869,005490,POSCO홀딩스,KOSPI,6
...,...,...,...,...,...,...,...,...,...,...,...
223889,2024-01-05 00:00:00+00:00,7040,7040,6810,7020,105,0.142653,014915,성문전자우,KOSPI,949
320372,2024-01-05 00:00:00+00:00,5140,5140,4600,4670,9584,1.521739,002785,진흥기업우B,KOSPI,950
151459,2024-01-05 00:00:00+00:00,6040,6140,6030,6140,1223,2.333333,001525,동양우,KOSPI,951
320144,2024-01-05 00:00:00+00:00,12470,12740,12450,12600,1040,1.123596,002787,진흥기업2우B,KOSPI,952


In [74]:
ohlcv_df_raw = ohlcv_df_raw.fillna(0)
ticker_list = ohlcv_df_raw['ticker'].unique()

### 주가 지표

In [76]:
df_raw_total = pd.DataFrame()
df_raw_anal_total = pd.DataFrame()

for ticker_nm in ticker_list[:5]:
    df_raw = ohlcv_df_raw[ohlcv_df_raw['ticker'] == ticker_nm].reset_index(drop = True)

    ######################################################################
    # 보조지표
    ######################################################################

    # 이동평균선
    df_raw['MA5'] = df_raw['close'].rolling(window=5).mean()
    df_raw['MA20'] = df_raw['close'].rolling(window=20).mean()
    df_raw['MA60'] = df_raw['close'].rolling(window=60).mean()
    df_raw['MA120'] = df_raw['close'].rolling(window=120).mean()

    # 볼린저밴드
    std = df_raw['close'].rolling(20).std(ddof=0)

    df_raw['upper'] = df_raw['MA20'] + 2 * std
    df_raw['lower'] = df_raw['MA20'] - 2 * std

    macd = MACD(close=df_raw['close'],
                window_slow=26,
                window_fast=12,
                window_sign=9)


    df_raw['MACD_DIFF'] = macd.macd_diff()
    df_raw['MACD'] = macd.macd()
    df_raw['MACD_Signal'] = macd.macd_signal()

    # RSI
    df_raw['변화량'] = df_raw['close'] - df_raw['close'].shift(1)
    df_raw['변화량'] = df_raw['변화량'].astype('float64')
    df_raw['상승폭'] = np.where(df_raw['변화량']>=0, df_raw['변화량'], 0)
    df_raw['하락폭'] = np.where(df_raw['변화량'] <0, df_raw['변화량'].abs(), 0)

    # welles moving average
    df_raw['AU'] = df_raw['상승폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['AD'] = df_raw['하락폭'].ewm(alpha=1/14, min_periods=14).mean()
    df_raw['RSI'] = df_raw['AU'] / (df_raw['AU'] + df_raw['AD']) * 100

    df_raw['MA5-20'] = df_raw['MA5'] - df_raw['MA20']
    df_raw['MA20-60'] = df_raw['MA20'] - df_raw['MA60']
    df_raw['MA60-120'] = df_raw['MA60'] - df_raw['MA120']


    ######################################################################
    # 보조지표 분석
    ######################################################################
    df_raw_anal = df_raw[['date','ticker', 'corp_name','market', 'close']]

    # 골든크로스
    # 골든 크로스 5-20
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_5_20_gold_cross = [idx for idx in range(len(df_raw)) if df_raw["MA5-20"].iloc[idx] > 0 and df_raw["MA5-20"].iloc[idx - 1] <= 0]

    # 데드 크로스 5-20
    # 양수에서 음수로 바뀌는 모든 인덱스 찾기
    idx_5_20_dead_cross = [idx for idx in range(len(df_raw)) if df_raw["MA5-20"].iloc[idx] < 0 and df_raw["MA5-20"].iloc[idx - 1] >= 0]

    # 골든 크로스 20-60
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_20_60_gold_cross = [idx for idx in range(len(df_raw)) if df_raw["MA20-60"].iloc[idx] > 0 and df_raw["MA20-60"].iloc[idx - 1] <= 0]

    # 골든 크로스 20-60
    # 음수에서 양수로 바뀌는 모든 인덱스 찾기
    idx_20_60_dead_cross = [idx for idx in range(len(df_raw)) if df_raw["MA20-60"].iloc[idx] < 0 and df_raw["MA20-60"].iloc[idx - 1] >= 0]


    df_raw_anal.loc[:, '5_20_cross'] = '-'
    df_raw_anal.loc[idx_5_20_gold_cross,'5_20_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[idx_5_20_dead_cross,'5_20_cross'] = '데드크로스(매도)'

    df_raw_anal.loc[:, '20_60_cross'] = '-'
    df_raw_anal.loc[idx_20_60_gold_cross,'20_60_cross'] = '골든크로스(매수)'
    df_raw_anal.loc[idx_20_60_dead_cross,'20_60_cross'] = '데드크로스(매도)'


    # 정배열 역배열
    ascending_sq  = (df_raw['MA5-20'] > 0) & \
    (df_raw['MA20-60'] > 0) & \
    (df_raw['MA60-120'] > 0)

    descending_sq  = (df_raw['MA5-20'] < 0) & \
    (df_raw['MA20-60'] < 0) & \
    (df_raw['MA60-120'] < 0)

    df_raw_anal.loc[:,'array'] = '-'
    df_raw_anal.loc[ascending_sq,'array'] = '정배열(매수)'
    df_raw_anal.loc[descending_sq,'array'] = '역배열(매도)'


    # 볼린저밴드
    df_raw['close'] = df_raw['close'].astype('float64')
    down_reg_sq = df_raw['upper'] - df_raw['close']
    top_reg_sq  = df_raw['lower'] - df_raw['close']

    down_reg = [idx for idx in range(1,len(df_raw)) if down_reg_sq[idx] > 0 and down_reg_sq[idx-1] <= 0]
    top_reg = [idx for idx in range(1,len(df_raw)) if top_reg_sq[idx] < 0 and top_reg_sq[idx-1] >= 0]

    df_raw_anal.loc[:,'Bollinger_band'] = '-'
    df_raw_anal.loc[down_reg,'Bollinger_band'] = '하향회귀(매도)'
    df_raw_anal.loc[top_reg,'Bollinger_band'] = '상향회귀(매수)'


    # MACD
    signal_down_cross = [idx for idx in range(1,len(df_raw)) if df_raw['MACD_DIFF'][idx] < 0 and df_raw['MACD_DIFF'][idx-1] >= 0]
    signal_top_corss = [idx for idx in range(1,len(df_raw)) if df_raw['MACD_DIFF'][idx] > 0 and df_raw['MACD_DIFF'][idx-1] <= 0]

    df_raw_anal.loc[:,'MACD'] = '-'
    df_raw_anal.loc[signal_down_cross,'MACD'] = '하향돌파(매도)'
    df_raw_anal.loc[signal_top_corss,'MACD'] = '상향돌파(매수)'

    # RSI
    down_reg = [idx for idx in range(1,len(df_raw)) if df_raw['RSI'][idx] > 70 and df_raw['RSI'][idx-1] <= 70]
    top_reg = [idx for idx in range(1,len(df_raw)) if df_raw['RSI'][idx] < 30 and df_raw['RSI'][idx-1] >= 30]


    df_raw_anal.loc[:,'RSI'] = '-'
    df_raw_anal.loc[down_reg,'RSI'] = 'RSI 상단 하향돌파(매도)'
    df_raw_anal.loc[top_reg,'RSI'] = 'RSI 하단 상향 돌파(매수)'


    df_raw_total = pd.concat([df_raw_total, df_raw])
    df_raw_anal_total = pd.concat([df_raw_anal_total, df_raw_anal])

    print(ticker_nm)


df_raw_total = df_raw_total.reset_index(drop = True)
df_raw_anal_total = df_raw_anal_total.reset_index(drop = True)

005930
247540
000660
086520
091990


In [251]:
df_raw_total.head(3)

Unnamed: 0,date,open,high,low,close,volume,price_change_percentage,ticker,corp_name,market,...,MACD_Signal,변화량,상승폭,하락폭,AU,AD,RSI,MA5-20,MA20-60,MA60-120
0,2018-01-02 00:00:00+00:00,68519,68519,66664,66930.0,2522,-1.686301,1045,CJ우,KOSPI,...,,,0.0,0.0,,,,,,
1,2018-01-03 00:00:00+00:00,68254,68254,66488,67106.0,2501,0.262961,1045,CJ우,KOSPI,...,,176.0,176.0,0.0,,,,,,
2,2018-01-04 00:00:00+00:00,67636,68077,65870,66665.0,8353,-0.657169,1045,CJ우,KOSPI,...,,-441.0,0.0,441.0,,,,,,


In [250]:
df_raw_anal_total.head(3)

Unnamed: 0,date,ticker,corp_name,market,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
0,2018-01-02 00:00:00+00:00,1045,CJ우,KOSPI,66930,-,-,-,-,-,-
1,2018-01-03 00:00:00+00:00,1045,CJ우,KOSPI,67106,-,-,-,-,-,-
2,2018-01-04 00:00:00+00:00,1045,CJ우,KOSPI,66665,-,-,-,-,-,-


In [77]:
df_raw_total = df_raw_total[['date', 'open', 'high', 'low', 'close', 'volume', 'price_change_percentage',
                            'ticker', 'corp_name', 'market', 
                            'MA5','MA20', 'MA60', 'MA120', 
                            'upper', 'lower', 'MACD_DIFF', 'MACD','MACD_Signal', 'RSI']]

In [78]:
now = datetime.now()
now = now + timedelta(days=-365)
set_date_1 = now.strftime('%Y%m%d')
set_date_2 = now.strftime('%Y-%m-%d')

df_raw_total_2 = df_raw_total[df_raw_total['date'] > set_date_2].reset_index(drop = True)
df_raw_anal_total_2 = df_raw_anal_total[df_raw_anal_total['date'] > set_date_2].reset_index(drop = True)

In [None]:


## 매수 매도 카운트
max_date = max(df_raw_anal_total_2['date'])
buy_sell_count = df_raw_anal_total_2[df_raw_anal_total_2['date'] == max_date].reset_index(drop = True)



table_from_pandas = pa.Table.from_pandas(df_raw_total_2,preserve_index = False)
pq.write_table(table_from_pandas, f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet')

table_from_pandas = pa.Table.from_pandas(df_raw_anal_total_2,preserve_index = False)
pq.write_table(table_from_pandas, f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kospi.parquet')


table_from_pandas = pa.Table.from_pandas(buy_sell_count,preserve_index = False)
pq.write_table(table_from_pandas, f'data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count.parquet')


# Google Storage 적재
source_file_name = f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet'    # GCP에 업로드할 파일 절대경로
destination_blob_name = f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet'    # 업로드할 파일을 GCP에 저장할 때의 이름
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)


# Google Storage 적재
source_file_name = f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kospi.parquet'    # GCP에 업로드할 파일 절대경로
destination_blob_name = f'data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_anal_kospi.parquet'    # 업로드할 파일을 GCP에 저장할 때의 이름
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)


# Google Storage 적재
source_file_name = f'data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count.parquet'    # GCP에 업로드할 파일 절대경로
destination_blob_name = f'data_crawler/cleaning/kor_stock_ohlcv/buy_sell_count.parquet'    # 업로드할 파일을 GCP에 저장할 때의 이름
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)

### 매수, 매도 개수 카운트

In [98]:
# df_raw_anal_total_2.groupby("ticker")['date'].max().reset_index()

max_date = max(df_raw_anal_total_2['date'])
buy_sell_count = df_raw_anal_total_2[df_raw_anal_total_2['date'] == max_date].reset_index(drop = True)


In [163]:
df_raw_total_2[df_raw_total_2['ticker'] == '005930'].tail(3)

Unnamed: 0,date,open,high,low,close,volume,ticker,corp_name,market,MA5,MA20,MA60,MA120,upper,lower,MACD_DIFF,MACD,MACD_Signal,RSI
239,2023-12-28 00:00:00+00:00,77700,78500,77500,78500.0,17797536,5930,삼성전자,KOSPI,76800.0,73760.0,71061.666667,70319.166667,77795.046468,69724.953532,489.622149,1540.200698,1050.578549,78.393348
240,2024-01-02 00:00:00+00:00,78200,79800,78200,79600.0,17142847,5930,삼성전자,KOSPI,77720.0,74100.0,71263.333333,70382.5,78838.776213,69361.223787,570.506641,1763.71185,1193.205209,80.903817
241,2024-01-03 00:00:00+00:00,78500,78800,77000,77000.0,21753644,5930,삼성전자,KOSPI,77940.0,74350.0,71435.0,70427.5,79146.457026,69553.542974,414.492423,1711.320738,1296.828315,62.437569
242,2024-01-04 00:00:00+00:00,76100,77300,76100,76600.0,15324439,5930,삼성전자,KOSPI,77940.0,74550.0,71611.666667,70483.333333,79371.410582,69728.589418,257.627354,1618.862508,1361.235153,60.162438
243,2024-01-05 00:00:00+00:00,76700,77100,76400,76600.0,11197601,5930,삼성전자,KOSPI,77660.0,74820.0,71781.666667,70542.5,79462.240838,70177.759162,133.391995,1527.975148,1394.583152,60.162438


In [164]:
df_raw_anal_total_2[df_raw_anal_total_2['ticker'] == '005930'].tail()

Unnamed: 0,date,ticker,corp_name,market,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
239,2023-12-28 00:00:00+00:00,5930,삼성전자,KOSPI,78500,-,-,정배열(매수),-,-,-
240,2024-01-02 00:00:00+00:00,5930,삼성전자,KOSPI,79600,-,-,정배열(매수),-,-,-
241,2024-01-03 00:00:00+00:00,5930,삼성전자,KOSPI,77000,-,-,정배열(매수),하향회귀(매도),-,-
242,2024-01-04 00:00:00+00:00,5930,삼성전자,KOSPI,76600,-,-,정배열(매수),-,-,-
243,2024-01-05 00:00:00+00:00,5930,삼성전자,KOSPI,76600,-,-,정배열(매수),-,-,-


In [160]:
buy_sell_count

Unnamed: 0,date,ticker,corp_name,market,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
0,2024-01-05 00:00:00+00:00,5930,삼성전자,KOSPI,76600,-,-,정배열(매수),-,-,-
1,2024-01-05 00:00:00+00:00,247540,에코프로비엠,KOSDAQ,315000,-,-,-,-,-,-
2,2024-01-05 00:00:00+00:00,660,SK하이닉스,KOSPI,137500,-,-,정배열(매수),-,-,-
3,2024-01-05 00:00:00+00:00,86520,에코프로,KOSDAQ,656000,-,-,역배열(매도),-,-,-
4,2024-01-05 00:00:00+00:00,91990,셀트리온헬스케어,KOSDAQ,75900,데드크로스(매도),-,-,-,-,-


In [108]:
# buy_sell_count['5_20_cross'] + buy_sell_count['20_60_cross'] 

buy_sell_count[['5_20_cross','20_60_cross', 'array', 'Bollinger_band', 'MACD', 'RSI']].apply(" ".join, axis=1)
# buy_sell_count.columns

0      - - 정배열(매수) - - -
1            - - - - - -
2      - - 정배열(매수) - - -
3      - - 역배열(매도) - - -
4    데드크로스(매도) - - - - -
dtype: object

In [82]:
df_raw_anal_total_2

Unnamed: 0,date,ticker,corp_name,market,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
0,2023-01-09 00:00:00+00:00,005930,삼성전자,KOSPI,60700,-,-,-,-,-,-
1,2023-01-10 00:00:00+00:00,005930,삼성전자,KOSPI,60400,골든크로스(매수),-,-,-,-,-
2,2023-01-11 00:00:00+00:00,005930,삼성전자,KOSPI,60500,-,-,-,-,-,-
3,2023-01-12 00:00:00+00:00,005930,삼성전자,KOSPI,60500,-,-,-,-,-,-
4,2023-01-13 00:00:00+00:00,005930,삼성전자,KOSPI,60800,-,-,-,-,-,-
...,...,...,...,...,...,...,...,...,...,...,...
1215,2023-12-28 00:00:00+00:00,091990,셀트리온헬스케어,KOSDAQ,75900,-,-,정배열(매수),-,-,-
1216,2024-01-02 00:00:00+00:00,091990,셀트리온헬스케어,KOSDAQ,75900,-,-,정배열(매수),-,-,-
1217,2024-01-03 00:00:00+00:00,091990,셀트리온헬스케어,KOSDAQ,75900,-,-,정배열(매수),-,-,-
1218,2024-01-04 00:00:00+00:00,091990,셀트리온헬스케어,KOSDAQ,75900,-,-,정배열(매수),-,-,-


Unnamed: 0,date,ticker,corp_name,market,close,5_20_cross,20_60_cross,array,Bollinger_band,MACD,RSI
0,2023-01-02 00:00:00+00:00,001045,CJ우,KOSPI,49300,데드크로스(매도),-,-,-,-,-
1,2023-01-03 00:00:00+00:00,001045,CJ우,KOSPI,49000,-,-,-,-,-,-
2,2023-01-04 00:00:00+00:00,001045,CJ우,KOSPI,48650,-,-,-,-,-,-
3,2023-01-05 00:00:00+00:00,001045,CJ우,KOSPI,48200,-,-,-,-,-,-
4,2023-01-06 00:00:00+00:00,001045,CJ우,KOSPI,48350,-,-,-,상향회귀(매수),-,-
...,...,...,...,...,...,...,...,...,...,...,...
1240,2023-12-28 00:00:00+00:00,005830,DB손해보험,KOSPI,83700,-,-,-,-,-,-
1241,2024-01-02 00:00:00+00:00,005830,DB손해보험,KOSPI,81600,-,-,-,-,-,-
1242,2024-01-03 00:00:00+00:00,005830,DB손해보험,KOSPI,80900,-,-,-,-,-,-
1243,2024-01-04 00:00:00+00:00,005830,DB손해보험,KOSPI,79200,-,-,-,-,-,-


In [170]:
def rank_per(df1, rank_count):
    max_value_1 = df1[df1['market'] == 'KOSPI'].sort_values(by ='price_change_percentage', ascending = False).head(rank_count)
    max_value_2 = df1[df1['market'] == 'KOSDAQ'].sort_values(by ='price_change_percentage', ascending = False).head(rank_count)
    min_value_1 = df1[df1['market'] == 'KOSPI'].sort_values(by ='price_change_percentage', ascending = True).head(rank_count)
    min_value_2 = df1[df1['market'] == 'KOSDAQ'].sort_values(by ='price_change_percentage', ascending = True).head(rank_count)
    
    total = pd.concat([max_value_1, max_value_2, 
               min_value_1, min_value_2
              ])
    total = total.reset_index(drop = True)
    return total

### 주가 등락 하루

In [179]:
df = stock.get_market_ohlcv("20240105", market="ALL")

In [180]:
df

Unnamed: 0_level_0,시가,고가,저가,종가,거래량,거래대금,등락률
티커,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
060310,3210,3325,3190,3300,1768963,5774925915,0.92
095570,5270,5290,5070,5280,410628,2137622110,1.15
006840,16950,17090,16950,16960,3005,51105470,-0.06
054620,6900,7460,6900,7300,112058,817598270,5.19
265520,23250,23400,22700,22950,104119,2386649500,-1.08
...,...,...,...,...,...,...,...
000540,3000,3030,2970,3000,56853,170400425,-0.50
000545,5640,5800,5640,5710,7264,41029090,1.24
003280,3185,3220,2835,2850,23883232,71191936640,-12.44
037440,7300,7490,7230,7340,158693,1172096760,0.55


In [178]:
df2 = rank_per(df_raw_total_2[df_raw_total_2['date'] == '2024-01-05'], 5)

KeyError: 'price_change_percentage'

### 주가 등락률 일주일 / 한달 / 1년

In [None]:
# df1_set =  ohlcv_df_raw[ohlcv_df_raw['date'] > '2023-10-01'].groupby("ticker")['date'].max().reset_index()
df1_set =  ohlcv_df_raw.groupby("ticker")['date'].max().reset_index()
df1_set = df1_set[['ticker', 'date']]
df1_set = pd.merge(df1_set, ohlcv_df_raw[['ticker', 'date', 'close', 'corp_name']],
              on = ['ticker', 'date'], 
              how = 'left')

diff_date_list = [30, 90, 180, 240, 365]
for diff_date in diff_date_list:
# diff_date = 240
    now = datetime.now()
    now = now + timedelta(days=-diff_date)
    set_date = now.strftime('%Y-%m-%d')
    df1 =  ohlcv_df_raw[ohlcv_df_raw['date'] > set_date].groupby("ticker")['date'].min().reset_index()

    
    df1 = df1[['ticker','date']]
    df1 = pd.merge(df1, ohlcv_df_raw[['ticker', 'date', 'close']],
                  on = ['ticker', 'date'], 
                  how = 'left')
    
    df1.columns = ['ticker', f'date_{diff_date}', f'close_{diff_date}']

    df1_set = pd.merge(df1_set, df1, 
                      on = 'ticker', 
                      how = 'left')

    df1_set[f'per_{diff_date}'] = (df1_set['close'] - df1_set[f'close_{diff_date}']) / df1_set[f'close_{diff_date}'] * 100


df1_set_2 = df1_set[['date','ticker','per_30', 'per_90', 'per_180', 'per_240', 'per_365', 'corp_name']]

df_per_total = pd.DataFrame()
per_set = ['per_30', 'per_90', 'per_180', 'per_240', 'per_365']
ascending_list = [True, False]

for per_value in per_set:
    for ascending_value in ascending_list:
        df_per =  df1_set_2.sort_values(by = per_value, ascending = ascending_value).head()
        df_per['type'] = per_value
        df_per['rank'] = range(1,6)
        df_per['음/양'] = ascending_value
        df_per_total = pd.concat([df_per_total, df_per])

df_per_total = df_per_total.drop_duplicates()     
df_per_total = df_per_total.reset_index(drop = True)

In [173]:
df_per_total

NameError: name 'df_per_total' is not defined

In [58]:
df_per_total[df_per_total['type'] == 'per_30'][['per_30', 'corp_name', 'rank']]

Unnamed: 0,per_30,corp_name,rank
0,-45.0,티와이홀딩스우,1
1,-34.625,DS단석,2
2,-33.481153,한국앤컴퍼니,3
3,-33.216374,흥국화재우,4
4,-32.876712,티와이홀딩스,5
5,170.44335,네오셈,1
6,107.083333,LS머트리얼즈,2
7,81.609195,골든센츄리,3
8,70.133588,디티앤씨알오,4
9,65.357143,제주반도체,5


In [165]:
df_per_total

NameError: name 'df_per_total' is not defined

주별/ 월별 평균

In [121]:
t = pq.read_table('data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet')
df11 = t.to_pandas()

In [123]:
df11['ticker'].unique()

array(['005930', '000660', '207940'], dtype=object)

In [115]:
t = pq.read_table('data_crawler/cleaning/kor_stock_ohlcv/kor_stock_ohlcv_kospi.parquet')
df = t.to_pandas()

In [118]:
df[df['ticker'] == '005930'].head(10)

Unnamed: 0,date,open,high,low,close,volume,price_change_percentage,ticker,corp_name,market,MA5,MA20,MA60,MA120,upper,lower,MACD_DIFF,MACD,MACD_Signal,RSI
0,2023-01-09 00:00:00+00:00,59700,60700,59600,60700.0,18640107,2.881356,5930,삼성전자,KOSPI,58220.0,58315.0,59373.333333,58841.666667,61407.749586,55222.250414,297.79225,-382.242616,-680.034867,59.5793
1,2023-01-10 00:00:00+00:00,60200,61100,59900,60400.0,14859797,-0.494234,5930,삼성전자,KOSPI,59220.0,58360.0,59436.666667,58829.166667,61545.215848,55174.784152,402.537204,-176.863361,-579.400566,58.001471
2,2023-01-11 00:00:00+00:00,61000,61200,60300,60500.0,12310751,0.165563,5930,삼성전자,KOSPI,59760.0,58400.0,59503.333333,58825.833333,61670.473972,55129.526028,458.751698,-5.960943,-464.712641,58.396977
3,2023-01-12 00:00:00+00:00,61100,61200,59900,60500.0,16102561,0.0,5930,삼성전자,KOSPI,60220.0,58400.0,59581.666667,58825.833333,61670.473972,55129.526028,474.174028,128.004893,-346.169134,58.396977
4,2023-01-13 00:00:00+00:00,60500,61200,60400,60800.0,12510328,0.495868,5930,삼성전자,KOSPI,60580.0,58475.0,59670.0,58817.5,61890.186671,55059.813329,481.284716,255.436761,-225.847955,59.716851
5,2023-01-16 00:00:00+00:00,61300,61600,60800,61100.0,10039972,0.493421,5930,삼성전자,KOSPI,60660.0,58555.0,59756.666667,58815.833333,62133.533219,54976.466781,481.71613,376.297207,-105.418923,61.047691
6,2023-01-17 00:00:00+00:00,61200,61500,60600,61000.0,9831456,-0.163666,5930,삼성전자,KOSPI,60780.0,58630.0,59815.0,58815.0,62344.888962,54915.111038,451.313494,458.722944,7.40945,60.332183
7,2023-01-18 00:00:00+00:00,60700,61000,59900,60400.0,11584041,-0.983607,5930,삼성전자,KOSPI,60760.0,58720.0,59860.0,58804.166667,62513.995256,54926.004744,370.240962,470.210653,99.969691,56.084752
8,2023-01-19 00:00:00+00:00,60500,61500,60400,61500.0,12808490,1.821192,5930,삼성전자,KOSPI,60960.0,58895.0,59895.0,58801.666667,62859.076185,54930.923815,369.305653,561.601757,192.296104,61.443908
9,2023-01-20 00:00:00+00:00,62100,62300,61100,61800.0,9646327,0.487805,5930,삼성전자,KOSPI,61160.0,59030.0,59933.333333,58800.833333,63191.778466,54868.221534,366.751979,650.736078,283.984099,62.778024
