In [10]:
#!/usr/bin/env python
# coding: utf-8

import glob
import os
import time
from datetime import datetime
from datetime import timedelta
from time import sleep
from urllib.parse import quote_plus

import FinanceDataReader as fdr
import pandas as pd
import pandas_gbq
import psycopg2 as pg2
from google.cloud import bigquery
from google.cloud import storage
from google.oauth2 import service_account
from pykrx import bond
from pykrx import stock
from sqlalchemy import create_engine


# Switch to the project root so the relative paths below resolve.
# NOTE(review): absolute, user-specific path - the notebook only runs on a
# machine that has this directory layout.
os.chdir('/home/shjj08choi/finance_mlops')


# Path of the service-account key JSON file (first match under key_value/).
key_files = glob.glob("key_value/*.json")
if not key_files:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError("No service-account key found under key_value/*.json")
key_path = key_files[0]

# Build the Credentials object from the key file.
credentials = service_account.Credentials.from_service_account_file(key_path)

# BigQuery target.
project_id = 'owenchoi-396200'
dataset_id = 'finance_mlops'

# Google Cloud Storage client.
storage_client = storage.Client(credentials=credentials,
                                project=credentials.project_id)
bucket_name = 'finance-mlops-1'     # bucket the service account was created for

# PostgreSQL connection; the connection details are read from the first row
# of a local CSV file.
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username = db_connect_info['username'][0]
password = db_connect_info['password'][0]
host = db_connect_info['host'][0]
database = db_connect_info['database'][0]
# URL-escape the credentials: characters such as '@', ':' or '/' in a password
# would otherwise be parsed as URL delimiters.
# NOTE(review): assumes the CSV stores the raw (not already percent-encoded)
# password - confirm before deploying.
engine = create_engine(
    f'postgresql+psycopg2://{quote_plus(str(username))}:{quote_plus(str(password))}'
    f'@{host}:5432/{database}'
)



# Date strings derived from the moment this cell runs.
now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
start_date1 = '20180101'
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")

def upload_df(data, file_name, project_id, dataset_id, time_line, today_date1):
    """Store one DataFrame in local CSV, Google Cloud Storage, BigQuery and PostgreSQL.

    Every sink is attempted on a best-effort basis: a failure is reported on
    stdout together with the exception, and the remaining sinks still run.

    Args:
        data: Frame to store; must provide ``to_csv``, ``to_gbq`` and ``to_sql``.
        file_name: Dataset name. Used as the sub-directory and CSV prefix
            (locally and in the bucket), the BigQuery table name and the SQL
            table name.
        project_id: GCP project that owns the BigQuery dataset.
        dataset_id: BigQuery dataset that receives the table.
        time_line: Timestamp string; only echoed in the log messages.
        today_date1: Date string used as the CSV file-name suffix.

    Returns:
        None. Progress is reported through ``print``.

    Uses the module-level ``storage_client``, ``bucket_name``, ``credentials``
    and ``engine`` objects.
    """
    csv_dir = f'data_crawler/{file_name}'
    csv_path = f'{csv_dir}/{file_name}_{today_date1}.csv'

    # 1) Local CSV: create the file with a header on first write, append
    #    without a header afterwards.
    csv_saved = False
    try:
        os.makedirs(csv_dir, exist_ok=True)
        is_new_file = not os.path.exists(csv_path)
        data.to_csv(csv_path,
                    index=False,
                    mode='w' if is_new_file else 'a',
                    header=is_new_file)
        csv_saved = True
        print(f'{file_name}_로컬CSV저장_success_{time_line}')
    except Exception as exc:
        print(f'{file_name}_로컬CSV저장_fail_{time_line}: {exc!r}')

    # 2) Google Cloud Storage: upload the CSV under the same relative path.
    #    Skipped when the CSV step failed, because the local file is then
    #    missing or in an unknown state.
    if csv_saved:
        try:
            bucket = storage_client.bucket(bucket_name)
            bucket.blob(csv_path).upload_from_filename(csv_path)
            print(f'{file_name}_GCS저장_success_{time_line}')
        except Exception as exc:
            print(f'{file_name}_GCS저장_fail_{time_line}: {exc!r}')
    else:
        print(f'{file_name}_GCS저장_skip_{time_line}')

    # 3) BigQuery: append to <project>.<dataset>.<file_name>.
    try:
        data.to_gbq(destination_table=f'{project_id}.{dataset_id}.{file_name}',
                    project_id=project_id,
                    if_exists='append',
                    credentials=credentials)
        print(f'{file_name}_빅쿼리저장_success_{time_line}')
    except Exception as exc:
        print(f'{file_name}_빅쿼리저장_fail_{time_line}: {exc!r}')

    # 4) PostgreSQL: append to the table named after file_name.
    try:
        data.to_sql(file_name, if_exists='append', con=engine, index=False)
        print(f'{file_name}_Postgresql저장_success_{time_line}')
    except Exception as exc:
        print(f'{file_name}_Postgresql저장_fail_{time_line}: {exc!r}')

# # Stock information


## Ticker list
# Collect one record per listed ticker and build the frame once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic).
market_list = ['KOSPI', 'KOSDAQ']
ticker_records = []
for market_nm in market_list:
    ticker_list = stock.get_market_ticker_list(today_date1, market=market_nm)
    for tickers in ticker_list:
        corp_name = stock.get_market_ticker_name(tickers)
        ticker_records.append({'ticker': tickers,
                               'corp_name': corp_name,
                               'market': market_nm})
# Passing the column list keeps 'ticker' available even when no ticker was
# returned, so the lookup at the end of the cell cannot raise KeyError.
kor_ticker_list_df = pd.DataFrame(ticker_records,
                                  columns=['ticker', 'corp_name', 'market'])


now1 = datetime.now()
time_line = now1.strftime("%Y%m%d_%H:%M:%S")

file_name = 'kor_ticker_list'
upload_df(kor_ticker_list_df, file_name, project_id, dataset_id, time_line, today_date1)
# Series of ticker codes consumed by the per-ticker loops further down.
kor_ticker_list = kor_ticker_list_df['ticker']



kor_ticker_list_로컬CSV저장_success_20230823_06:47:35


100%|██████████| 1/1 [00:00<00:00, 3189.58it/s]


kor_ticker_list_빅쿼리저장_success_20230823_06:47:35
kor_ticker_list_Postgresql저장_success_20230823_06:47:35


In [12]:
# Display the ticker Series taken from kor_ticker_list_df['ticker'].
kor_ticker_list

0       095570
1       006840
2       027410
3       282330
4       138930
         ...  
2614    024060
2615    010240
2616    189980
2617    037440
2618    238490
Name: ticker, Length: 2619, dtype: object

In [20]:
# Sample request: OHLCV history of one ticker, with the index moved into a
# regular column so it shows up next to the price columns.
df_raw = stock.get_market_ohlcv('20180101', '20230822', '095570').reset_index()
df_raw

Unnamed: 0,날짜,시가,고가,저가,종가,거래량,등락률
0,2018-01-02,6980,6990,6850,6970,60294,0.000000
1,2018-01-03,6970,7000,6910,6920,45848,-0.717360
2,2018-01-04,6990,7100,6920,7070,53176,2.167630
3,2018-01-05,7080,7300,7040,7250,58573,2.545969
4,2018-01-08,7330,7330,7100,7130,47104,-1.655172
...,...,...,...,...,...,...,...
1386,2023-08-16,4200,4270,4170,4185,85067,-3.460208
1387,2023-08-17,4120,4230,4030,4210,118070,0.597372
1388,2023-08-18,4200,4285,4105,4230,78260,0.475059
1389,2023-08-21,4155,4225,4120,4130,36141,-2.364066


In [23]:
# OHLCV request for a single date (today_date1) with market="ALL".
df_raw = stock.get_market_ohlcv(today_date1,  market="ALL")

In [24]:
# Display the frame assigned to df_raw by the most recently executed cell.
df_raw

Unnamed: 0_level_0,시가,고가,저가,종가,거래량,거래대금,등락률
티커,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
060310,2655,2730,2605,2670,332121,890996095,0.75
095570,4050,4080,3995,4080,56157,225793830,0.74
006840,19460,19680,18970,18980,17623,335572550,-1.50
054620,9410,9670,8720,8850,1343295,12340037500,0.68
265520,18450,18450,17910,17960,69413,1252899700,-1.37
...,...,...,...,...,...,...,...
000540,2900,2925,2885,2910,19860,57662330,0.34
000545,4700,4705,4700,4705,1904,8958300,0.11
003280,1867,1885,1820,1829,345590,641761171,-1.98
037440,6540,6910,6540,6770,146711,991956290,3.04


In [25]:
# Sample OHLCV request for one ticker without reset_index(); the displayed
# frame keeps the date ('날짜') as its index.
df_raw = stock.get_market_ohlcv('20180101', '20230822', '095570')
df_raw

Unnamed: 0_level_0,시가,고가,저가,종가,거래량,등락률
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,6980,6990,6850,6970,60294,0.000000
2018-01-03,6970,7000,6910,6920,45848,-0.717360
2018-01-04,6990,7100,6920,7070,53176,2.167630
2018-01-05,7080,7300,7040,7250,58573,2.545969
2018-01-08,7330,7330,7100,7130,47104,-1.655172
...,...,...,...,...,...,...
2023-08-16,4200,4270,4170,4185,85067,-3.460208
2023-08-17,4120,4230,4030,4210,118070,0.597372
2023-08-18,4200,4285,4105,4230,78260,0.475059
2023-08-21,4155,4225,4120,4130,36141,-2.364066


In [15]:
# Prototype of the per-ticker OHLCV transform on one sample ticker.
# The ticker is defined here so the cell also runs on a fresh kernel; it
# previously relied on `ticker_nm` leaking in from the loop cell.
ticker_nm = '095570'
df_raw = stock.get_market_ohlcv('20180101', '20230822', ticker_nm)
df_raw = df_raw.reset_index()
df_raw['티커'] = ticker_nm
# .copy() gives an independent frame, so the column assignment below does not
# trigger SettingWithCopyWarning.
df_raw = df_raw[['날짜', '시가', '고가', '저가', '종가', '거래량', '티커']].copy()
df_raw.columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'ticker']

df_raw['date'] = pd.to_datetime(df_raw['date'])

file_name = 'kor_stock_ohlcv'

now1 = datetime.now()
time_line = now1.strftime("%Y%m%d_%H:%M:%S")

In [28]:
# Market-cap history of one sample ticker.
# The ticker is defined here so the cell runs on a fresh kernel, and the
# request is issued once instead of twice.
ticker_nm = '095570'
df_raw = stock.get_market_cap('20180101', '20230822', ticker_nm)
df_raw

Unnamed: 0_level_0,시가총액,거래량,거래대금,상장주식수
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-02,326351396150,60294,417947300,46822295
2018-01-03,324010281400,45848,319107800,46822295
2018-01-04,331033625650,53176,372980010,46822295
2018-01-05,339461638750,58573,420815340,46822295
2018-01-08,333842963350,47104,338693000,46822295
...,...,...,...,...
2023-08-16,195951304575,85067,357310815,46822295
2023-08-17,197121861950,118070,485064050,46822295
2023-08-18,198058307850,78260,327369725,46822295
2023-08-21,193376078350,36141,150152665,46822295


In [30]:
# Market-cap request for a single date (today_date1) with market="ALL";
# the displayed frame is indexed by ticker ('티커').
df_raw = stock.get_market_cap(today_date1,  market="ALL")
df_raw

Unnamed: 0_level_0,종가,시가총액,거래량,거래대금,상장주식수
티커,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
005930,67100,400572409105000,9515526,635953731300,5969782550
373220,525000,122850000000000,190310,100645387000,234000000
000660,116000,84448274340000,2564626,295650361900,728002365
207940,763000,54305762000000,32290,24698625000,71174000
005490,551000,46598747730000,741248,407883425000,84571230
...,...,...,...,...,...
140660,1210,3009574920,0,0,2487252
245450,1598,2524776080,22,34696,1579960
288490,55,2379685275,139635,7674710,43267005
322190,163,1454807111,230,38896,8925197


In [None]:

# Price (OHLCV) history: fetch every ticker and push it through upload_df.
print('주가정보 시작')

file_name = 'kor_stock_ohlcv'
backfill_end_date = '20230822'   # last trading day covered by this backfill

# Source column -> stored column, in the order the stored table uses.
ohlcv_column_map = {
    '날짜': 'date',
    '시가': 'open',
    '고가': 'high',
    '저가': 'low',
    '종가': 'close',
    '거래량': 'volume',
    '거래대금': 'trading_value',
    '등락률': 'price_change_percentage',
    '티커': 'ticker',
}

for ticker_nm in kor_ticker_list:
    # Take the timestamp first so a failure message never shows a stale value
    # left over from the previous ticker.
    now1 = datetime.now()
    time_line = now1.strftime("%Y%m%d_%H:%M:%S")
    try:
        df_raw = stock.get_market_ohlcv(start_date1, backfill_end_date, ticker_nm)
        if df_raw.empty:
            # Nothing to store for this ticker in the requested range.
            print(f'주가정보 없음_{ticker_nm}_{time_line}')
            continue

        df_raw = df_raw.reset_index()
        df_raw['티커'] = ticker_nm
        # rename + reindex instead of a hard column selection: the per-ticker
        # frames shown earlier in this notebook have no '거래대금' column, and
        # selecting it raised KeyError for every ticker. A column missing from
        # the response is stored as NaN so the output schema stays fixed.
        df_raw = (
            df_raw
            .rename(columns=ohlcv_column_map)
            .reindex(columns=list(ohlcv_column_map.values()))
        )
        df_raw['date'] = pd.to_datetime(df_raw['date'])

        upload_df(df_raw, file_name, project_id, dataset_id, time_line, today_date1)
        print(f'주가정보 완료_{ticker_nm}_{time_line}')
    except Exception as exc:
        # Best effort: report the cause and continue with the next ticker.
        print(f'주가정보 실패_{ticker_nm}_{time_line}: {exc!r}')