In [30]:
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import pandas_gbq
from pykrx import stock
from pykrx import bond
import FinanceDataReader as fdr


from time import sleep

import psycopg2 as pg2
from sqlalchemy import create_engine

from datetime import datetime
from datetime import timedelta

import os
import time

import glob
from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage


# 경로 변경
os.chdir('/home/shjj08choi4/finance_mlops')


# 서비스 계정 키 JSON 파일 경로
key_path = glob.glob("key_value/*.json")[0]

# Credentials 객체 생성
credentials = service_account.Credentials.from_service_account_file(key_path)

# 빅쿼리 정보
project_id = 'owenchoi-404302'
dataset_id = 'finance_mlops'

# GCP 클라이언트 객체 생성
storage_client = storage.Client(credentials = credentials, 
                         project = credentials.project_id)
bucket_name = 'finance-mlops-proj'     # 서비스 계정 생성한 bucket 이름 입력

# Postgresql 연결
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username = db_connect_info['username'][0]
password = db_connect_info['password'][0]
host = db_connect_info['host'][0]
database = db_connect_info['database'][0]
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{host}:5432/{database}')



In [17]:
engine

Engine(postgresql+psycopg2://owen:***@34.64.237.167:5432/finance_mlops)

In [2]:

now = datetime.now()
# now = now + timedelta(days=-2)
today_date1 = now.strftime('%Y%m%d')
start_date1 = '20180101'
today_date1 = '20231031'
today_date2 = now.strftime('%Y-%m-%d')
today_date_time_csv = now.strftime("%Y%m%d_%H%M")

def upload_df(data, file_name, project_id, dataset_id, time_line):
    if not os.path.exists(f'data_crawler/{file_name}'):
        os.makedirs(f'data_crawler/{file_name}')

    try:
        if not os.path.exists(f'data_crawler/{file_name}/{file_name}.csv'):
            data.to_csv(f'data_crawler/{file_name}/{file_name}.csv', index=False, mode='w')
        else:
            data.to_csv(f'data_crawler/{file_name}/{file_name}.csv', index=False, mode='a', header=False)
        print(f'{file_name}_로컬CSV저장_success_{time_line}')    
    except:
        print(f'{file_name}_로컬CSV저장_fail_{time_line}')
    
    
    # Google Storage 적재
    source_file_name = f'data_crawler/{file_name}/{file_name}.csv'    # GCP에 업로드할 파일 절대경로
    destination_blob_name = f'data_crawler/{file_name}/{file_name}.csv'    # 업로드할 파일을 GCP에 저장할 때의 이름
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)

    try:
        # 빅쿼리 데이터 적재
        data.to_gbq(destination_table=f'{project_id}.{dataset_id}.{file_name}',
          project_id=project_id,
          if_exists='append',
          credentials=credentials)
        print(f'{file_name}_빅쿼리저장_success_{time_line}')
    except:
        print(f'{file_name}_빅쿼리저장_fail_{time_line}')


    try:
        # Postgresql 적재
        data.to_sql(f'{file_name}',if_exists='append', con=engine,  index=False)
        print(f'{file_name}_Postgresql저장_success_{time_line}')
    except:
        print(f'{file_name}_Postgresql저장_fail_{time_line}')

# # 주식 정보


In [3]:
## 티커 리스트
market_list = ['KOSPI', 'KOSDAQ']
kor_ticker_list_df = pd.DataFrame()
for market_nm in market_list:
    ticker_list = stock.get_market_ticker_list(today_date1, market=market_nm)
    for tickers in ticker_list:
        corp_name = stock.get_market_ticker_name(tickers)
        df = pd.DataFrame({'ticker':tickers,
                           'corp_name':corp_name,
                           'market': market_nm
                          }, index = [0])
        kor_ticker_list_df = pd.concat([kor_ticker_list_df,df])
kor_ticker_list_df = kor_ticker_list_df.reset_index(drop = True)


now1 = datetime.now()
time_line = now1.strftime("%Y%m%d_%H:%M:%S")  

file_name = 'kor_ticker_list'
upload_df(kor_ticker_list_df, file_name, project_id, dataset_id, time_line)
kor_ticker_list = kor_ticker_list_df['ticker']


kor_ticker_list_로컬CSV저장_success_20231207_05:17:04
kor_ticker_list_빅쿼리저장_fail_20231207_05:17:04
kor_ticker_list_Postgresql저장_fail_20231207_05:17:04


In [19]:
kor_ticker_list_df = pd.read_csv('data_crawler/kor_ticker_list/kor_ticker_list.csv')

In [8]:
now1 = datetime.now()
time_line = now1.strftime("%Y%m%d_%H:%M:%S")  

file_name = 'kor_ticker_list'

In [6]:
file_name = 'kor_ticker_list_df'

In [9]:
upload_df(kor_ticker_list_df, file_name, project_id, dataset_id, time_line)
kor_ticker_list = kor_ticker_list_df['ticker']

kor_ticker_list_로컬CSV저장_success_20231207_05:25:44
kor_ticker_list_빅쿼리저장_fail_20231207_05:25:44
kor_ticker_list_Postgresql저장_fail_20231207_05:25:44


In [24]:
kor_ticker_list_df

Unnamed: 0,ticker,corp_name,market
0,095570,AJ네트웍스,KOSPI
1,006840,AK홀딩스,KOSPI
2,027410,BGF,KOSPI
3,282330,BGF리테일,KOSPI
4,138930,BNK금융지주,KOSPI
5,001460,BYC,KOSPI
6,001465,BYC우,KOSPI
7,001040,CJ,KOSPI
8,079160,CJ CGV,KOSPI
9,00104K,CJ4우(전환),KOSPI


In [25]:
file_name

'kor_ticker_list'

In [26]:
kor_ticker_list_df.to_sql(f'{file_name}',if_exists='append', con=engine,  index=False)
print(f'{file_name}_Postgresql저장_success_{time_line}')

kor_ticker_list_Postgresql저장_success_20231207_05:25:44


In [31]:
kor_ticker_list_df.to_gbq(destination_table=f'{project_id}.{dataset_id}.{file_name}',
  project_id=project_id,
  if_exists='append',
  credentials=credentials)
print(f'{file_name}_빅쿼리저장_success_{time_line}')

100%|██████████| 1/1 [00:00<00:00, 3724.96it/s]

kor_ticker_list_빅쿼리저장_success_20231207_05:25:44





In [27]:
project_id

'owen-404302'

In [10]:
kor_ticker_list_df

Unnamed: 0,ticker,corp_name,market
0,095570,AJ네트웍스,KOSPI
1,006840,AK홀딩스,KOSPI
2,027410,BGF,KOSPI
3,282330,BGF리테일,KOSPI
4,138930,BNK금융지주,KOSPI
...,...,...,...
13165,024060,흥구석유,KOSDAQ
13166,010240,흥국,KOSDAQ
13167,189980,흥국에프엔비,KOSDAQ
13168,037440,희림,KOSDAQ
