In [2]:
import pandas as pd
import pandas_gbq
from pykrx import stock
from pykrx import bond
import FinanceDataReader as fdr

from time import sleep

import psycopg2 as pg2
from sqlalchemy import create_engine

from datetime import datetime
import os
import time

import glob
from google.cloud import bigquery
from google.oauth2 import service_account
from google.cloud import storage

# Work from the project root so that the relative paths below resolve.
os.chdir('/home/shjj08choi4/finance_mlops')

# Service-account key: the first JSON file matched under key_value/.
key_path = glob.glob("key_value/*.json")[0]
credentials = service_account.Credentials.from_service_account_file(key_path)

# BigQuery destination (project and dataset) used by every load below.
project_id = 'owen-389015'
dataset_id = 'finance_mlops'

# Google Cloud Storage client, bound to the project named in the key file.
storage_client = storage.Client(
    project=credentials.project_id,
    credentials=credentials,
)

# PostgreSQL connection settings come from the first row of a local CSV.
db_connect_info = pd.read_csv('key_value/db_connect_info.csv')
username, password, host, database = (
    db_connect_info[field][0]
    for field in ('username', 'password', 'host', 'database')
)

# SQLAlchemy engine used by DataFrame.to_sql (port fixed at 5432).
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{host}:5432/{database}')

In [3]:
# Collection window: a fixed start date through today's date (local clock).
start_date2 = '2017-01-01'
now = datetime.now()
today_date2 = f'{now:%Y-%m-%d}'

## S&P 500 종목 리스트

In [4]:
# Fetch the S&P 500 listing and relabel its four columns with the
# names the rest of the notebook uses.
snp500 = fdr.StockListing('S&P500')
snp500 = snp500.set_axis(['ticker', 'corp_name', 'sector', 'industry'], axis=1)
snp500.head()

Unnamed: 0,ticker,corp_name,sector,industry
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Pharmaceuticals
4,ACN,Accenture,Information Technology,IT Consulting & Other Services


In [7]:
# Write the ticker list to every sink, overwriting any previous snapshot.
file_name = 'snp500_ticker_list'
bucket_name = 'finance-mlops'    # bucket the service account writes to
source_file_name = f'data_crawler/{file_name}.csv'    # local CSV that gets uploaded
destination_blob_name = f'data_crawler/{file_name}/{file_name}.csv'    # object name inside the bucket

# 1) BigQuery
snp500.to_gbq(
    destination_table=f'{project_id}.{dataset_id}.{file_name}',
    project_id=project_id,
    if_exists='replace',
    credentials=credentials,
)

# 2) PostgreSQL
snp500.to_sql(file_name, con=engine, if_exists='replace', index=False)

# 3) Local CSV
snp500.to_csv(source_file_name, index=False, mode='w')

# 4) Google Cloud Storage
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)

# Ticker symbols consumed by the price-collection loop.
sp500_ticker_list = snp500['ticker']

100%|██████████| 1/1 [00:00<00:00, 8338.58it/s]


## 주가 데이터 수집

In [8]:
# Collect daily price history per ticker and append it to BigQuery,
# PostgreSQL and a local CSV; afterwards upload the CSV to Google Cloud Storage.
#
# The target names are fixed before the loop so they are correct even when the
# ticker list is empty (previously `file_name` was only set inside the loop and
# could still hold the value left by an earlier cell).
file_name = 'snp500'
bucket_name = 'finance-mlops'    # bucket the service account writes to
source_file_name = f'data_crawler/{file_name}.csv'    # local CSV that gets uploaded
destination_blob_name = f'data_crawler/{file_name}/{file_name}.csv'    # object name inside the bucket

# NOTE: the [:2] slice limits the run to the first two tickers.
for ticker_nm in sp500_ticker_list[:2]:
    try:
        # Price history for this ticker from start_date2 to today_date2.
        df_raw = fdr.DataReader(ticker_nm, start_date2, today_date2)
        df_raw['ticker'] = ticker_nm
        df_raw = df_raw.reset_index()

        # BigQuery load
        df_raw.to_gbq(
            destination_table=f'{project_id}.{dataset_id}.{file_name}',
            project_id=project_id,
            if_exists='append',
            credentials=credentials,
        )

        # PostgreSQL load
        df_raw.to_sql(file_name, if_exists='append', con=engine, index=False)

        # Append to the local CSV; the header is written only when the file
        # does not exist yet.
        write_header = not os.path.exists(source_file_name)
        df_raw.to_csv(source_file_name, index=False, mode='a', header=write_header)

        print(f'{ticker_nm} success')
    except Exception as err:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the loop, and the reason for the failure is reported.
        print(f'{ticker_nm} fail ({type(err).__name__}: {err})')

# Google Cloud Storage upload — skipped when no ticker produced a CSV, which
# would otherwise raise FileNotFoundError.
if os.path.exists(source_file_name):
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
else:
    print(f'{source_file_name} not found; upload skipped')


100%|██████████| 1/1 [00:00<00:00, 3269.14it/s]


MMM success


100%|██████████| 1/1 [00:00<00:00, 8128.50it/s]

AOS success





## 비트코인

In [5]:
# Fetch BTC/KRW daily prices from start_date2 to today_date2, normalise the
# column names and add the rows to the `df_raw_total` accumulator.
btc_ticker = 'btc_krw'    # label for this series; the messages previously printed a stale `ticker_nm`

# Timestamp is taken before the try block so the failure message can always use it.
now1 = datetime.now()
time_line = now1.strftime("%Y%m%d_%H:%M:%S")

try:
    time.sleep(1)    # pause one second before the request
    df_raw = fdr.DataReader('BTC/KRW', start_date2, today_date2)
    df_raw['ticker'] = btc_ticker
    df_raw = df_raw.reset_index()
    df_raw.columns = ['date', 'open', 'high', 'low', 'close', 'adj_close', 'volume', 'ticker']

    # `df_raw_total` is not created anywhere in the visible notebook, so on a
    # fresh kernel the concat raised NameError and the cell always reported
    # failure. Start the accumulator from this frame when it does not exist.
    try:
        df_raw_total = pd.concat([df_raw_total, df_raw])
    except NameError:
        df_raw_total = df_raw.copy()

    print(f'{btc_ticker} success_{time_line}')
except Exception as err:
    # Report why the fetch failed instead of silently swallowing everything
    # (a bare except also traps KeyboardInterrupt).
    print(f'{btc_ticker} fail_{time_line} ({type(err).__name__}: {err})')

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-01,5.060929e+05,5.125672e+05,5.022874e+05,5.101036e+05,5.101036e+05,4.260775e+10
1,2016-01-02,5.104418e+05,5.121330e+05,5.072097e+05,5.090512e+05,5.090512e+05,3.534695e+10
2,2016-01-03,5.092157e+05,5.094095e+05,4.989786e+05,5.051554e+05,5.051554e+05,4.655981e+10
3,2016-01-04,5.052142e+05,5.172751e+05,5.059330e+05,5.146464e+05,5.146464e+05,4.572320e+10
4,2016-01-05,5.146202e+05,5.148096e+05,5.100812e+05,5.139287e+05,5.139287e+05,4.107361e+10
...,...,...,...,...,...,...,...
2770,2023-08-02,3.736512e+07,3.826388e+07,3.698184e+07,3.826388e+07,3.826388e+07,2.356041e+13
2771,2023-08-03,3.830051e+07,3.867243e+07,3.764353e+07,3.778495e+07,3.778495e+07,2.490224e+13
2772,2023-08-04,,,,,,
2773,2023-08-05,,,,,,


In [10]:
# Full BTC/KRW history snapshot (2016-01-01 .. today_date2) written to the
# local CSV, BigQuery, PostgreSQL and Google Cloud Storage.
df_raw = fdr.DataReader('BTC/KRW', "2016-01-01", today_date2)
df_raw = df_raw.reset_index()

file_name = 'btc_df'
bucket_name = 'finance-mlops'    # bucket the service account writes to
source_file_name = f'data_crawler/{file_name}.csv'    # local CSV that gets uploaded
destination_blob_name = f'data_crawler/{file_name}/{file_name}.csv'    # object name inside the bucket

# Local CSV is overwritten on every run.
df_raw.to_csv(source_file_name, index=False, mode='w')

# BigQuery load. The frame holds the complete history, so the table is
# replaced; with 'append' every re-run duplicated all rows.
df_raw.to_gbq(
    destination_table=f'{project_id}.{dataset_id}.{file_name}',
    project_id=project_id,
    if_exists='replace',
    credentials=credentials,
)

# PostgreSQL load — replaced for the same reason as the BigQuery table.
df_raw.to_sql(file_name, if_exists='replace', con=engine, index=False)

# Google Cloud Storage upload
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)


100%|██████████| 1/1 [00:00<00:00, 2513.06it/s]


## 환율

In [11]:
# Daily USD/KRW fetch: request only today's quote and tag it with a ticker label.
file_name = 'usd_krw'
ticker_nm = 'usd_krw'

# Date settings. This rebinds today_date2 and start_date2, so from here on the
# collection window is today only (use "2016-01-01" as the start for a backfill).
now = datetime.now()
today_date1 = f'{now:%Y%m%d}'
today_date2 = f'{now:%Y-%m-%d}'
start_date2 = today_date2

# Move the date index into a column, add the ticker column, then renumber rows.
df_raw = (
    fdr.DataReader('USD/KRW', start_date2, today_date2)
    .reset_index()
    .assign(ticker=ticker_nm)
    .reset_index(drop=True)
)

df_raw

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ticker
0,2023-11-29,1288.0,1291.819946,1284.719971,1290.400024,1290.400024,0,usd_krw


In [11]:
# Full USD/KRW history snapshot (2016-01-01 .. today_date2) written to the
# local CSV, BigQuery, PostgreSQL and Google Cloud Storage.
df_raw = fdr.DataReader('USD/KRW', "2016-01-01", today_date2)
df_raw = df_raw.reset_index()

file_name = 'usdkrw'
bucket_name = 'finance-mlops'    # bucket the service account writes to
source_file_name = f'data_crawler/{file_name}.csv'    # local CSV that gets uploaded
destination_blob_name = f'data_crawler/{file_name}/{file_name}.csv'    # object name inside the bucket

# Local CSV is overwritten on every run.
df_raw.to_csv(source_file_name, index=False, mode='w')

# BigQuery load. The frame holds the complete history, so the table is
# replaced; with 'append' every re-run duplicated all rows.
df_raw.to_gbq(
    destination_table=f'{project_id}.{dataset_id}.{file_name}',
    project_id=project_id,
    if_exists='replace',
    credentials=credentials,
)

# PostgreSQL load — replaced for the same reason as the BigQuery table.
df_raw.to_sql(file_name, if_exists='replace', con=engine, index=False)

# Google Cloud Storage upload
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)


100%|██████████| 1/1 [00:00<00:00, 3407.23it/s]


In [12]:
# Ad-hoc check: request USD/KRW for the fixed range 2023-01-01 .. 2023-11-11.
# This rebinds df_raw, discarding whatever frame it held before.
df_raw = fdr.DataReader('USD/KRW', '2023-01-01','2023-11-11')

In [13]:
# Show the current contents of df_raw as the cell output.
df_raw

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-02,1260.910034,1273.930054,1256.550049,1260.910034,1260.910034,0
2023-01-03,1270.099976,1278.969971,1264.390015,1270.099976,1270.099976,0
2023-01-04,1279.079956,1279.719971,1267.560059,1279.079956,1279.079956,0
2023-01-05,1271.079956,1278.229980,1265.739990,1271.079956,1271.079956,0
2023-01-06,1274.180054,1274.180054,1253.010010,1274.180054,1274.180054,0
...,...,...,...,...,...,...
2023-11-06,1307.839966,1311.650024,1290.959961,1307.839966,1307.839966,0
2023-11-07,1297.109985,1312.439941,1297.579956,1297.109985,1297.109985,0
2023-11-08,1304.550049,1314.189941,1298.689941,1304.550049,1304.550049,0
2023-11-09,1308.119995,1313.890015,1302.819946,1308.119995,1308.119995,0
