In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs
from tqdm import tqdm
import time
import random
import lxml
import pandas as pd

In [2]:
# 출력 행 제한을 없애는 코드
pd.set_option('display.max_rows', 200)

### 날짜 생성 코드

In [3]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

# 오늘 날짜를 가져옵니다.
today = datetime.today()
# 10년 전의 날짜를 계산합니다.
ten_years_ago = today - relativedelta(years=10)

# 날짜 범위를 생성합니다.
dates = [ten_years_ago + relativedelta(days=i) for i in range((today - ten_years_ago).days)]

# 결과를 출력합니다.
for date in dates[::-1][:10]:
    print(date.strftime('%Y-%m-%d'))

2024-07-30
2024-07-29
2024-07-28
2024-07-27
2024-07-26
2024-07-25
2024-07-24
2024-07-23
2024-07-22
2024-07-21


In [4]:
# 크롬 옵션즈에 User-Agent, lang 같은 정보를 담아 셀레니움을 이용한 크롤링이 아닌 것 처럼 만들기
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# options.add_argument('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36')
options.add_argument("lang=ko_KR")

# 웹드라이버를 버전에 맞게 자동으로 다운 받고 옵션을 추가해 줌
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
driver.set_window_size(1920,1080)   # 웹브라우저 해상도 조절

driver.get("https://www.hanabank.com/cms/rate/index.do?contentUrl=/cms/rate/wpfxd651_01i.do#//HanaBank")  # 크롤링 할 웹사이트 주소

In [6]:
df_list = []
for idx, date in enumerate(dates[::-1][:1]):
    # 웹 요소가 나타날 때까지 최대 10초 동안 기다립니다.
    wait = WebDriverWait(driver, 10)

    # 날짜 박스 #tmpInqStrDt
    datebox = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#tmpInqStrDt')))
    datebox.clear()   # 기존에 입력된 값을 지웁니다.
    datebox.send_keys(f"{date.date()}")   # 새로운 값을 입력합니다.

    # 조회버튼   #HANA_CONTENTS_DIV > div.btnBoxCenter > a
    search_box = driver.find_element(By.CSS_SELECTOR, '#HANA_CONTENTS_DIV > div.btnBoxCenter > a')
    search_box.click()   # 조회버튼 클릭
    # 생성한 시간 동안 대기합니다.
    time.sleep(random.uniform(3, 7))
    exchange_rate_table = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#searchContentDiv > div.printdiv > table')))

    # 페이지의 전체 HTML 소스를 가져옵니다.
    page_html = driver.page_source
    # soup = bs(page_html, "lxml")     # beautifulsoup으로 html을 parsing
    exchange_rate = pd.read_html(page_html)
    exchange_rate = exchange_rate[1]
    exchange_rate['date'] = f"{date.date()}"
    df_list.append(exchange_rate)
    print(f"전체 {len(dates)} 중 {idx+1:04d} 페이지 완료", end="\r")
# 리스트의 모든 DataFrame을 하나로 합칩니다.
final_df = pd.concat(df_list)

전체 3653 중 0001 페이지 완료

  exchange_rate = pd.read_html(page_html)


In [7]:
final_df['date'][0]

'2024-07-30'

In [9]:
final_df[final_df['date'] == '2024-07-30']

Unnamed: 0_level_0,통화,현찰,현찰,현찰,현찰,송금,송금,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,date
Unnamed: 0_level_1,통화,사실 때,사실 때,파실 때,파실 때,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_1
Unnamed: 0_level_2,통화,환율,Spread,환율,Spread,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_2
0,미국 USD,1409.23,1.75,1360.77,1.75,1398.5,1371.5,0.0,1368.74,1385.0,7.19371,1.0,2024-07-30
1,일본 JPY (100),921.73,1.75,890.03,1.75,914.75,897.01,0.0,896.52,905.88,2.19682,0.6541,2024-07-30
2,유로 EUR,1527.47,1.99,1467.87,1.99,1512.64,1482.7,0.0,1480.38,1497.67,5.596,1.0814,2024-07-30
3,중국 CNY,200.76,5.0,181.64,5.0,193.11,189.29,0.0,0.0,191.2,4.87455,0.1381,2024-07-30
4,홍콩 HKD,180.76,1.97,173.78,1.97,179.04,175.5,0.0,175.22,177.27,6.57333,0.128,2024-07-30
5,태국 THB,40.54,5.0,36.3,6.0,38.99,38.23,0.0,38.19,38.61,4.525,0.0279,2024-07-30
6,대만 TWD,47.7,13.1,37.97,10.0,0.0,0.0,0.0,0.0,42.18,3.53333,0.0305,2024-07-30
7,필리핀 PHP,26.0,10.0,21.71,8.2,23.87,23.41,0.0,0.0,23.64,8.253,0.0171,2024-07-30
8,싱가포르 SGD,1052.49,1.99,1011.43,1.99,1042.27,1021.65,0.0,1019.78,1031.96,7.36633,0.7451,2024-07-30
9,호주 AUD,923.34,1.97,887.68,1.97,914.56,896.46,0.0,894.85,905.51,6.51433,0.6538,2024-07-30


In [11]:
final_df.columns

MultiIndex([(       '통화',        '통화',        '통화'),
            (       '현찰',      '사실 때',        '환율'),
            (       '현찰',      '사실 때',    'Spread'),
            (       '현찰',      '파실 때',        '환율'),
            (       '현찰',      '파실 때',    'Spread'),
            (       '송금',      '보낼 때',      '보낼 때'),
            (       '송금',      '받을 때',      '받을 때'),
            (  'T/C 사실때',   'T/C 사실때',   'T/C 사실때'),
            ('외화 수표 파실때', '외화 수표 파실때', '외화 수표 파실때'),
            (   '매매 기준율',    '매매 기준율',    '매매 기준율'),
            (    '환가 료율',     '환가 료율',     '환가 료율'),
            (   '미화 환산율',    '미화 환산율',    '미화 환산율'),
            (     'date',          '',          '')],
           )

In [12]:
final_df.columns = ['통화', '현찰_사실때_환율', '현찰_사실때_Spread', '현찰_파실때_환율', '현찰_파실때_Spread',\
'송금_보낼때', '송금_받을때','T/C_파실때', '외화 수표 파실때', '매매 기준율', '환가 료율', '미화 환산율', 'date']
                    

In [13]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58 entries, 0 to 57
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   통화             58 non-null     object 
 1   현찰_사실때_환율      58 non-null     float64
 2   현찰_사실때_Spread  58 non-null     float64
 3   현찰_파실때_환율      58 non-null     float64
 4   현찰_파실때_Spread  58 non-null     float64
 5   송금_보낼때         58 non-null     float64
 6   송금_받을때         58 non-null     float64
 7   T/C_파실때        58 non-null     float64
 8   외화 수표 파실때      58 non-null     float64
 9   매매 기준율         58 non-null     float64
 10  환가 료율          58 non-null     float64
 11  미화 환산율         58 non-null     float64
 12  date           58 non-null     object 
dtypes: float64(11), object(2)
memory usage: 6.0+ KB


In [14]:
final_df

Unnamed: 0,통화,현찰_사실때_환율,현찰_사실때_Spread,현찰_파실때_환율,현찰_파실때_Spread,송금_보낼때,송금_받을때,T/C_파실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,date
0,미국 USD,1409.23,1.75,1360.77,1.75,1398.5,1371.5,0.0,1368.74,1385.0,7.19371,1.0,2024-07-30
1,일본 JPY (100),921.73,1.75,890.03,1.75,914.75,897.01,0.0,896.52,905.88,2.19682,0.6541,2024-07-30
2,유로 EUR,1527.47,1.99,1467.87,1.99,1512.64,1482.7,0.0,1480.38,1497.67,5.596,1.0814,2024-07-30
3,중국 CNY,200.76,5.0,181.64,5.0,193.11,189.29,0.0,0.0,191.2,4.87455,0.1381,2024-07-30
4,홍콩 HKD,180.76,1.97,173.78,1.97,179.04,175.5,0.0,175.22,177.27,6.57333,0.128,2024-07-30
5,태국 THB,40.54,5.0,36.3,6.0,38.99,38.23,0.0,38.19,38.61,4.525,0.0279,2024-07-30
6,대만 TWD,47.7,13.1,37.97,10.0,0.0,0.0,0.0,0.0,42.18,3.53333,0.0305,2024-07-30
7,필리핀 PHP,26.0,10.0,21.71,8.2,23.87,23.41,0.0,0.0,23.64,8.253,0.0171,2024-07-30
8,싱가포르 SGD,1052.49,1.99,1011.43,1.99,1042.27,1021.65,0.0,1019.78,1031.96,7.36633,0.7451,2024-07-30
9,호주 AUD,923.34,1.97,887.68,1.97,914.56,896.46,0.0,894.85,905.51,6.51433,0.6538,2024-07-30


In [19]:
import dbio

In [21]:
final_df['date'][0]

'2024-07-30'

In [None]:
to_db(f"{final_df['date'][0][}exchange_rate" 

In [23]:
57*365*10

208050

In [24]:
for mon in range(1,13):
    print(mon)

1
2
3
4
5
6
7
8
9
10
11
12
