In [80]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs
from tqdm import tqdm
import time
import random
import lxml
import pandas as pd

In [83]:
# 출력 행 제한을 없애는 코드
pd.set_option('display.max_rows', 200)

### 날짜 생성 코드

In [29]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

# 오늘 날짜를 가져옵니다.
today = datetime.today()
# 10년 전의 날짜를 계산합니다.
ten_years_ago = today - relativedelta(years=10)

# 날짜 범위를 생성합니다.
dates = [ten_years_ago + relativedelta(days=i) for i in range((today - ten_years_ago).days)]

# 결과를 출력합니다.
for date in dates[::-1][:10]:
    print(date.strftime('%Y-%m-%d'))

2024-07-18
2024-07-17
2024-07-16
2024-07-15
2024-07-14
2024-07-13
2024-07-12
2024-07-11
2024-07-10
2024-07-09


In [30]:
# 크롬 옵션즈에 User-Agent, lang 같은 정보를 담아 셀레니움을 이용한 크롤링이 아닌 것 처럼 만들기
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.add_argument('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36')
options.add_argument("lang=ko_KR")

# 웹드라이버를 버전에 맞게 자동으로 다운 받고 옵션을 추가해 줌
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
driver.set_window_size(1920,1080)   # 웹브라우저 해상도 조절

driver.get("https://www.hanabank.com/cms/rate/index.do?contentUrl=/cms/rate/wpfxd651_01i.do#//HanaBank")  # 크롤링 할 웹사이트 주소

In [82]:
df_list = []
for idx, date in enumerate(dates[::-1][:3]):
    # 웹 요소가 나타날 때까지 최대 10초 동안 기다립니다.
    wait = WebDriverWait(driver, 10)

    # 날짜 박스 #tmpInqStrDt
    datebox = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#tmpInqStrDt')))
    datebox.clear()   # 기존에 입력된 값을 지웁니다.
    datebox.send_keys(f"{date.date()}")   # 새로운 값을 입력합니다.

    # 조회버튼   #HANA_CONTENTS_DIV > div.btnBoxCenter > a
    search_box = driver.find_element(By.CSS_SELECTOR, '#HANA_CONTENTS_DIV > div.btnBoxCenter > a')
    search_box.click()   # 조회버튼 클릭
    # 생성한 시간 동안 대기합니다.
    time.sleep(random.uniform(3, 7))
    exchange_rate_table = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#searchContentDiv > div.printdiv > table')))

    # 페이지의 전체 HTML 소스를 가져옵니다.
    page_html = driver.page_source
    # soup = bs(page_html, "lxml")     # beautifulsoup으로 html을 parsing
    exchange_rate = pd.read_html(page_html)
    exchange_rate = exchange_rate[1]
    exchange_rate['date'] = f"{date.date()}"
    df_list.append(exchange_rate)
    print(f"전체 {len(dates)} 중 {idx+1:04d} 페이지 완료", end="\r")
# 리스트의 모든 DataFrame을 하나로 합칩니다.
final_df = pd.concat(df_list)

전체 3653 중 0003 페이지 완료

In [75]:
final_df['date'][0]

0    2024-07-18
0    2024-07-17
0    2024-07-16
Name: date, dtype: object

In [78]:
final_df[final_df['date'] == '2024-07-16']

Unnamed: 0_level_0,통화,현찰,현찰,현찰,현찰,송금,송금,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,date
Unnamed: 0_level_1,통화,사실 때,사실 때,파실 때,파실 때,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_1
Unnamed: 0_level_2,통화,환율,Spread,환율,Spread,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_2
0,미국 USD,1405.16,1.75,1356.84,1.75,1394.5,1367.5,0.0,1364.75,1381.0,7.18399,1.0,2024-07-16
1,일본 JPY (100),899.59,1.75,868.65,1.75,892.78,875.46,0.0,874.99,884.12,2.18864,0.6402,2024-07-16
2,유로 EUR,1540.17,1.99,1480.07,1.99,1525.22,1495.02,0.0,1492.7,1510.12,5.547,1.0935,2024-07-16
3,중국 CNY,199.4,5.0,180.42,5.0,191.8,188.02,0.0,0.0,189.91,5.54364,0.1375,2024-07-16
4,홍콩 HKD,180.37,1.97,173.41,1.97,178.65,175.13,0.0,174.85,176.89,6.53933,0.1281,2024-07-16
5,태국 THB,40.35,5.0,36.13,6.0,38.81,38.05,0.0,38.01,38.43,4.525,0.0278,2024-07-16
6,대만 TWD,47.95,13.1,38.16,10.0,0.0,0.0,0.0,0.0,42.4,3.51666,0.0307,2024-07-16
7,필리핀 PHP,26.07,10.0,21.76,8.2,23.93,23.47,0.0,0.0,23.7,7.661,0.0172,2024-07-16
8,싱가포르 SGD,1050.55,1.99,1009.57,1.99,1040.36,1019.76,0.0,1017.9,1030.06,7.35033,0.7459,2024-07-16
9,호주 AUD,947.01,1.97,910.43,1.97,938.0,919.44,0.0,917.8,928.72,6.46966,0.6725,2024-07-16


In [84]:
final_df

Unnamed: 0_level_0,통화,현찰,현찰,현찰,현찰,송금,송금,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,date
Unnamed: 0_level_1,통화,사실 때,사실 때,파실 때,파실 때,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_1
Unnamed: 0_level_2,통화,환율,Spread,환율,Spread,보낼 때,받을 때,T/C 사실때,외화 수표 파실때,매매 기준율,환가 료율,미화 환산율,Unnamed: 13_level_2
0,미국 USD,1410.25,1.75,1361.75,1.75,1399.5,1372.5,0.0,1369.74,1386.0,7.19119,1.0,2024-07-18
1,일본 JPY (100),896.0,1.75,865.18,1.75,889.21,871.97,0.0,871.5,880.59,2.18864,0.6353,2024-07-18
2,유로 EUR,1540.51,1.99,1480.41,1.99,1525.56,1495.36,0.0,1493.03,1510.46,5.558,1.0898,2024-07-18
3,중국 CNY,199.95,5.0,180.91,5.0,192.33,188.53,0.0,0.0,190.43,5.39439,0.1374,2024-07-18
4,홍콩 HKD,180.95,1.97,173.97,1.97,179.23,175.69,0.0,175.41,177.46,6.54233,0.128,2024-07-18
5,태국 THB,40.27,5.0,36.06,6.0,38.74,37.98,0.0,37.94,38.36,4.525,0.0277,2024-07-18
6,대만 TWD,47.93,13.1,38.15,10.0,0.0,0.0,0.0,0.0,42.38,3.51933,0.0306,2024-07-18
7,필리핀 PHP,26.15,10.0,21.84,8.2,24.01,23.55,0.0,0.0,23.78,7.667,0.0172,2024-07-18
8,싱가포르 SGD,1051.81,1.99,1010.77,1.99,1041.6,1020.98,0.0,1019.11,1031.29,7.357,0.7441,2024-07-18
9,호주 AUD,947.83,1.97,911.21,1.97,938.81,920.23,0.0,918.59,929.52,6.47233,0.6706,2024-07-18


In [73]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 174 entries, 0 to 57
Data columns (total 13 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   (통화, 통화, 통화)                       174 non-null    object 
 1   (현찰, 사실 때, 환율)                     174 non-null    float64
 2   (현찰, 사실 때, Spread)                 174 non-null    float64
 3   (현찰, 파실 때, 환율)                     174 non-null    float64
 4   (현찰, 파실 때, Spread)                 174 non-null    float64
 5   (송금, 보낼 때, 보낼 때)                   174 non-null    float64
 6   (송금, 받을 때, 받을 때)                   174 non-null    float64
 7   (T/C 사실때, T/C 사실때, T/C 사실때)        174 non-null    float64
 8   (외화 수표 파실때, 외화 수표 파실때, 외화 수표 파실때)  174 non-null    float64
 9   (매매 기준율, 매매 기준율, 매매 기준율)           174 non-null    float64
 10  (환가 료율, 환가 료율, 환가 료율)              174 non-null    float64
 11  (미화 환산율, 미화 환산율, 미화 환산율)           174 non-null    float64
 12  