In [None]:
# This script was originally written in Korean. Comments and string literals have been translated into English for wider accessibility.

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
import os

# Selenium settings: obtain the ChromeDriver path through webdriver_manager
# and start the Chrome session shared by the whole script.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.set_page_load_timeout(60)  # Set page load timeout (seconds)

# Set page range for crawling
# BASE_URL is the board list URL; the page number is appended to it.
BASE_URL = "https://gall.dcinside.com/board/lists/?id=bitcoins_new1&page="
start_page = 26400  # First page to crawl
end_page = 30000  # Target end page (inclusive in the main loop)
data = []  # Rows collected since the last save; emptied by save_data_to_csv
save_interval = 100  # Data saving interval (pages)

# Data saving function
def save_data_to_csv(data, filename="01_dc_bitcoin_data.csv"):
    """Append the collected rows to a CSV file and empty the buffer.

    Args:
        data: List of row dicts to write. It is cleared in place after the
            write so the caller can keep reusing the same list.
        filename: Path of the CSV file to create or append to.

    Nothing is written (and no file is created) when ``data`` is empty.
    """
    if not data:
        # An empty DataFrame would create a header-less file, and every later
        # append would then skip the header because the file already exists.
        return

    # Only emit the header when starting a fresh (missing or zero-byte) file.
    is_new_file = not os.path.exists(filename) or os.path.getsize(filename) == 0
    pd.DataFrame(data).to_csv(
        filename,
        mode='w' if is_new_file else 'a',
        header=is_new_file,
        encoding='utf-8-sig',
        index=False,
    )
    print(f"Saved {len(data)} data items.")
    data.clear()  # Reset the shared buffer so rows are not written twice

# CSV column name -> CSS selector (relative to an article row) for each field
# that is read after the post number has been validated.
_ROW_FIELDS = (
    ('Title', 'td.gall_tit > a'),
    ('Date', 'td.gall_date'),
    ('Views', 'td.gall_count'),
    ('Recommends', 'td.gall_recommend'),
)


def _cell_text(article, selector):
    """Return the stripped text of the first element matching *selector*.

    Returns None when the row has no such element, so callers can decide how
    to handle an incomplete row instead of hitting an AttributeError.
    """
    cell = article.select_one(selector)
    return None if cell is None else cell.text.strip()


def _parse_article_row(article):
    """Convert one article row into a record dict, or return None to skip it."""
    gall_no = _cell_text(article, 'td.gall_num')
    if gall_no is None or not gall_no.isdigit():
        # Skip if ID is not a digit (likely an advertisement)
        return None

    record = {'Post_ID': gall_no}
    for column, selector in _ROW_FIELDS:
        value = _cell_text(article, selector)
        if value is None:
            # Drop only this row; the rest of the page is still usable.
            print(f"Skipping post {gall_no}: no element matches '{selector}'")
            return None
        record[column] = value
    return record


# Page crawling function
def fetch_page_data(page):
    """Load one board list page and return its article rows.

    Args:
        page: Page number appended to ``BASE_URL``.

    Returns:
        A list of dicts with the keys ``Post_ID``, ``Title``, ``Date``,
        ``Views`` and ``Recommends`` (all strings). Rows whose post number is
        not numeric, or that lack one of the expected cells, are left out.
        An empty list is returned when loading or parsing the page fails.
    """
    try:
        driver.get(BASE_URL + str(page))
        time.sleep(2)  # Wait for page to load
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        articles = soup.select('tbody > tr.ub-content')  # Select article rows

        parsed = (_parse_article_row(article) for article in articles)
        return [record for record in parsed if record is not None]
    except Exception as e:
        # Best effort: a failing page is reported and skipped so the crawl
        # over the remaining pages can continue.
        print(f"Error on page {page}: {e}")
        return []

# Main crawling loop
# Walks every page from start_page to end_page (inclusive), buffering rows in
# `data` and flushing them to CSV at regular intervals.
try:
    for page in range(start_page, end_page + 1):
        print(f"Crawling page {page}...")
        page_data = fetch_page_data(page)
        data.extend(page_data)

        # Save data periodically, and once more on the very last page
        if page % save_interval == 0 or page == end_page:
            save_data_to_csv(data)
except KeyboardInterrupt:
    # Ctrl+C stops the crawl; rows buffered so far are saved below
    print("Crawling interrupted.")
finally:
    try:
        # Save remaining data
        if data:
            save_data_to_csv(data)
    finally:
        # Close the browser even if the final save raises, so no Chrome
        # process is left running.
        driver.quit()
    print("Crawling finished.")