In [None]:

import re
import time
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

def sanitize_text(text):
    """Return *text* with reserved punctuation and non-printable characters removed.

    Three steps are applied in order:

    1. Every square bracket, asterisk, question mark, forward slash,
       backslash and colon is deleted.
    2. Every character for which ``str.isprintable()`` is false is deleted.
       This includes newlines and tabs, so multi-line input is joined.
    3. Leading and trailing whitespace is trimmed.
    """
    without_reserved = re.sub(r'[\[\]\*\?/\\:]', '', text)
    printable_only = filter(str.isprintable, without_reserved)
    return ''.join(printable_only).strip()

def get_blog_full_text(driver, url):
    """Load a blog post and return its sanitized body text.

    Navigates ``driver`` to ``url``, tries to switch into the first
    ``<iframe>`` on the page, parses the page source with BeautifulSoup and
    returns the text of the first container that matches one of a fixed list
    of CSS selectors.

    Args:
        driver: Selenium WebDriver used for navigation. It is returned to the
            top-level document before this function finishes parsing.
        url: Address of the blog post to load.

    Returns:
        The sanitized text of the first matching container, or ``""`` when no
        selector matches or any error occurs (the error is printed).
    """
    # Tried in order; the first selector that matches wins.
    candidates = (
        "div.se-main-container",
        "div#postViewArea",
        "div.post_ct",
        "div.contentArea",
    )
    try:
        driver.get(url)
        time.sleep(2)
        try:
            iframe = driver.find_element(By.TAG_NAME, "iframe")
            driver.switch_to.frame(iframe)
            time.sleep(2)
        except Exception:
            # Best effort: fall back to the top-level document. ``Exception``
            # (not a bare ``except``) so Ctrl-C / SystemExit still propagate.
            print("⚠️ iframe 없음 또는 전환 실패 → 기본 페이지에서 본문 추출 시도")
        try:
            html = driver.page_source
        finally:
            # Do not leave the driver focused inside the iframe for the caller.
            driver.switch_to.default_content()
        soup = BeautifulSoup(html, "html.parser")
        for selector in candidates:
            content = soup.select_one(selector)
            if content:
                return sanitize_text(content.get_text(separator="\n"))
        return ""
    except Exception as e:
        print("❌ 본문 추출 오류:", e)
        return ""

def crawl_naver_blog(driver, keyword, max_pages=2):
    """Collect blog posts from Naver blog search results for *keyword*.

    For each result page, gathers the ``a.api_txt_lines`` anchors, keeps those
    whose href contains ``blog.naver.com``, and fetches each post body with
    ``get_blog_full_text``.

    Args:
        driver: Selenium WebDriver used for all navigation.
        keyword: Raw (not yet URL-encoded) search term; it is encoded here.
        max_pages: Number of result pages to visit; page ``p`` is requested
            with ``start=(p - 1) * 10 + 1``.

    Returns:
        A list of dicts with the keys ``keyword``, ``title``, ``link`` and
        ``body``, one per processed blog link.
    """
    results = []
    # Encode once so characters such as '&', '#' or '+' cannot corrupt the query.
    encoded_keyword = quote_plus(str(keyword))
    for page in range(1, max_pages + 1):
        start = (page - 1) * 10 + 1
        url = f"https://search.naver.com/search.naver?query={encoded_keyword}&where=blog&start={start}"
        driver.get(url)
        time.sleep(2)

        links = driver.find_elements(By.CSS_SELECTOR, "a.api_txt_lines")
        print(f"🔗 Page {page} - 링크 수집 개수: {len(links)}")

        # Snapshot title/href BEFORE visiting any post: fetching a post
        # navigates the driver away from the search page, which would make
        # the remaining WebElement handles stale.
        targets = []
        for link in links:
            try:
                title = link.text
                href = link.get_attribute("href")
            except Exception as e:
                print("⚠️ 링크 처리 오류:", e)
                continue
            # Skip anchors without an href and anything that is not a Naver blog post.
            if not href or "blog.naver.com" not in href:
                continue
            targets.append((title, href))

        for title, href in targets:
            try:
                print(f"🔗 링크: {href}")
                body = get_blog_full_text(driver, href)
                print(f"📄 {title[:30]} → 본문 길이: {len(body)}")
                print(f"📝 내용 미리보기: {body[:100]}\n")
                # Filter removed: every result is stored, even with an empty body.
                results.append({'keyword': keyword, 'title': title, 'link': href, 'body': body})
            except Exception as e:
                print("⚠️ 링크 처리 오류:", e)
                continue
    return results

def main():
    """Prompt for a keyword, crawl Naver blog search results and save them as CSV.

    Starts a Chrome session, runs ``crawl_naver_blog`` with the entered
    keyword, prints a summary of the collected posts and writes them to
    ``naver_blog_debug_output.csv`` (UTF-8 with BOM).
    """
    search_keyword = input("🔍 검색 키워드 입력 (예: 부트캠프): ").strip()

    options = Options()
    # Headless mode is left off while debugging; add "--headless=new" to enable it.
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--window-size=1920,1080")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        print(f"▶ '{search_keyword}' 크롤링 시작...")
        results = crawl_naver_blog(driver, search_keyword)
    finally:
        # Always close the browser, even if crawling raised, so no Chrome
        # process is left behind.
        driver.quit()

    print(f"\n✅ 총 {len(results)}개 결과 수집됨!\n")
    for item in results:
        print(f"[{item['title']}] → {item['link']}")

    # Fixed columns keep the CSV header stable even when nothing was collected.
    df = pd.DataFrame(results, columns=['keyword', 'title', 'link', 'body'])
    df.to_csv("naver_blog_debug_output.csv", index=False, encoding='utf-8-sig')
    print("\n📁 저장 완료: naver_blog_debug_output.csv")

# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
