# 홈플러스 상품 데이터 크롤링 (2024년 5월 기준)
홈플러스 모바일몰(https://mfront.homeplus.co.kr/leaflet?gnbNo=207&homeType=MART)에서 Selenium을 활용해 신선식품 데이터를 크롤링합니다. 사이트 구조가 자주 바뀌므로, 크롤링이 안 될 경우 개발자도구로 실제 상품 블록의 CSS 선택자를 반드시 확인해야 합니다.

크롬드라이버는 https://googlechromelabs.github.io/chrome-for-testing/#stable 에서 다운로드했습니다. 크롬드라이버의 메이저 버전은 PC에 설치된 크롬 브라우저의 메이저 버전과 일치해야 합니다.

In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
import time

In [17]:
# Column order of the returned DataFrame; also used so that an empty crawl
# still yields a frame (and CSV header) with the expected schema.
_ITEM_COLUMNS = [
    "상품명",
    "가격",
    "원가",
    "할인율",
    "링크",
    "이미지",
    "100g/1개당 가격",
    "추천코멘트",
    "평점",
    "리뷰수",
    "판매량",
]


def _optional_text(block, selector):
    """Return the stripped text of the first *selector* match in *block*, or ""."""
    # A single find_elements call covers both "is it there?" and "get it",
    # avoiding a second WebDriver round-trip per optional field.
    matches = block.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text.strip() if matches else ""


def _scroll_to_load_products(driver, max_scrolls, pause_sec=1.5):
    """Scroll to the page bottom *max_scrolls* times, clicking a "more" button when shown."""
    from selenium.common.exceptions import WebDriverException

    for _ in range(max_scrolls):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_sec)
        try:
            more_btn = driver.find_element(
                By.CSS_SELECTOR, 'button.moreBtn, button.css-1o6z5ng'
            )
            if more_btn.is_displayed():
                more_btn.click()
                time.sleep(1)
        except WebDriverException:
            # Best-effort: the button is usually absent (or not clickable);
            # in that case the scroll above is the only loading trigger.
            # Non-Selenium errors (e.g. KeyboardInterrupt) still propagate.
            continue


def _parse_product_block(block):
    """Extract one product card into a dict keyed by ``_ITEM_COLUMNS``.

    Name, price and link are required: a missing element raises a Selenium
    exception. All other fields fall back to an empty string when absent.
    """
    name = block.find_element(By.CSS_SELECTOR, "p.css-xktqki").text.strip()
    price = block.find_element(By.CSS_SELECTOR, "strong.priceValue").text.strip()
    href = block.find_element(By.CSS_SELECTOR, "a.productTitle").get_attribute("href")
    # NOTE(review): the prefix host (m.homeplus.co.kr) differs from the crawled
    # host (mfront.homeplus.co.kr) - confirm which one is intended.
    if href and href.startswith("/"):
        full_url = f"https://m.homeplus.co.kr{href}"
    else:
        full_url = href
    img_url = block.find_element(By.CSS_SELECTOR, "img").get_attribute("src")

    return {
        "상품명": name,
        "가격": price,
        "원가": _optional_text(block, ".orgPrice .priceValue"),
        "할인율": _optional_text(block, ".discountRate"),
        "링크": full_url,
        "이미지": img_url,
        "100g/1개당 가격": _optional_text(block, ".priceQty"),
        "추천코멘트": _optional_text(block, ".recomComment"),
        "평점": _optional_text(block, ".score"),
        "리뷰수": _optional_text(block, ".reviewCnt"),
        "판매량": _optional_text(block, ".saleCount"),
    }


def get_homeplus_items_detail(
    chrome_driver_path: str = r"D:\github\angeon-llm\chromedriver-win64\chromedriver.exe",
    url: str = "https://mfront.homeplus.co.kr/leaflet?gnbNo=207&homeType=MART",
    max_scrolls: int = 15,
) -> "pd.DataFrame":
    """Crawl product cards from the Homeplus mobile page into a DataFrame.

    Opens the page in headless Chrome with a mobile user agent, scrolls to
    trigger loading, then reads every element matching the product-card
    selector. Cards that raise a Selenium error while being parsed are
    reported on stdout and skipped.

    Args:
        chrome_driver_path: Filesystem path of the chromedriver executable.
        url: Page to crawl.
        max_scrolls: Number of scroll-to-bottom passes before parsing.

    Returns:
        One row per parsed product, all values as strings. The columns are
        always present, so a crawl that finds no products returns an empty
        frame with the full header rather than a column-less one.

    Raises:
        selenium.common.exceptions.WebDriverException: If the browser cannot
            be started or the page cannot be loaded/scrolled.
    """
    from selenium.common.exceptions import WebDriverException

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Linux; Android 10; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
    )

    driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
    items = []
    try:
        driver.get(url)
        time.sleep(5)  # fixed wait for the initial client-side render

        _scroll_to_load_products(driver, max_scrolls)

        # Product-card selector as observed in May 2024; the site markup
        # changes often, so re-check it in DevTools when the count is 0.
        product_blocks = driver.find_elements(
            By.CSS_SELECTOR, "div.unitItemWrap.typeCard div.unitItem.typeC4"
        )
        print(f"총 상품 수: {len(product_blocks)}개")

        for block in product_blocks:
            try:
                items.append(_parse_product_block(block))
            except WebDriverException as e:
                print(f"상품 처리 오류: {e}")
    finally:
        # Always shut the browser down, even when crawling fails midway,
        # so no headless Chrome process is left behind.
        driver.quit()

    return pd.DataFrame(items, columns=_ITEM_COLUMNS)

In [18]:
# Run the crawler, persist the result, and preview the first rows.
df_homeplus = get_homeplus_items_detail()
# "utf-8-sig" prepends a BOM; presumably chosen so spreadsheet tools detect
# the encoding of the Korean text - confirm with the consumer of the file.
df_homeplus.to_csv(
    path_or_buf="homeplus_items_detail.csv",
    encoding="utf-8-sig",
    index=False,
)
df_homeplus.head()

총 상품 수: 0개
