In [29]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
import re
import time
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import openpyxl
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import os

In [30]:
# Chrome option setup: a default Options object; no extra flags are set here.
options = Options()

In [31]:
# Initialize the WebDriver: webdriver_manager's install() result is handed to
# a Service, and Chrome is started with that service and the options object
# prepared earlier.
chromedriver_path = ChromeDriverManager().install()
chrome_service = Service(chromedriver_path)
driver = webdriver.Chrome(options=options, service=chrome_service)

In [32]:
# Open the shop page and wait until the product grid has rendered.
driver.get("https://shop.crossenf.com/?iso_code=TH")

# Wait for the first product card instead of sleeping a fixed number of
# seconds: a fixed sleep is too short on a slow connection and wasted time on
# a fast one. The selector is the same one the collection step uses.
page_load_timeout = 15  # seconds
try:
    WebDriverWait(driver, page_load_timeout).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "div.grid.grid-cols-2.gap-x-10px.pb-20px > div")
        )
    )
except TimeoutException:
    # Keep going (best effort), exactly as the fixed sleep did; the collection
    # step simply finds nothing if the grid never appears.
    print("⚠️ 상품 목록이 제한 시간 안에 나타나지 않았습니다. 계속 진행합니다.")

In [33]:
# --------------------------------------------------
# [Step 1] Scroll once -> collect products -> repeat (with console output)
# --------------------------------------------------
#
# Each round scrolls to the bottom of the page, re-reads every product card in
# the grid, and appends cards whose name has not been seen yet to
# `product_data`. The loop ends after `max_idle_rounds` consecutive rounds in
# which no new product appeared and the page did not grow.
# NOTE(review): products are de-duplicated by name only, so two different
# products sharing a name are stored once - confirm this is intended.

scroll_pause = 1.0       # seconds to wait after each scroll for lazy loading
max_idle_rounds = 3      # consecutive rounds without progress before stopping
seen_names = set()
product_data = []

last_height = 0          # document height observed after the previous round
idle_rounds = 0          # consecutive rounds without any progress
scroll_round = 1         # scroll counter (used for console output)

while True:
    print(f"\n🔽 [스크롤 {scroll_round}회차] 페이지 끝으로 이동 중...")
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(scroll_pause)

    # Collect every card currently present in the product grid.
    products = driver.find_elements(By.CSS_SELECTOR, "div.grid.grid-cols-2.gap-x-10px.pb-20px > div")
    new_count = 0

    for product in products:
        try:
            # Product name; grid children without a name element are skipped.
            try:
                name = product.find_element(By.CSS_SELECTOR, "p.text-14px").text.strip()
            except NoSuchElementException:
                continue

            # Skip cards whose text is still empty as well as known products.
            if not name or name in seen_names:
                continue

            # Image URL
            img = product.find_element(By.CSS_SELECTOR, "div img").get_attribute("src")

            # Price texts: the meaning of each entry depends on how many
            # non-empty <p> texts the card exposes.
            price_texts = [p.text.strip() for p in product.find_elements(By.CSS_SELECTOR, "div > div > p")]
            prices = [text for text in price_texts if text]

            origin_price = ""
            discount_price = ""
            discount_rate = ""

            if len(prices) == 1:
                discount_price = prices[0]
            elif len(prices) == 2:
                origin_price, discount_price = prices
            elif len(prices) == 3:
                origin_price, discount_rate, discount_price = prices

            # Discount rate: a <span> containing '%' overrides the value above.
            try:
                discount_span = product.find_element(By.XPATH, ".//span[contains(text(), '%')]")
                discount_rate = discount_span.text.strip()
            except NoSuchElementException:
                pass
        except Exception as e:
            # Best effort: one unreadable card must not abort the crawl, but
            # the failure is reported instead of being silently dropped. The
            # card is NOT marked as seen, so a later round can retry it.
            print(f"⚠️ 상품 정보를 읽지 못해 건너뜁니다: {type(e).__name__}: {e}")
            continue

        # Record the product only after every field was read successfully.
        seen_names.add(name)
        new_count += 1
        product_data.append({
            "상품이미지": img,
            "상품명": name,
            "원상품금액": origin_price,
            "할인율": discount_rate,
            "판매금액": discount_price
        })

        # Print the collected fields.
        print(f"\n📦 새 상품 수집됨:")
        print(f"  🛒 상품명     : {name}")
        print(f"  🖼️ 이미지 URL: {img}")
        print(f"  💰 원가       : {origin_price}")
        print(f"  🔻 할인율     : {discount_rate}")
        print(f"  ✅ 판매가     : {discount_price}")

    # Progress means new products were collected or the page grew. A single
    # empty round is not enough to stop: lazy loading may simply need longer
    # than `scroll_pause`.
    current_height = driver.execute_script("return document.body.scrollHeight")
    if new_count > 0 or current_height > last_height:
        idle_rounds = 0
    else:
        idle_rounds += 1
    last_height = current_height

    if idle_rounds >= max_idle_rounds:
        print("\n✅ 더 이상 새로운 상품이 없습니다. 종료합니다.")
        break

    if new_count > 0:
        print(f"\n🆕 이번 스크롤에서 수집된 새 상품 수: {new_count}개")
    scroll_round += 1


🔽 [스크롤 1회차] 페이지 끝으로 이동 중...

📦 새 상품 수집됨:
  🛒 상품명     : มาม่า รสส้มยำกุ้งน้ำข้น 1 แพค (5ห่อ)
  🖼️ 이미지 URL: https://s3cross-media.s3.amazonaws.com/shop_media/real_d33fdb3a-79b5-4765-8b4f-5131aabd2211_thumbnail_TH.png
  💰 원가       : 3,600วอน
  🔻 할인율     : 
  ✅ 판매가     : 3,000วอน

📦 새 상품 수집됨:
  🛒 상품명     : หน่อไม้ดองเส้น 454ก.
  🖼️ 이미지 URL: https://s3cross-media.s3.amazonaws.com/shop_media/real_78a74a97-7487-4168-8257-ebcf744eb086_thumbnail_TH_thumbnail.jpg
  💰 원가       : 
  🔻 할인율     : 
  ✅ 판매가     : 2,500วอน

📦 새 상품 수집됨:
  🛒 상품명     : รสดีเมนู เครื่องพะโล้กึ่งสำเร็จรูปชนิดผง 60g
  🖼️ 이미지 URL: https://s3cross-media.s3.amazonaws.com/shop_media/1034_f8c1c7f4-b9b6-4cb4-992e-9219bac170c0_thumbnail_TH-1_thumbnail.jpg
  💰 원가       : 
  🔻 할인율     : 
  ✅ 판매가     : 1,500วอน

📦 새 상품 수집됨:
  🛒 상품명     : น้ำจิ้มลูกชิ้น สูตรเผ็ดจัดจ้าน 300ก.
  🖼️ 이미지 URL: https://s3cross-media.s3.amazonaws.com/shop_media/real_6bbbca31-4155-4350-acbd-b48d959594de_thumbnail.png
  💰 원가       : 3,200วอน
  🔻 할인율     : 
  ✅

In [28]:
# --------------------------------------------------
# [Step 2] Shut down the browser and save the results
# --------------------------------------------------

# Scraping is finished, so the browser session is closed first.
driver.quit()

# One row per collected product, written to Excel without the index column.
output_file = "크로스상품목록_완전수집.xlsx"
df = pd.DataFrame(data=product_data)
df.to_excel(output_file, index=False)

saved_total = len(df)
print(f"🎉 크롤링 완료! 총 {saved_total}개 상품 저장됨.")

🎉 크롤링 완료! 총 428개 상품 저장됨.
