In [15]:
import time
import random
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re

In [17]:
# WebDriver 설정, 드라이버 객체 생성
def setup_driver():
    """Start a Chrome session and pair it with an explicit-wait helper.

    The chromedriver path handed to ``Service`` is whatever
    ``ChromeDriverManager().install()`` returns.

    Returns:
        A ``(driver, wait)`` tuple: the Chrome WebDriver and a
        ``WebDriverWait`` bound to it with a 10-second timeout.
    """
    chrome_service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=chrome_service)
    return browser, WebDriverWait(browser, 10)

# 특정 페이지에서 레시피 링크 가져오기
def get_recipe_links(driver, page_url):
    """Collect the recipe URLs linked from one listing page.

    Args:
        driver: Selenium WebDriver used to load the page.
        page_url: URL of the listing page to open.

    Returns:
        list[str]: ``href`` values of the ``common_sp_link`` elements, in
        page order, with empty values and repeated URLs dropped.
    """
    driver.get(page_url)
    # Pause for a random 1.5-3 s after navigation before reading the page.
    time.sleep(random.uniform(1.5, 3))

    hrefs = []
    seen = set()
    for anchor in driver.find_elements(By.CLASS_NAME, "common_sp_link"):
        # Read the attribute once per element: each get_attribute call is a
        # separate WebDriver command.
        href = anchor.get_attribute("href")
        if href and href not in seen:
            seen.add(href)
            hrefs.append(href)
    return hrefs

# 레시피 데이터 추출
def _first_text(parent, by, value):
    """Return the stripped text of the first match under *parent*, or None.

    Uses ``find_elements`` so that a missing element is reported as ``None``
    instead of an exception that would need a broad ``except`` to absorb.
    """
    matches = parent.find_elements(by, value)
    return matches[0].text.strip() if matches else None


def _extract_summary(driver):
    """Return the stripped text of every <span> inside the summary blocks."""
    summary = []
    for info in driver.find_elements(By.CLASS_NAME, "view2_summary_info"):
        spans = info.find_elements(By.TAG_NAME, "span")
        summary.extend(span.text.strip() for span in spans)
    return summary


def _extract_ingredients(driver):
    """Return a ``{name: quantity}`` dict built from the ingredient list items.

    A repeated name keeps the last quantity seen, because names are dict keys.
    """
    ingredients = {}
    for item in driver.find_elements(By.CSS_SELECTOR, "#divConfirmedMaterialArea li"):
        name = _first_text(item, By.CLASS_NAME, "ingre_list_name")
        if name is None:
            # Not an ingredient row; skip it rather than lose the whole recipe.
            continue
        quantity = _first_text(item, By.CLASS_NAME, "ingre_list_ea")
        # A missing quantity is stored as an empty string.
        ingredients[name] = quantity if quantity is not None else ""
    return ingredients


def _extract_tools(driver):
    """Return the names listed in the tools section, or [] if it is absent."""
    # NOTE(review): positional XPath, tied to the page layout at the time of
    # writing - confirm it still points at the tools section.
    sections = driver.find_elements(By.XPATH, '//*[@id="contents_area_full"]/div[6]/div[4]')
    if not sections:
        return []
    return [
        entry.text.strip()
        for entry in sections[0].find_elements(By.CLASS_NAME, "ingre_list_name")
    ]


def _extract_steps(driver):
    """Return ``{step_number: text}`` for consecutive ``stepdescrN`` elements."""
    steps = {}
    number = 1
    while True:
        found = driver.find_elements(By.ID, f"stepdescr{number}")
        if not found:
            # Steps are numbered from 1; the first missing id ends the list.
            break
        step_element = found[0]
        text = step_element.text.strip()
        # Remove the text of the nested "step_add" element, when present,
        # from the step description.
        extra = _first_text(step_element, By.CLASS_NAME, "step_add")
        if extra:
            text = text.replace(extra, "").strip()
        steps[number] = text
        number += 1
    return steps


def extract_recipe_data(driver, wait, href):
    """Load one recipe page and scrape its contents into a dict.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        wait: ``WebDriverWait`` used to block until the summary block exists.
        href: URL of the recipe page.

    Returns:
        dict with keys ``"Title"``, ``"Summary Info"`` (list of str),
        ``"Ingredients"`` (name -> quantity), ``"Tools"`` (list of str) and
        ``"Recipe Steps"`` (int step number -> text); or ``None`` if any
        ``Exception`` is raised while loading or parsing, in which case the
        error is printed.

    Optional elements are detected with ``find_elements`` rather than bare
    ``except:`` clauses, so ``KeyboardInterrupt`` and ``SystemExit`` are never
    swallowed while scraping.
    """
    try:
        driver.get(href)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, "view2_summary_info")))
        # Pause for a random 1.5-3 s once the summary block is present.
        time.sleep(random.uniform(1.5, 3))

        return {
            "Title": driver.title.strip(),
            "Summary Info": _extract_summary(driver),
            "Ingredients": _extract_ingredients(driver),
            "Tools": _extract_tools(driver),
            "Recipe Steps": _extract_steps(driver),
        }

    except Exception as e:
        print(f"Error processing {href}: {e}")
        return None

# 데이터를 JSON 파일로 저장
def save_recipe_data(recipe_data):
    """Write one recipe dict to ``<sanitized title>.json`` in the working directory.

    The file name is derived from ``recipe_data["Title"]``; an existing file
    with the same name is overwritten.

    Args:
        recipe_data: JSON-serializable dict containing a ``"Title"`` string.

    Raises:
        KeyError: if ``recipe_data`` has no ``"Title"`` key.
        OSError: if the file cannot be created or written.
    """
    title = recipe_data["Title"]
    # Replace characters that are unsafe in file names: the reserved
    # punctuation plus ASCII control characters (newline, tab, NUL, ...).
    safe_title = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", title).strip()
    if not safe_title:
        # An empty stem would produce a hidden ".json" file.
        safe_title = "untitled"

    with open(f"{safe_title}.json", "w", encoding="utf-8") as f:
        json.dump(recipe_data, f, ensure_ascii=False, indent=4)


# 메인 실행 함수
def main(start_page=1, end_page=2):
    """Crawl a range of listing pages and save every recipe found as JSON.

    Args:
        start_page: First listing page to crawl (inclusive).
        end_page: Last listing page to crawl (inclusive). The defaults
            reproduce the original two-page test range.

    The browser is always closed on exit, including on error or Ctrl-C.
    """
    driver, wait = setup_driver()

    try:
        for page in range(start_page, end_page + 1):
            page_url = f"https://www.10000recipe.com/recipe/list.html?order=reco&page={page}"
            print(f"Processing page: {page}")

            for href in get_recipe_links(driver, page_url):
                print(f"Processing recipe: {href}")
                recipe_data = extract_recipe_data(driver, wait, href)
                if not recipe_data:
                    # Extraction failed and already reported the error.
                    continue

                print(f"Title: {recipe_data['Title']}")
                try:
                    save_recipe_data(recipe_data)
                except OSError as e:
                    # One unwritable file must not abort the rest of the crawl.
                    print(f"Error saving {href}: {e}")

    finally:
        driver.quit()

# Run the crawler only when this module is executed as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()


Processing page: 1
Processing recipe: https://www.10000recipe.com/recipe/7039682
Title: [가지양념구이] 간단하지만 맛있는 황금레시피 | 신혼밥상
Processing recipe: https://www.10000recipe.com/recipe/7039214
Title: 치킨너겟 교촌 허니콤보 만들기
Processing recipe: https://www.10000recipe.com/recipe/6984517
Title: 도시락 반찬으로 최고! 스팸감자조림♡
Processing recipe: https://www.10000recipe.com/recipe/7009944
Title: 오이고추된장무침 (사계절 먹기좋은 밑반찬 고추무침 5분 레시피)
Processing recipe: https://www.10000recipe.com/recipe/7003487
Title: 청경채 된장 무침으로
Processing recipe: https://www.10000recipe.com/recipe/7022775
Title: 시금치무침 (시금치데치는 방법 시금치나물요리 나물반찬 반찬레시피)
Processing recipe: https://www.10000recipe.com/recipe/7029407
Title: 즉석,오이 부추 무침(백종원양념)
Processing recipe: https://www.10000recipe.com/recipe/6989690
Title: 아이반찬 양파소세지볶음 레시피 양파요리 초스피드반찬
Processing recipe: https://www.10000recipe.com/recipe/7007585
Title: 베이컨감자채볶음
Processing recipe: https://www.10000recipe.com/recipe/6993517
Title: 새송이버섯버터굴소스볶음 간단반찬
Processing recipe: https://www.10000recipe.com/recipe/7023640

KeyboardInterrupt: 