# In [25]:
import time
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup

# Function to scrape a single recipe
def scrape_recipe(url, timeout=30):
    """Fetch one recipe page and extract its details into a flat dictionary.

    Args:
        url: URL of the recipe page to download.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests.get`` may block indefinitely on an unresponsive host.

    Returns:
        A dict with the keys ``recipe_title``, ``recipe_rating``,
        ``global_likes``, ``difficulty``, ``preparation``, ``baking``,
        ``resting``, ``ingredients``, ``steps``, ``tags``, ``Cal``, ``Fat``,
        ``Protein`` and ``Carb``. ``ingredients``, ``steps`` and ``tags`` are
        lists of strings, ``recipe_rating`` is an int, and every other value
        is a string. Fields missing from the page get a placeholder string
        instead of raising.
    """
    def text_of(element, default):
        """Return the stripped text of *element*, or *default* if it is None."""
        return element.text.strip() if element is not None else default

    # NOTE(review): `requests` is not imported in the visible part of this
    # file -- confirm it is imported elsewhere before this function runs.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Title: fall back to a placeholder rather than crashing on a page
    # without an <h1>.
    recipe_title = text_of(soup.find('h1'), 'No title')

    # Rating: count the star items that also carry the "full" class.
    rating_elements = soup.find_all('li', class_='rc-rate-star')
    recipe_rating = sum(
        1 for star in rating_elements
        if 'rc-rate-star-full' in star.get('class', [])
    )

    # Global likes: the text of the <span> inside the likes button.
    likes_button = soup.find('button', {'data-test': 'global-likes'})
    likes_span = likes_button.find('span') if likes_button is not None else None
    global_likes = text_of(likes_span, 'No likes')

    # Difficulty: looked up once instead of twice.
    difficulty = text_of(
        soup.find('span', {'data-test': 'recipe-difficulty-level'}),
        'No difficulty',
    )

    # Preparation, baking and resting times are assigned purely by position
    # among the matching elements; missing trailing entries get a placeholder.
    time_elements = soup.find_all('div', {'class': 'text-base font-semibold sm:text-lg'})
    time_defaults = ('No preparation time', 'No baking time', 'No resting time')
    preparation, baking, resting = (
        time_elements[index].text.strip() if index < len(time_elements) else default
        for index, default in enumerate(time_defaults)
    )

    # Ingredients: only items that have an amount plus a name (plain text or
    # link) are kept, formatted as "<amount> <name>".
    ingredients = []
    for element in soup.find_all('div', {'data-test': 'recipe-ingredients-item'}):
        amount_element = element.find('div', {'data-test': 'recipe-ingredients-item-amount'})
        name_element = element.find('div', {'class': 'flex-1'})
        if name_element is None:
            name_element = element.find('a', {'class': 'global-link'})
        if amount_element is None or name_element is None:
            continue
        ingredients.append(f"{amount_element.text.strip()} {name_element.text.strip()}")

    # Steps
    steps = [step.text.strip() for step in soup.find_all('p', class_='text-pretty')]

    # Tags: prefer the anchor text; fall back to the list item's own text so
    # a tag without a link does not raise AttributeError.
    tags = []
    for tag in soup.find_all('li', {'data-test': 'recipe-tags-item'}):
        anchor = tag.find('a')
        tags.append(text_of(anchor if anchor is not None else tag, ''))

    # Nutrition: for each bold span, the first <span> of its enclosing <div>
    # is treated as the label; only the four known labels are recorded.
    nutrition = {
        'Cal': 'N/A',
        'Fat': 'N/A',
        'Protein': 'N/A',
        'Carb': 'N/A',
    }
    for element in soup.find_all('span', class_='font-semibold'):
        parent = element.find_parent('div')
        if parent is None:
            # A bold span outside any <div> cannot be a nutrition value.
            continue
        label = text_of(parent.find('span'), '')
        if label in nutrition:
            nutrition[label] = element.text.strip()

    return {
        'recipe_title': recipe_title,
        'recipe_rating': recipe_rating,
        'global_likes': global_likes,
        'difficulty': difficulty,
        'preparation': preparation,
        'baking': baking,
        'resting': resting,
        'ingredients': ingredients,
        'steps': steps,
        'tags': tags,
        **nutrition,
    }

# Function to scroll and load all recipes
def load_all_recipes(driver, pause=2, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(script)``, e.g. a Selenium
            WebDriver with the target page already loaded.
        pause: Seconds to wait after each scroll before scrolling again, to
            give the page time to append more content.
        max_scrolls: Upper bound on the number of follow-up scrolls, or
            ``None`` (the default) to keep going until the height is stable.
            Use it to avoid looping forever on a page that never stops
            growing.

    Returns:
        None. The function is used for its effect on the browser page.
    """
    scroll_script = (
        "window.scrollTo(0, document.body.scrollHeight); "
        "return document.body.scrollHeight;"
    )

    current_height = driver.execute_script(scroll_script)
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        previous_height = current_height
        time.sleep(pause)
        current_height = driver.execute_script(scroll_script)
        scrolls_done += 1
        if current_height == previous_height:
            # Height unchanged after a scroll: nothing more was loaded.
            break

# Function to scrape all recipes from the community page
def scrape_community_recipes(url):
    """Scrape every recipe linked from a community listing page.

    Opens the page in Chrome, scrolls it with ``load_all_recipes``, collects
    the links marked ``data-test="global-card-title"`` and passes each one
    to ``scrape_recipe``.

    Args:
        url: URL of the community listing page.

    Returns:
        A list with one ``scrape_recipe`` result per recipe link, in the
        order the links appear on the page.
    """
    from urllib.parse import urljoin

    driver = webdriver.Chrome()  # Make sure chromedriver is in your PATH
    try:
        driver.get(url)
        load_all_recipes(driver)
        page_source = driver.page_source
    finally:
        # Close the browser even if loading or scrolling raised, so a failed
        # run does not leave a Chrome process behind.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # Find all the recipe links on the community page; anchors without an
    # href are skipped instead of raising KeyError.
    title_anchors = soup.find_all('a', {'data-test': 'global-card-title'})
    recipe_links = [anchor['href'] for anchor in title_anchors if anchor.has_attr('href')]

    # Base URL used to resolve relative links; urljoin leaves links that are
    # already absolute untouched.
    base_url = "https://www.kitchenstories.com"

    return [scrape_recipe(urljoin(base_url, link)) for link in recipe_links]

# Listing page that is crawled for recipe links
community_url = "https://www.kitchenstories.com/en/recipes/community"

# One dict per recipe found on the community page
community_recipes = scrape_community_recipes(community_url)

# Destination file and the column order of the CSV export
csv_file = "community_recipes.csv"
csv_columns = [
    'recipe_title',
    'recipe_rating',
    'global_likes',
    'difficulty',
    'preparation',
    'baking',
    'resting',
    'ingredients',
    'steps',
    'tags',
    'Cal',
    'Fat',
    'Protein',
    'Carb',
]

# List-valued fields and the separator that flattens each into a single cell
list_separators = {'ingredients': ', ', 'steps': ' | ', 'tags': ', '}

with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    for recipe in community_recipes:
        # Flattening is done in place, so each dict holds strings afterwards
        for field, separator in list_separators.items():
            recipe[field] = separator.join(recipe[field])
        writer.writerow(recipe)

print(f"Scraped data saved to {csv_file}")


# Output: Scraped data saved to community_recipes.csv
