In [1]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [2]:
# Single-character vulgar fractions and their ASCII spellings.
_ASCII_FRACTIONS = {
    '½': '1/2', '⅓': '1/3', '⅔': '2/3', '¼': '1/4', '¾': '3/4',
    '⅕': '1/5', '⅖': '2/5', '⅗': '3/5', '⅘': '4/5', '⅙': '1/6',
    '⅚': '5/6', '⅛': '1/8', '⅜': '3/8', '⅝': '5/8', '⅞': '7/8',
}


def _element_text(element):
    """Return the stripped text of a parsed element, or '' when it is None."""
    return element.get_text().strip() if element is not None else ''


def _all_texts(elements):
    """Return the stripped text of every element in ``elements`` as a list."""
    return [_element_text(element) for element in elements]


def _normalize_fractions(quantity):
    """Replace vulgar-fraction characters in ``quantity`` with ASCII forms.

    A space is inserted when the fraction directly follows a digit, so that
    '1½' becomes '1 1/2' rather than the misleading '11/2'.
    """
    pieces = []
    for char in quantity:
        ascii_form = _ASCII_FRACTIONS.get(char)
        if ascii_form is None:
            pieces.append(char)
            continue
        if pieces and pieces[-1][-1].isdigit():
            pieces.append(' ')
        pieces.append(ascii_form)
    return ''.join(pieces)


def _recipe_detail(soup, label):
    """Return the text of the <div> that follows the details label ``label``.

    Labels are compared after stripping whitespace; '' is returned when the
    label (or the sibling <div> after it) is not present.
    """
    for label_div in soup.find_all('div', {'class': 'mntl-recipe-details__label'}):
        if _element_text(label_div) == label:
            return _element_text(label_div.find_next_sibling('div'))
    return ''


def scrape_recipe(url, timeout=30):
    """Download one recipe page and extract its fields.

    Args:
        url: Address of the recipe page.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        A dict with the keys 'recipe_name', 'description', 'instructions',
        'grocery_items', 'grocery_quantities', 'grocery_types', 'total_time'
        and 'serving_size'. Every value is a string; the list-like fields are
        joined with newlines. A field whose markup is missing from the page
        is returned as '' instead of raising AttributeError.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a recipe.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    instructions = _all_texts(soup.find_all('li', class_='comp mntl-sc-block-group--LI mntl-sc-block mntl-sc-block-startgroup'))
    grocery_items = _all_texts(soup.find_all('span', {'data-ingredient-name': 'true'}))
    grocery_quantities = [
        _normalize_fractions(quantity)
        for quantity in _all_texts(soup.find_all('span', {'data-ingredient-quantity': 'true'}))
    ]
    grocery_types = _all_texts(soup.find_all('span', {'data-ingredient-unit': 'true'}))

    return {
        'recipe_name': _element_text(soup.find('h1')),
        # The first <p> on the page is taken as the description.
        'description': _element_text(soup.find('p')),
        'instructions': '\n'.join(instructions),
        'grocery_items': '\n'.join(grocery_items),
        'grocery_quantities': '\n'.join(grocery_quantities),
        'grocery_types': '\n'.join(grocery_types),
        'total_time': _recipe_detail(soup, 'Total Time:'),
        'serving_size': _recipe_detail(soup, 'Servings:'),
    }

In [3]:
def scrape_website(url, timeout=30):
    """Scrape every recipe linked from a recipe index (gallery) page.

    Args:
        url: Address of the index page.
        timeout: Seconds passed to ``requests.get`` as the timeout of the
            index-page request.

    Returns:
        A list with one ``scrape_recipe`` result per featured link, in page
        order.

    Raises:
        requests.HTTPError: if the index page answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping links out of an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # Featured-link anchors are identified by their exact class string.
    anchors = soup.find_all('a', class_ = 'mntl-sc-block-featuredlink__link mntl-text-link button--contained-standard type--squirrel')

    # Skip anchors without an href (indexing them would raise KeyError) and
    # resolve relative hrefs against the index URL so they can be requested.
    links = [urljoin(url, anchor['href']) for anchor in anchors if anchor.get('href')]

    return [scrape_recipe(link) for link in links]

In [4]:
# Scrape the recipes linked from the gallery page (performs live HTTP requests).
data = scrape_website(
    url='https://www.allrecipes.com/gallery/best-street-food-recipes-to-make-at-home/',
)

In [5]:
# CSV column header -> key of the corresponding value in each scraped recipe
# dict. The insertion order of this mapping is the column order of the file.
column_to_key = {
    'Recipe Name': 'recipe_name',
    'Description': 'description',
    'Instructions': 'instructions',
    'Grocery Items': 'grocery_items',
    'Grocery Quantities': 'grocery_quantities',
    'Grocery Types': 'grocery_types',
    'Total Time': 'total_time',
    'Serving Size': 'serving_size',
}

with open('recipes.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=list(column_to_key))

    # Header row first, then one row per scraped recipe.
    csv_writer.writeheader()
    for scraped in data:
        csv_writer.writerow(
            {column: scraped[key] for column, key in column_to_key.items()}
        )