In [2]:
from recipe_scrapers import scrape_me
import pandas as pd
from recipe_scrapers._exceptions import SchemaOrgException  # Import the SchemaOrgException

In [4]:
# Function to scrape data from a single URL using recipe-scrapers
def scrape_recipe(url):
    """Scrape a single recipe page and return its fields as a flat dict.

    A scraper is created with ``scrape_me(url)`` and every field is read
    through ``safe_scrape`` so that one missing field does not abort the
    whole recipe.

    Args:
        url: Address of the recipe page to scrape.

    Returns:
        dict with the keys 'Category', 'Title', 'Total Time',
        'All Ingredients', 'Ingredient Groups', 'Instructions', 'Nutrition',
        'Cuisine', 'Yields', 'Image', 'Ratings', 'Description' and 'Status'.
        Fields that could not be read hold 'N/A'; the two ingredient fields
        hold an empty string instead. 'Status' is 'Incomplete' when no
        ingredients or no ingredient groups were obtained, else 'Complete'.

    Raises:
        Whatever ``scrape_me`` raises, and any exception from a scraper
        accessor other than SchemaOrgException / ValueError (those two are
        reported and replaced by the default value).
    """
    scraper = scrape_me(url)

    def safe_scrape(scrape_func, default='N/A'):
        """Call ``scrape_func``; on a handled failure or a None result, log and return ``default``."""
        try:
            result = scrape_func()
            if result is None:
                # Treat None like a failure so it is logged and replaced below.
                raise ValueError(f"{scrape_func.__name__} returned None")
            return result
        except (SchemaOrgException, ValueError) as e:
            print(f"Error getting {scrape_func.__name__} for {url}: {e}")
            return default

    category = safe_scrape(scraper.category)
    title = safe_scrape(scraper.title)
    total_time = safe_scrape(scraper.total_time)
    # Keep the raw lists: they are needed for the completeness check, which
    # cannot be done reliably on the joined strings.
    ingredient_list = safe_scrape(scraper.ingredients, default=[])
    group_list = safe_scrape(scraper.ingredient_groups, default=[])
    ingredients = ', '.join(ingredient_list)
    ingredient_groups = ', '.join(str(group) for group in group_list)
    instructions = safe_scrape(scraper.instructions)
    nutrients = safe_scrape(scraper.nutrients)
    yields = safe_scrape(scraper.yields)
    description = safe_scrape(scraper.description)
    image = safe_scrape(scraper.image)
    ratings = safe_scrape(scraper.ratings)
    cuisine = safe_scrape(scraper.cuisine)

    # Check for missing ingredients. The ingredient fields fall back to an
    # empty list (never to 'N/A'), so emptiness is the signal for a failed
    # or empty scrape; searching the joined text for 'N/A' never fired on
    # failures and could misfire on ingredient text containing "N/A".
    if ingredient_list and group_list:
        status = 'Complete'
    else:
        status = 'Incomplete'

    return {
        'Category': category,
        'Title': title,
        'Total Time': total_time,
        'All Ingredients': ingredients,
        'Ingredient Groups': ingredient_groups,
        'Instructions': instructions,
        'Nutrition': nutrients,
        'Cuisine': cuisine,
        'Yields': yields,
        'Image': image,
        'Ratings': ratings,
        'Description': description,
        'Status': status  # Indicates whether ingredient data was obtained
    }



In [45]:
# Function to check for duplicate URLs
def check_duplicate_urls(url_list):
    """Return the set of entries that occur more than once in ``url_list``.

    Args:
        url_list: Iterable of hashable items (URL strings in this notebook).

    Returns:
        set containing each item that was encountered at least twice;
        empty when every item is unique.
    """
    visited = set()
    repeated = set()
    for candidate in url_list:
        if candidate not in visited:
            # First sighting: remember it and move on.
            visited.add(candidate)
            continue
        repeated.add(candidate)
    return repeated


In [80]:
# Recipe page URLs to scrape, one string per entry.
urls = [
    # (add recipe URLs here)
]


In [81]:
# Report every URL that is listed more than once before scraping starts.
duplicates = check_duplicate_urls(urls)
if not duplicates:
    print("No duplicate URLs found.")
else:
    print("Duplicate URLs found:")
    for url in duplicates:
        print(url)

No duplicate URLs found.


In [82]:
# Scrape each non-empty URL into a record, then build the DataFrame in one go.
recipe_data = []

for url in urls:
    if not url:
        # Empty entries carry nothing to scrape.
        continue
    recipe_data.append(scrape_recipe(url))

df = pd.DataFrame(recipe_data)

In [83]:
# Show the scraped recipes; the rendered table elides the middle rows.
df

Unnamed: 0,Category,Title,Total Time,All Ingredients,Ingredient Groups,Instructions,Nutrition,Cuisine,Yields,Image,Ratings,Description,Status
0,Dessert,Lemon Pound Cake Recipe,105,"1 1/2 cups butter, 1 (8-ounce) package cream c...",IngredientGroup(ingredients=['1 1/2 cups butte...,For the Lemon Pound Cake\nPreheat oven to 325 ...,"{'servingSize': '1 slice', 'calories': '457 kc...",American,16 servings,https://addapinch.com/wp-content/uploads/2015/...,4.91,This easy Lemon Pound Cake recipe makes a drea...,Complete
1,"Dessert,Side Dish",Strawberry Pretzel Salad Recipe,45,"2 cups pretzels (crushed), 3/4 cup butter (mel...",IngredientGroup(ingredients=['2 cups pretzels ...,Preheat oven to 400˚.\nMix crushed pretzels wi...,"{'servingSize': '0.5 cup', 'calories': '209 kc...",American,12 servings,https://addapinch.com/wp-content/uploads/2012/...,5.00,"Strawberry Pretzel Salad is an easy, delicious...",Complete
2,Dessert,The Best Brownies Recipe Ever,40,"1 cup butter, 1 cup unsweetened cocoa powder, ...","IngredientGroup(ingredients=['1 cup butter', '...",Prep. Preheat the oven to 350º F. Line a 9 x 9...,"{'calories': '319 kcal', 'carbohydrateContent'...",American,16 servings,https://addapinch.com/wp-content/uploads/2012/...,5.00,The Best Brownie recipe ever! These easy Homem...,Complete
3,Dessert,Cream Cheese Pound Cake,100,"1 1/2 cups salted butter (room temperature ), ...",IngredientGroup(ingredients=['1 1/2 cups salte...,Preheat oven to 325º F. Spray a 12-cup Bundt o...,"{'calories': '458 kcal', 'carbohydrateContent'...",American,16 servings,https://addapinch.com/wp-content/uploads/2024/...,4.91,This Cream Cheese Pound Cake recipe makes the ...,Complete
4,Dessert,Chocolate Covered Strawberries Recipe,7,"1 pint fresh strawberries, 1 (10-ounce) packag...",IngredientGroup(ingredients=['1 pint fresh str...,Prep. Line a rimmed baking sheet with parchmen...,"{'calories': '149 kcal', 'carbohydrateContent'...",American,8 servings,https://addapinch.com/wp-content/uploads/2020/...,5.00,A step-by-step guide for how to make delicious...,Complete
...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,Dessert,Southern Fried Pies,45,"double crust pie dough (your favorite), 4 cups...",IngredientGroup(ingredients=['double crust pie...,Cook your fruit with water and sugar in a medi...,"{'carbohydrateContent': '49 g', 'proteinConten...",American,12 servings,https://addapinch.com/wp-content/uploads/2011/...,5.00,Southern fried pies are a delicious treat. Fil...,Complete
370,Dessert,Chocolate Melting Cake Recipe,40,"8 ounces chocolate chips, 1 cup butter, 7 eggs...",IngredientGroup(ingredients=['8 ounces chocola...,Preheat oven to 375 degrees.\nMelt together ch...,"{'calories': '470 kcal', 'carbohydrateContent'...",American,8 servings,https://addapinch.com/wp-content/uploads/2010/...,5.00,A delicious recipe for chocolate melting cake ...,Complete
371,Dessert,Pumpkin Creme Brulee Recipe,60,"3 cups heavy cream, 3/4 cup sugar, 1 (15-ounce...",IngredientGroup(ingredients=['3 cups heavy cre...,Preheat oven to 300 degrees\nPrepare ramekins ...,"{'calories': '447 kcal', 'carbohydrateContent'...",French,8 servings,https://addapinch.com/wp-content/uploads/2010/...,5.00,Pumpkin Creme Brulee is a delicious seasonal u...,Complete
372,Dessert,Swedish Chocolate Balls,140,"1 1/2 cups confectioner's sugar, 1 pound butte...","IngredientGroup(ingredients=[""1 1/2 cups confe...","Cream butter, sugar, and cocoa until smooth.\n...","{'carbohydrateContent': '37 g', 'proteinConten...",Swedish,12 servings,https://addapinch.com/wp-content/uploads/2010/...,5.00,Chocolate balls make for a delicious sweet tre...,Complete


In [84]:
# Inspect the raw 'Ingredient Groups' text stored for the first row (label 0).
df.loc[0, 'Ingredient Groups']

'IngredientGroup(ingredients=[\'1 1/2 cups butter\', \'1 (8-ounce) package cream cheese\', \'6 large eggs\', \'2 tablespoons lemon juice\', \'1 tablespoon lemon zest\', \'3 cups sugar\', \'3 cups all-purpose flour\', \'1 teaspoon kosher salt\', \'1 tablespoon vanilla extract\'], purpose=\'For the Lemon Pound Cake\'), IngredientGroup(ingredients=["1 1/2 cups confectioner\'s sugar", \'2 tablespoons lemon juice\', \'1 tablespoon lemon zest\', \'1 tablespoon buttermilk\'], purpose=\'For the Lemon Buttermilk Glaze\')'

In [85]:
# Break the first row's group text on ', ' and put each trimmed piece on its own line.
formatted_ingredients = '\n'.join(map(str.strip, df['Ingredient Groups'][0].split(', ')))

In [51]:
# Print the newline-separated string so each piece appears on its own line.
print(formatted_ingredients)

IngredientGroup(ingredients=['1 pound ground beef'
'2 tablespoons diced onion'
'1 large egg'
'1 1/2 teaspoons Stone House Seasoning'
'1/4 teaspoon ground allspice (optional)'
'1/4 teaspoon ground nutmeg (optional)'
'2 tablespoons bread or cracker crumbs'
'2 tablespoons chopped fresh parsley'
'olive oil']
purpose='For the meatballs:')
IngredientGroup(ingredients=['2 tablespoons butter'
'3 tablespoons all-purpose flour'
'2 cups beef stock or broth'
'1 cup whole milk or heavy cream'
'1 teaspoon Stone House Seasoning'
'2 teaspoons Worcestershire sauce'
'2 tablespoons chopped fresh parsley (optional)']
purpose='For the sauce:')


In [86]:
# Write the scraped recipes to an Excel workbook, leaving out the row index.
df.to_excel(excel_writer='desserts.xlsx', index=False)

print("Recipe data scraped and saved to 'desserts.xlsx'.")

Recipe data scraped and saved to 'desserts.xlsx'.
