In [1]:
import ast
import os
from pathlib import Path

import pandas as pd

# Load data
# The dataset directory is configurable so the notebook is not tied to one
# machine: set the RECIPE_DATA_DIR environment variable to the folder holding
# the two CSV files. The fallback is the original location, so existing runs
# behave exactly as before.
DATA_DIR = Path(os.environ.get("RECIPE_DATA_DIR", "/Users/seonminhwang/Downloads/archive"))
recipes = pd.read_csv(DATA_DIR / "RAW_recipes.csv")
interactions = pd.read_csv(DATA_DIR / "RAW_interactions.csv")

# Ensure 'ingredients' column is properly parsed (if stored as a string list).
# The isinstance check keeps this cell idempotent: on a re-run the values are
# already lists and are passed through untouched.
recipes['ingredients'] = recipes['ingredients'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Index by recipe ID once and derive both lookup dictionaries from that frame:
# recipe ID -> ingredient list, and recipe ID -> recipe name.
recipes_by_id = recipes.set_index('id')
recipe_ingredients = recipes_by_id['ingredients'].to_dict()
recipe_names = recipes_by_id['name'].to_dict()

print(f"Sample recipe ingredients: {list(recipe_ingredients.items())[:5]}")

Sample recipe ingredients: [(137739, ['winter squash', 'mexican seasoning', 'mixed spice', 'honey', 'butter', 'olive oil', 'salt']), (31490, ['prepared pizza crust', 'sausage patty', 'eggs', 'milk', 'salt and pepper', 'cheese']), (112140, ['ground beef', 'yellow onions', 'diced tomatoes', 'tomato paste', 'tomato soup', 'rotel tomatoes', 'kidney beans', 'water', 'chili powder', 'ground cumin', 'salt', 'lettuce', 'cheddar cheese']), (59389, ['spreadable cheese with garlic and herbs', 'new potatoes', 'shallots', 'parsley', 'tarragon', 'olive oil', 'red wine vinegar', 'salt', 'pepper', 'red bell pepper', 'yellow bell pepper']), (44061, ['tomato juice', 'apple cider vinegar', 'sugar', 'salt', 'pepper', 'clove oil', 'cinnamon oil', 'dry mustard'])]


In [2]:
def jaccard_similarity(set1, set2):
    """Return the Jaccard index |A ∩ B| / |A ∪ B| of two collections.

    Both arguments are converted to sets first, so duplicates and ordering in
    the inputs do not matter. When both collections are empty the union is
    empty and 0 is returned instead of dividing by zero.
    """
    left = set(set1)
    right = set(set2)

    combined = left.union(right)
    if len(combined) == 0:
        return 0

    shared = left.intersection(right)
    return len(shared) / len(combined)

In [3]:
def get_similar_recipes(target_recipe_id, recipe_ingredients, recipe_names, exclude_ids=None, top_n=5):
    """Rank recipes by ingredient overlap (Jaccard similarity) with a target recipe.

    Args:
        target_recipe_id: ID of the recipe to compare every other recipe against.
        recipe_ingredients: Mapping of recipe ID -> collection of ingredient names.
        recipe_names: Mapping of recipe ID -> recipe name. IDs missing from this
            mapping are reported as 'Unknown Recipe' instead of raising KeyError.
        exclude_ids: Optional iterable of recipe IDs that must never be returned.
        top_n: Maximum number of results to return.

    Returns:
        A list of at most ``top_n`` tuples ``(recipe_id, name, ingredients,
        similarity)`` sorted by similarity, highest first. Ties keep the
        iteration order of ``recipe_ingredients``. ``ingredients`` is the
        recipe's ingredient list with duplicates removed, in the recipe's own
        order. Recipes whose ingredient set duplicates one already collected
        are skipped, so each distinct ingredient set appears once. An empty
        list is returned when the target is unknown or has no ingredients,
        because every similarity would be 0 and the ranking meaningless.
    """
    # Build the target set once instead of once per candidate recipe.
    target_set = frozenset(recipe_ingredients.get(target_recipe_id, []))
    if not target_set:
        return []

    excluded = set(exclude_ids) if exclude_ids is not None else set()

    similarities = []
    seen_ingredient_sets = set()  # Track unique ingredient sets

    for recipe_id, ingredients in recipe_ingredients.items():
        if recipe_id == target_recipe_id or recipe_id in excluded:
            continue

        ingredient_set = frozenset(ingredients)  # hashable, so it can live in a set
        # Check for duplicates before scoring so no work is spent on recipes
        # that are going to be discarded anyway.
        if ingredient_set in seen_ingredient_sets:
            continue
        seen_ingredient_sets.add(ingredient_set)

        similarities.append((
            recipe_id,
            recipe_names.get(recipe_id, 'Unknown Recipe'),
            # dict.fromkeys de-duplicates while keeping the recipe's own
            # ingredient order; iterating the frozenset would give an
            # arbitrary order that can change between kernel sessions.
            list(dict.fromkeys(ingredients)),
            jaccard_similarity(target_set, ingredient_set),
        ))

    # Stable sort by similarity, descending.
    similarities.sort(key=lambda entry: entry[3], reverse=True)

    return similarities[:top_n]

In [4]:
# Example: Get top 5 similar recipes for a given recipe ID
# Looks up the recipes most similar to one hard-coded recipe and prints a
# ranked, human-readable list.

target_recipe_id = 137739  # Replace with a valid recipe ID from your dataset
# No recipes are excluded here (exclude_ids=None); only the target itself is skipped.
similar_recipes = get_similar_recipes(target_recipe_id, recipe_ingredients, recipe_names, exclude_ids=None, top_n=5)

print(f"Top similar recipes to '{recipe_names.get(target_recipe_id, 'Unknown Recipe')}' (ID: {target_recipe_id}):\n")

# Each result is a (recipe_id, name, ingredients, similarity) tuple; ranks are
# numbered from 1 for display.
for idx, (recipe_id, name, ingredients, similarity) in enumerate(similar_recipes, start=1):
    # The tuple's `name` is unpacked but not used; the name is looked up again
    # with a fallback for IDs that have no entry in recipe_names.
    recipe_name = recipe_names.get(recipe_id, 'Unknown Recipe')  # Handle missing
    print(f"{idx}. {recipe_name} (ID: {recipe_id})")
    print(f"   Similarity: {similarity:.2f}")
    print(f"   Ingredients: {', '.join(ingredients)}\n")

Top similar recipes to 'arriba   baked winter squash mexican style' (ID: 137739):

1. berber spice roasted chickpeas (ID: 514675)
   Similarity: 0.38
   Ingredients: dried garbanzo beans, olive oil, salt, mixed spice

2. ed s homemade microwave buttery popcorn (ID: 408958)
   Similarity: 0.38
   Ingredients: butter, olive oil, popcorn, salt

3. honey roasted peanuts (ID: 147856)
   Similarity: 0.38
   Ingredients: butter, honey, peanuts, salt

4. julia child method of preparing garlic (ID: 104441)
   Similarity: 0.38
   Ingredients: butter, olive oil, salt, garlic

5. potatoes rissole (ID: 72347)
   Similarity: 0.38
   Ingredients: butter, russet potatoes, olive oil, salt



In [5]:
def recommend_by_ingredients(user_id, interactions, recipe_ingredients, recipe_names, top_n=5, min_rating=4):
    """Recommend recipes whose ingredients resemble the ones a user rated highly.

    Args:
        user_id: ID of the user to build recommendations for.
        interactions: DataFrame with at least the columns 'user_id',
            'recipe_id' and 'rating'.
        recipe_ingredients: Mapping of recipe ID -> collection of ingredient names.
        recipe_names: Mapping of recipe ID -> recipe name.
        top_n: Maximum number of similar recipes returned per source recipe.
        min_rating: Lowest rating that counts as "rated highly" (default 4).

    Returns:
        A dict mapping the name of each highly rated recipe to the list of
        ``(recipe_id, name, ingredients, similarity)`` tuples produced by
        ``get_similar_recipes``. If two source recipes share a name (or are
        both 'Unknown Recipe'), the later one is keyed as
        ``"<name> (ID: <recipe_id>)"`` so it does not overwrite the earlier
        entry. The dict is empty when the user has no rating >= ``min_rating``.
    """
    user_rated_recipes = interactions[interactions['user_id'] == user_id]

    # A recipe rated several times by the same user is processed only once.
    is_high = user_rated_recipes['rating'] >= min_rating
    user_high_ratings = user_rated_recipes.loc[is_high, 'recipe_id'].drop_duplicates()

    # Never recommend something the user has already rated, whatever the score:
    # excluding only the highly rated recipes would let recipes the user tried
    # and rated low come back as recommendations. Built once, outside the loop.
    already_rated = set(user_rated_recipes['recipe_id'])

    recommendations = {}

    for recipe_id in user_high_ratings:
        # Get the name of the recipe the user rated highly
        source_recipe_name = recipe_names.get(recipe_id, 'Unknown Recipe')
        if source_recipe_name in recommendations:
            # Disambiguate name collisions instead of silently overwriting.
            source_recipe_name = f"{source_recipe_name} (ID: {recipe_id})"

        # Find similar recipes and store them under the source recipe's name
        recommendations[source_recipe_name] = get_similar_recipes(
            recipe_id,
            recipe_ingredients,
            recipe_names,
            exclude_ids=already_rated,
            top_n=top_n,
        )

    return recommendations

In [6]:
# Example: Get ingredient-based recommendations for a specific user
# The result is a dict: name of a recipe the user rated highly -> list of
# (recipe_id, name, ingredients, similarity) tuples for similar recipes.
# NOTE(review): recommend_by_ingredients calls get_similar_recipes, which loops
# over every recipe, once per highly rated recipe - likely slow for users with
# many ratings; confirm before running on prolific users.
user_id = 38094  # Replace with a valid user ID
ingredient_based_recommendations = recommend_by_ingredients(user_id, interactions, recipe_ingredients, recipe_names, top_n=5)

In [7]:
# Print the recommendations grouped by the source recipe they were derived from.
print(f"Ingredient-based recommendations for user {user_id}:\n")

# NOTE: this loop rebinds the notebook-level name `similar_recipes` that the
# earlier single-recipe example cell assigned; after this cell it refers to the
# last group printed here.
for source_recipe_name, similar_recipes in ingredient_based_recommendations.items():
    print(f"Recipes similar to '{source_recipe_name}':\n")
    # Each entry is a (recipe_id, name, ingredients, similarity) tuple; ranks
    # are numbered from 1 within each group.
    for idx, (recipe_id, name, ingredients, similarity) in enumerate(similar_recipes, start=1):
        print(f"  {idx}. {name} (ID: {recipe_id})")
        print(f"     Similarity: {similarity:.2f}")
        print(f"     Ingredients: {', '.join(ingredients)}\n")
    # Visual separator between source recipes.
    print("-" * 50)

Ingredient-based recommendations for user 38094:

Recipes similar to 'white bean   green chile pepper soup':

  1. zesty pumpkin soup with chili cream (ID: 78404)
     Similarity: 0.27
     Ingredients: diced green chilis, ground cumin, water, sour cream, solid-pack pumpkin, fresh cilantro leaves, chicken broth, ground red pepper, chili powder, garlic powder

  2. 5 minute wet beef   green chile burritos (ID: 263505)
     Similarity: 0.25
     Ingredients: beef broth, ground cumin, diced green chilies, monterey jack pepper cheese, sour cream, refried beans, wondra flour, roast beef in gravy, flour tortillas, chili powder, garlic powder

  3. southwestern spiced chicken   black bean stew (ID: 245090)
     Similarity: 0.25
     Ingredients: corn tortilla strips, extra virgin olive oil, red bell pepper, ground cumin, bacon, sour cream, chipotle pepper, yellow onion, kosher salt & freshly ground black pepper, beer, skinless chicken thighs, reduced-sodium chicken broth, fresh cilantro leave