In [2]:
import os

import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
from sklearn.cluster import KMeans
from transformers import pipeline
import pandas as pd

import torch
from dotenv import load_dotenv

# Pick the torch device: a CUDA GPU when torch reports one, the CPU otherwise.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print(f"Using device: {DEVICE}")

from supabase import Client, create_client

# Pull the Supabase connection settings from the environment (.env is honoured
# via load_dotenv) and open the client used by every query in this notebook.
load_dotenv()
url: str = os.getenv("NEXT_PUBLIC_SUPABASE_URL")
key: str = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
supabase: Client = create_client(url, key)


# LOAD DATA (runs once at startup)
# Each table is fetched with a single select("*") and wrapped in a DataFrame,
# in the same order as the names on the left-hand side.
# NOTE(review): an un-paginated select may be capped by the API's row limit --
# confirm the larger tables are not truncated here.
ingredients, recipes, recipe_ingredient_map, tags, recipe_tag_map = (
    pd.DataFrame(supabase.table(table_name).select("*").execute().data)
    for table_name in (
        "Ingredient",
        "Recipe",
        "Recipe_Ingredient_Map",
        "RecipeTag",
        "Recipe_Tag_Map",
    )
)

Using device: cpu


In [3]:
 # MERGE RECIPE METADATA
# recipe_tags = recipe_tag_map.merge(tags, left_on="tag_id", right_on="id", suffixes=("", "_tag"))
# recipe_tags = recipe_tags.groupby("recipe_id")["name"].apply(list).reset_index(name="tags")

# recipe_ing = recipe_ingredient_map.merge(ingredients, left_on="ingredient_id", right_on="id", suffixes=("", "_ing"))
# recipe_ing = recipe_ing.groupby("recipe_id")["name"].apply(list).reset_index(name="ingredients")

# recipe_data = recipes.merge(recipe_tags, left_on="id", right_on="recipe_id", how="left")
# recipe_data = recipe_data.merge(recipe_ing, left_on="id", right_on="recipe_id", how="left")

# Show which columns each frame exposes, to choose the join keys for the tag merge.
tag_map_columns = recipe_tag_map.columns
tag_columns = tags.columns
print("recipe_tag_map columns:", tag_map_columns)
print("tags columns:", tag_columns)


recipe_tag_map columns: Index(['id', 'tag_id', 'recipe_id'], dtype='object')
tags columns: Index(['id', 'created_at', 'description', 'name'], dtype='object')


In [4]:
# Display the recipe-to-tag mapping frame (columns: id, tag_id, recipe_id).
recipe_tag_map

Unnamed: 0,id,tag_id,recipe_id
0,411,71,101
1,412,72,101
2,413,73,102
3,414,74,102
4,415,75,102
...,...,...,...
77,488,79,148
78,489,91,148
79,490,86,149
80,491,83,150


In [5]:
# Test user ID (replace with actual user ID from your database)
TEST_USER_ID = 3  # Change this to your actual user ID

# Look up the profile row for the test user; an empty dict stands in for the
# profile when the query returns no rows.
user_profile = supabase.table("User").select("*").eq("id", TEST_USER_ID).execute()
if user_profile.data:
    user_data = user_profile.data[0]
else:
    user_data = {}

print("User Profile:")
print(f"  Allergies: {user_data.get('allergies', [])}")
print(f"  Dietary Preferences: {user_data.get('dietary_preferences', [])}")
print(f"  Kitchen Equipment: {user_data.get('kitchen_equipment', [])}")
print(f"  Goals: {user_data.get('goals', 'None')}")

# Pantry rows for the same user, reduced to their lower-cased names.
pantry_items = supabase.table("PantryItem").select("*").eq("user_id", TEST_USER_ID).execute()
pantry_list = list(map(lambda entry: entry['name'].lower(), pantry_items.data))

# Only the first ten names are printed; "..." marks that more exist.
overflow_marker = "..." if len(pantry_list) > 10 else ""
print(f"\nPantry Items ({len(pantry_list)}):")
print(f"  {', '.join(pantry_list[:10])}" + overflow_marker)

User Profile:
  Allergies: ['Fish', 'Shellfish']
  Dietary Preferences: ['Vegetarian']
  Kitchen Equipment: ['Air Fryer', 'Blender', 'Instant Pot', 'Grill']
  Goals: Gain 5 Kg

Pantry Items (0):
  


In [8]:
# Load all necessary data from Supabase
print("Loading data from Supabase...")


def _fetch_all_rows(table_name, order_by=None, page_size=1000):
    """Download every row of a Supabase table as a DataFrame, page by page.

    A bare select("*") returns only the first page the API is willing to serve
    (1,000 rows by default according to the Supabase docs), so larger tables
    come back silently truncated. This helper asks for an exact row count and
    keeps requesting consecutive ranges until that many rows have arrived.

    Args:
        table_name: Name of the Supabase table to read.
        order_by: Optional column to sort by so consecutive pages neither
            overlap nor skip rows. None leaves the server's default order.
        page_size: Number of rows requested per round trip.

    Returns:
        DataFrame with one row per table row (empty when the table is empty).
    """
    rows = []
    while True:
        query = supabase.table(table_name).select("*", count="exact")
        if order_by is not None:
            query = query.order(order_by)
        offset = len(rows)
        response = query.range(offset, offset + page_size - 1).execute()
        page = response.data or []
        rows.extend(page)

        total = getattr(response, "count", None)
        if not page:
            break
        if total is not None:
            # Stop exactly at the reported total so no out-of-range page is requested.
            if len(rows) >= total:
                break
        elif len(page) < page_size:
            # No count available: a short page is the best end-of-table signal.
            break
    return pd.DataFrame(rows)


def _names_per_recipe(mapping, lookup, key_column, list_column):
    """Collect, per recipe_id, the list of `name` values referenced by a mapping table.

    Args:
        mapping: Mapping frame with 'recipe_id' and `key_column` columns.
        lookup: Frame with 'id' and 'name' columns that `key_column` points into.
        key_column: Column of `mapping` holding the lookup id.
        list_column: Name of the resulting list-valued column.

    Returns:
        DataFrame with columns 'recipe_id' and `list_column`. Ids missing from
        `lookup` contribute NaN entries because the join is a left join.
    """
    if len(mapping) == 0:
        return pd.DataFrame({'recipe_id': [], list_column: []})
    joined = mapping.merge(
        lookup[['id', 'name']],
        left_on=key_column,
        right_on="id",
        how="left"
    )
    # Group by recipe_id BEFORE merging with recipes so each recipe stays on one row.
    return joined.groupby("recipe_id")["name"].apply(list).reset_index(name=list_column)


# Sorting by "id" keeps pagination stable; it is applied only to tables whose
# "id" column is visible elsewhere in this notebook.
ingredients = _fetch_all_rows("Ingredient", order_by="id")
recipes = _fetch_all_rows("Recipe", order_by="id")
# NOTE(review): Recipe_Ingredient_Map presumably has an "id" column too -- if so,
# pass order_by="id" here as well.
recipe_ing_map = _fetch_all_rows("Recipe_Ingredient_Map")
tags = _fetch_all_rows("RecipeTag", order_by="id")
recipe_tag_map = _fetch_all_rows("Recipe_Tag_Map", order_by="id")

print(f"Loaded: {len(recipes)} recipes, {len(ingredients)} ingredients, {len(tags)} tags")

# Debug: Check if we have data in the mapping tables
print(f"Recipe-Ingredient mappings: {len(recipe_ing_map)}")
print(f"Recipe-Tag mappings: {len(recipe_tag_map)}")

# One list of tag names and one list of ingredient names per recipe_id
recipe_tags = _names_per_recipe(recipe_tag_map, tags, "tag_id", "tags")
recipe_ing = _names_per_recipe(recipe_ing_map, ingredients, "ingredient_id", "ingredients")

# Now merge with recipes using left join to keep all recipes
recipe_data = recipes.copy()
recipe_data = recipe_data.merge(recipe_tags, left_on="id", right_on="recipe_id", how="left")
recipe_data = recipe_data.merge(recipe_ing, left_on="id", right_on="recipe_id", how="left")

# Drop the duplicate recipe_id columns from the joins
recipe_data = recipe_data.drop(columns=['recipe_id_x', 'recipe_id_y'], errors='ignore')

# Recipes without a match carry NaN after the left joins; normalise to empty lists
recipe_data["tags"] = recipe_data["tags"].apply(lambda x: x if isinstance(x, list) else [])
recipe_data["ingredients"] = recipe_data["ingredients"].apply(lambda x: x if isinstance(x, list) else [])

print(f"\nRecipe data merged successfully!")
print(f"Total recipes: {len(recipe_data)}")

# Check how many recipes have ingredients and tags
recipes_with_ingredients = recipe_data[recipe_data['ingredients'].apply(len) > 0]
recipes_with_tags = recipe_data[recipe_data['tags'].apply(len) > 0]

print(f"Recipes with ingredients: {len(recipes_with_ingredients)}")
print(f"Recipes with tags: {len(recipes_with_tags)}")

# Show a sample recipe with data
sample_with_data = recipes_with_ingredients.head(1)
if len(sample_with_data) > 0:
    sample = sample_with_data.iloc[0]
    print(f"\nSample recipe: {sample['name']}")
    print(f"  - Ingredients ({len(sample['ingredients'])}): {', '.join(sample['ingredients'][:5])}" + ("..." if len(sample['ingredients']) > 5 else ""))
    print(f"  - Tags ({len(sample['tags'])}): {sample['tags']}")
else:
    print("\nWarning: No recipes found with ingredients!")
    print("Sample recipe (first one):", recipe_data.iloc[0]['name'])

Loading data from Supabase...
Loaded: 150 recipes, 1000 ingredients, 105 tags
Recipe-Ingredient mappings: 528
Recipe-Tag mappings: 82

Recipe data merged successfully!
Total recipes: 150
Recipes with ingredients: 50
Recipes with tags: 34

Sample recipe: Apple Frangipan Tart
  - Ingredients (9): Digestive Biscuits, Butter, Bramley Apples, Salted Butter, Caster Sugar...
  - Tags (3): ['Tart', 'Baking', 'Fruity']


In [9]:
def filter_allergens(recipe_df, allergies):
    """Remove recipes whose ingredient names mention any of the given allergens.

    Matching is a case-insensitive substring test of each allergen against each
    ingredient name. It is purely textual: a recipe with an empty ingredient
    list always passes, and an allergen only matches ingredients whose name
    literally contains it.

    Args:
        recipe_df: DataFrame with an 'ingredients' column holding, per recipe,
            a list of ingredient names.
        allergies: Allergen names to avoid. A single string is treated as one
            allergen. None/empty input, a list containing only blank entries,
            or any entry equal to "No Allergy" (case-insensitive) disables
            filtering.

    Returns:
        `recipe_df` itself when no filtering applies; otherwise a filtered
        DataFrame with a fresh 0..n-1 index.
    """
    if not allergies:
        return recipe_df

    # A bare string would otherwise be iterated character by character.
    if isinstance(allergies, str):
        allergies = [allergies]

    # Normalise once. None/NaN/blank entries are dropped: "" is a substring of
    # every ingredient name and would discard every recipe listing ingredients.
    allergen_terms = []
    for allergen in allergies:
        if allergen is None or (isinstance(allergen, float) and allergen != allergen):
            continue
        term = str(allergen).strip().lower()
        if term:
            allergen_terms.append(term)

    # Handle "No Allergy" option
    if not allergen_terms or "no allergy" in allergen_terms:
        return recipe_df

    def _is_safe(ing_list):
        # A missing cell (None/NaN) has no ingredient names to inspect.
        if ing_list is None or isinstance(ing_list, float):
            return True
        if isinstance(ing_list, str):
            ing_list = [ing_list]
        names = [str(ing).lower() for ing in ing_list]
        return not any(term in name for name in names for term in allergen_terms)

    # astype(bool) keeps the mask boolean even when the frame is empty, so the
    # selection below is always a row filter.
    mask = recipe_df['ingredients'].apply(_is_safe).astype(bool)
    filtered = recipe_df[mask]

    removed = len(recipe_df) - len(filtered)
    print(f"Filtered out {removed} recipes due to allergies: {', '.join(str(a) for a in allergies)}")
    return filtered.reset_index(drop=True)

# Test it
# The profile row keeps its 'allergies' key even when the stored value is None,
# so dict.get's default would not apply; `or []` covers both the missing-key
# and the None case.
allergies = user_data.get('allergies') or []
print(f"User allergies: {allergies}")

filtered_recipes = filter_allergens(recipe_data, allergies)
print(f"Recipes after allergen filtering: {len(filtered_recipes)} (from {len(recipe_data)})")

User allergies: ['Fish', 'Shellfish']
Filtered out 2 recipes due to allergies: Fish, Shellfish
Recipes after allergen filtering: 148 (from 150)


In [10]:
def calculate_pantry_score(ingredients, pantry_items):
    """
    Calculate what fraction of recipe ingredients are in the user's pantry.

    An ingredient counts as available when its name contains a pantry item
    name or a pantry item name contains it (case-insensitive, surrounding
    whitespace ignored). Each ingredient is counted at most once.

    Args:
        ingredients: Iterable of ingredient names for one recipe.
        pantry_items: Iterable of pantry item names.

    Returns: score between 0 and 1 (0.0 when either input is empty)
    """
    if not pantry_items or not ingredients:
        return 0.0

    def _clean(value):
        # Missing names (None / NaN) become "" rather than the strings "none"
        # or "nan", which would substring-match real items such as "banana".
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip().lower()

    # Blank pantry entries are dropped: "" is a substring of every ingredient.
    pantry_names = [name for name in map(_clean, pantry_items) if name]
    if not pantry_names:
        return 0.0

    matches = 0
    for raw_ingredient in ingredients:
        ingredient = _clean(raw_ingredient)
        if not ingredient:
            # Unnamed ingredient: cannot be matched, but still counts in the
            # denominator below as something the recipe needs.
            continue
        if any(item in ingredient or ingredient in item for item in pantry_names):
            matches += 1

    return matches / len(ingredients)

# Test on a few recipes
print("Sample Pantry Match Scores:")
print(f"Pantry items: {pantry_list[:5]}...\n")

for idx in range(min(5, len(filtered_recipes))):
    recipe = filtered_recipes.iloc[idx]
    total = len(recipe['ingredients'])
    score = calculate_pantry_score(recipe['ingredients'], pantry_list)
    # round(), not int(): score * total can land just below the true whole
    # number (e.g. 1/49 * 49 == 0.9999999999999999), which int() would
    # truncate to one match too few.
    matched = round(score * total)
    print(f"{recipe['name'][:45]:45s} - {score:5.1%} match ({matched}/{total} ingredients)")

Sample Pantry Match Scores:
Pantry items: []...

Garides Saganaki                              -  0.0% match (0/0 ingredients)
Chicken Enchilada Casserole                   -  0.0% match (0/0 ingredients)
Bakewell tart                                 -  0.0% match (0/0 ingredients)
Apple Frangipan Tart                          -  0.0% match (0/0 ingredients)
Honey Teriyaki Salmon                         -  0.0% match (0/0 ingredients)


In [11]:
# Reuse the encoder when a previous run of this cell already bound `model`;
# otherwise create it now (this is the slow step).
if "model" not in globals():
    print("Loading sentence transformer model...")
    from sentence_transformers import SentenceTransformer, util
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print("Model loaded!")

def hybrid_recipe_score(recipe, goal_embedding, pantry_items, pantry_weight=0.3):
    """
    Score one recipe by blending goal similarity with pantry coverage.

    The recipe is described as "<name> <tags> <first ten ingredients>", encoded
    with the module-level `model`, and compared to the goal embedding by cosine
    similarity. That similarity is mixed with the pantry match score from
    `calculate_pantry_score`.

    Args:
        recipe: Recipe row (anything with .get) providing 'name', 'tags'
                and 'ingredients'
        goal_embedding: Embedding of the user's goal text
        pantry_items: List of pantry item names
        pantry_weight: Share of the final score given to pantry matching;
                       the remaining (1 - pantry_weight) goes to similarity

    Returns:
        (final_score, semantic_score, pantry_score)
    """
    tag_values = recipe.get('tags', [])
    ingredient_values = recipe.get('ingredients', [])

    # Assemble the text handed to the encoder; empty/missing parts become ''.
    name_part = str(recipe.get('name', ''))
    tag_part = ' '.join(map(str, tag_values)) if tag_values else ''
    ingredient_part = ' '.join(map(str, ingredient_values[:10])) if ingredient_values else ''
    description = ' '.join((name_part, tag_part, ingredient_part))

    # How close the recipe text is to the goal
    description_embedding = model.encode(description, convert_to_tensor=True)
    similarity = util.cos_sim(goal_embedding, description_embedding).item()

    # How much of the recipe the pantry already covers
    coverage = calculate_pantry_score(ingredient_values, pantry_items)

    # Weighted blend of the two signals
    semantic_share = (1 - pantry_weight) * similarity
    pantry_share = pantry_weight * coverage
    return semantic_share + pantry_share, similarity, coverage

# Test it
print("Testing hybrid scoring...")
# `or` rather than a dict.get default: a profile row whose 'goals' value is
# None (or empty) still has the key, so the default would never be used and
# None would be passed to the encoder.
test_goal = user_data.get('goals') or 'healthy high protein meals'
goal_embedding = model.encode(test_goal, convert_to_tensor=True)

print(f"\nGoal: '{test_goal}'")
print(f"Pantry weight: 30% (adjustable)\n")
print(f"{'Recipe Name':<45} {'Semantic':>8} {'Pantry':>8} {'Final':>8}")
print("-" * 75)

# Score a few recipes (default pantry_weight of hybrid_recipe_score applies)
for idx in range(min(10, len(filtered_recipes))):
    recipe = filtered_recipes.iloc[idx]
    final, semantic, pantry = hybrid_recipe_score(recipe, goal_embedding, pantry_list)
    print(f"{recipe['name'][:44]:<45} {semantic:>8.3f} {pantry:>8.3f} {final:>8.3f}")

Loading sentence transformer model...
Model loaded!
Testing hybrid scoring...

Goal: 'Gain 5 Kg'
Pantry weight: 30% (adjustable)

Recipe Name                                   Semantic   Pantry    Final
---------------------------------------------------------------------------
Garides Saganaki                                 0.088    0.000    0.062
Chicken Enchilada Casserole                      0.118    0.000    0.082
Bakewell tart                                    0.087    0.000    0.061
Apple Frangipan Tart                             0.059    0.000    0.041
Honey Teriyaki Salmon                            0.108    0.000    0.076
Chocolate Gateau                                 0.159    0.000    0.111
Cream Cheese Tart                                0.100    0.000    0.070
Dal Fry                                          0.148    0.000    0.104
Hot Chocolate Fudge                              0.134    0.000    0.094
Christmas Pudding Flapjack                       0.026    0.000 

In [12]:
def get_personalized_recommendations(
    goal_text, 
    user_allergies, 
    pantry_items, 
    recipe_df,
    n_recommendations=10,
    pantry_weight=0.3
):
    """
    Get personalized recipe recommendations

    Recipes are first passed through `filter_allergens`, then each remaining
    recipe is scored with `hybrid_recipe_score` against the encoded goal, and
    the highest-scoring rows are returned.
    
    Args:
        goal_text: User's dietary goal
        user_allergies: List of allergens to avoid
        pantry_items: List of pantry item names
        recipe_df: DataFrame of all recipes
        n_recommendations: Number of recipes to return
        pantry_weight: Weight for pantry matching (0-1)
    
    Returns:
        DataFrame of recommended recipes with scores, sorted by 'final_score'
        descending, with columns recipe_id, name, final_score, semantic_score,
        pantry_score, num_ingredients, tags and prep_time. The frame is empty
        (but keeps these columns) when no recipe survives the allergen filter.
    """
    score_columns = [
        'recipe_id', 'name', 'final_score', 'semantic_score',
        'pantry_score', 'num_ingredients', 'tags', 'prep_time',
    ]

    print(f"Generating {n_recommendations} personalized recommendations...")
    
    # 1. Filter allergens
    safe_recipes = filter_allergens(recipe_df, user_allergies)
    print(f"Safe recipes after allergen filter: {len(safe_recipes)}")
    
    # 2. Encode goal
    goal_embedding = model.encode(goal_text, convert_to_tensor=True)
    
    # 3. Score all recipes
    scores = []
    for _, recipe in safe_recipes.iterrows():
        final_score, semantic, pantry = hybrid_recipe_score(
            recipe, goal_embedding, pantry_items, pantry_weight
        )
        scores.append({
            'recipe_id': recipe['id'],
            'name': recipe['name'],
            'final_score': final_score,
            'semantic_score': semantic,
            'pantry_score': pantry,
            'num_ingredients': len(recipe.get('ingredients', [])),
            'tags': recipe.get('tags', []),
            'prep_time': recipe.get('min_prep_time', None)
        })
    
    # 4. Sort by final score and return top N.
    # Passing the columns explicitly keeps 'final_score' present even when no
    # recipe was scored; without it sort_values raises KeyError on an empty frame.
    scores_df = pd.DataFrame(scores, columns=score_columns)
    scores_df = scores_df.sort_values('final_score', ascending=False)
    
    return scores_df.head(n_recommendations)

# Test the full pipeline
print("=" * 80)
print("PERSONALIZED RECOMMENDATIONS")
print("=" * 80)

# `or` fallbacks instead of dict.get defaults: the profile row keeps its keys
# even when the stored value is None, so a .get default would not apply.
recommendations = get_personalized_recommendations(
    goal_text=user_data.get('goals') or 'healthy meals',
    user_allergies=user_data.get('allergies') or [],
    pantry_items=pantry_list,
    recipe_df=recipe_data,
    n_recommendations=10,
    pantry_weight=0.3  # 30% pantry, 70% semantic
)

# Report the number actually returned; fewer than requested may be available.
print(f"\nTop {len(recommendations)} Recommendations:")
print("=" * 80)
for i, (idx, row) in enumerate(recommendations.iterrows(), 1):
    print(f"\n{i}. {row['name']}")
    print(f"   Final: {row['final_score']:.3f} | Semantic: {row['semantic_score']:.3f} | Pantry: {row['pantry_score']:.1%}")
    # str() guards the join against non-string tag entries (e.g. NaN from a left join)
    tag_names = [str(tag) for tag in row['tags']]
    print(f"   Tags: {', '.join(tag_names[:3])}" + ("..." if len(tag_names) > 3 else ""))
    # A missing prep time may surface as NaN, which is truthy; check it explicitly
    if pd.notna(row['prep_time']) and row['prep_time']:
        print(f"   Prep time: {row['prep_time']} min")

PERSONALIZED RECOMMENDATIONS
Generating 10 personalized recommendations...
Filtered out 2 recipes due to allergies: Fish, Shellfish
Safe recipes after allergen filter: 148

Top 10 Recommendations:

1. Chicken Karaage
   Final: 0.195 | Semantic: 0.279 | Pantry: 0.0%
   Tags: 
   Prep time: 30 min

2. Chicken Karaage
   Final: 0.195 | Semantic: 0.279 | Pantry: 0.0%
   Tags: 
   Prep time: 30 min

3. Dal Fry
   Final: 0.184 | Semantic: 0.263 | Pantry: 0.0%
   Tags: Curry, Vegetarian, Cake
   Prep time: 30 min

4. Bread and Butter Pudding
   Final: 0.170 | Semantic: 0.243 | Pantry: 0.0%
   Tags: Pudding, Brunch
   Prep time: 30 min

5. Chicken Karaage
   Final: 0.169 | Semantic: 0.241 | Pantry: 0.0%
   Tags: 
   Prep time: 30 min

6. Grilled Mac and Cheese Sandwich
   Final: 0.159 | Semantic: 0.227 | Pantry: 0.0%
   Tags: 
   Prep time: 30 min

7. Coq au Vin
   Final: 0.155 | Semantic: 0.222 | Pantry: 0.0%
   Tags: 
   Prep time: 60 min

8. Chicken, Ham and Leek Pie
   Final: 0.145 | Seman