In [1]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
import re

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load dataset
df = pd.read_csv("/home/minhas/cgn-dp-24-1/culinary_compass/data/recipes.csv")

In [3]:
# non_veg_ingredients = ["chicken", "beef", "pork", "fish", "shrimp", "lamb", "eggs","egg"]
# df["DietaryCategory"] = df["RecipeIngredientParts"].apply(
#     lambda ingredients: "Non-Vegetarian" if any(item in str(ingredients).lower() for item in non_veg_ingredients) else "Vegetarian"
# )


In [4]:
df =df.iloc[0:3000]

In [5]:
# Define non-vegetarian ingredients
non_veg_ingredients = set([
    # Meat & Poultry
    "chicken", "beef", "pork", "mutton", "lamb", "turkey", "duck", "quail", "goat", "veal",
    "rabbit", "boar", "venison", "bison", "kangaroo", "goose", "pheasant", "pigeon", "elk",
    
    # Processed Meat Products
    "bacon", "ham", "sausage", "pepperoni", "salami", "chorizo", "pastrami", "prosciutto",
    "mortadella", "hot dog", "jerky", "liverwurst", "blood sausage", "scrapple",
    
    # Seafood
    "fish", "tuna", "salmon", "trout", "cod", "haddock", "mackerel", "sardine", "anchovy",
    "herring", "catfish", "bass", "snapper", "grouper", "halibut", "swordfish", "mahi mahi",
    "flounder", "eel", "shark", "sturgeon", "tilapia", "tuna steaks", "swordfish steaks",
    
    # Shellfish
    "shrimp", "prawns", "crab", "lobster", "crawfish", "squid", "octopus", "scallops",
    "mussels", "clams", "oysters", "abalone", "conch",
    
    # Animal-Based Ingredients
    "eggs", "gelatin", "lard", "suet", "tallow", "bone broth", "fish sauce", "oyster sauce",
    "shrimp paste", "anchovy paste", "worcestershire sauce", "caviar", "roe", "squid ink",
    
    # Organ Meats (Offal)
    "liver", "kidney", "heart", "brain", "tripe", "sweetbreads", "tongue", "gizzards"
])

# Function to classify recipes correctly
def classify_recipe(ingredients):
    # Step 1: Clean string formatting issues
    ingredients = str(ingredients).lower().replace('"', '').replace("c(", "").replace(")", "")

    # Step 2: Convert to a list of ingredients
    ingredient_list = [ing.strip() for ing in ingredients.split(",")]

    # Step 3: Check for partial matches
    if any(any(non_veg in ingredient for non_veg in non_veg_ingredients) for ingredient in ingredient_list):
        return "Non-Vegetarian"
    return "Vegetarian"


# Apply classification
df["DietaryCategory"] = df["RecipeIngredientParts"].apply(classify_recipe)

In [6]:
df['DietaryCategory'].unique()

array(['Vegetarian', 'Non-Vegetarian'], dtype=object)

In [8]:
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_row', None)
pd.set_option('display.max_colwidth', None)

In [9]:
# df[["RecipeIngredientParts", "DietaryCategory"]]

In [10]:
# Initialize Sentence Transformer Model

model = SentenceTransformer("all-MiniLM-L6-v2")

In [11]:
# Step 3: Compute embeddings for ingredients
df["IngredientEmbedding"] = df["RecipeIngredientParts"].apply(lambda x: model.encode(str(x), convert_to_tensor=True).tolist())

In [12]:

# Step 4: Save embeddings to a new file
df.to_csv("/home/minhas/cgn-dp-24-1/culinary_compass/data/recipes_with_embeddings.csv", index=False)

In [13]:
# Step 5: Load dataset with precomputed embeddings
df = pd.read_csv("/home/minhas/cgn-dp-24-1/culinary_compass/data/recipes_with_embeddings.csv")

In [14]:
df["IngredientEmbedding"] = df["IngredientEmbedding"].apply(lambda x: torch.tensor(eval(x)))

In [15]:
df.head()

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,ReviewCount,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions,DietaryCategory,IngredientEmbedding
0,38,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09T21:46:00Z,Make and share this Low-Fat Berry Blue Frozen ...,"c(""https://img.sndimg.com/food/image/upload/w_...",Frozen Desserts,"c(""Dessert"", ""Low Protein"", ""Low Cholesterol"",...","c(""4"", ""1/4"", ""1"", ""1"")","c(""blueberries"", ""granulated sugar"", ""vanilla ...",4.5,4.0,170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,,"c(""Toss 2 cups berries with sugar."", ""Let stan...",Vegetarian,"[tensor(-0.0447), tensor(-0.0615), tensor(0.01..."
1,39,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29T13:12:00Z,Make and share this Biryani recipe from Food.com.,"c(""https://img.sndimg.com/food/image/upload/w_...",Chicken Breast,"c(""Chicken Thigh & Leg"", ""Chicken"", ""Poultry"",...","c(""1"", ""4"", ""2"", ""2"", ""8"", ""1/4"", ""8"", ""1/2"", ...","c(""saffron"", ""milk"", ""hot green chili peppers""...",3.0,1.0,1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,,"c(""Soak saffron in warm milk for 5 minutes and...",Non-Vegetarian,"[tensor(-0.0943), tensor(-0.0170), tensor(-0.0..."
2,40,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05T19:52:00Z,This is from one of my first Good House Keepi...,"c(""https://img.sndimg.com/food/image/upload/w_...",Beverages,"c(""Low Protein"", ""Low Cholesterol"", ""Healthy"",...","c(""1 1/2"", ""1"", NA, ""1 1/2"", NA, ""3/4"")","c(""sugar"", ""lemons, rind of"", ""lemon, zest of""...",4.5,10.0,311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,,"c(""Into a 1 quart Jar with tight fitting lid, ...",Vegetarian,"[tensor(-0.0523), tensor(-0.0051), tensor(-0.0..."
3,41,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03T14:54:00Z,This dish is best prepared a day in advance to...,"c(""https://img.sndimg.com/food/image/upload/w_...",Soy/Tofu,"c(""Beans"", ""Vegetable"", ""Low Cholesterol"", ""We...","c(""12"", ""1"", ""2"", ""1"", ""10"", ""1"", ""3"", ""2"", ""2...","c(""extra firm tofu"", ""eggplant"", ""zucchini"", ""...",4.5,2.0,536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"c(""Drain the tofu, carefully squeezing out exc...",Vegetarian,"[tensor(-0.0779), tensor(-0.0262), tensor(0.00..."
4,42,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19T06:19:00Z,Make and share this Cabbage Soup recipe from F...,"""https://img.sndimg.com/food/image/upload/w_55...",Vegetable,"c(""Low Protein"", ""Vegan"", ""Low Cholesterol"", ""...","c(""46"", ""4"", ""1"", ""2"", ""1"")","c(""plain tomato juice"", ""cabbage"", ""onion"", ""c...",4.5,11.0,103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,,"c(""Mix everything together and bring to a boil...",Vegetarian,"[tensor(-0.0476), tensor(0.0160), tensor(-0.06..."


In [14]:
def recommend_recipes(nutrients, ingredients, diet_preference):
    """Recommend recipes based on user nutrients, ingredients, and dietary preference."""

    # Filter dataset based on dietary preference
    if diet_preference != "Any":
        filtered_df = df[df["DietaryCategory"] == diet_preference].copy()
    else:
        filtered_df = df.copy()

    # Encode input ingredients
    input_ingredient_embedding = model.encode(" ".join(ingredients), convert_to_tensor=True)

    # Compute cosine similarity
    ingredient_similarities = util.pytorch_cos_sim(
        torch.stack(filtered_df["IngredientEmbedding"].tolist()), input_ingredient_embedding
    ).squeeze().numpy()

    # Normalize nutrient similarity
    nutrient_columns = ["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent", 
                        "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]
    
    df_nutrients = filtered_df[nutrient_columns].fillna(0)
    nutrient_array = df_nutrients.to_numpy()

    # Compute Euclidean distance
    input_nutrient_array = np.array([nutrients[col] for col in nutrient_columns]).reshape(1, -1)
    nutrient_distances = np.linalg.norm(nutrient_array - input_nutrient_array, axis=1)
    nutrient_similarities = 1 / (1 + nutrient_distances)

    # Compute final scores
    final_scores = (0.3 * nutrient_similarities) + (0.7 * ingredient_similarities)

    # Rank recipes and return top results
    filtered_df["SimilarityScore"] = final_scores
    top_recipes = filtered_df.sort_values(by="SimilarityScore", ascending=False).head(5)

    return top_recipes[["Name", "Images", "RecipeInstructions"]].to_dict(orient="records")


In [15]:
# Step 7: User input for testing
user_nutrients = {
    "Calories": 500, "FatContent": 20, "SaturatedFatContent": 5,
    "CholesterolContent": 10, "SodiumContent": 500, "CarbohydrateContent": 50,
    "FiberContent": 10, "SugarContent": 10, "ProteinContent": 30
}

user_ingredients = [ "Egg, whole, raw, frozen, salted, pasteurized", "Cheese, American, restaurant",
                    "Cheese, cotija, solid", "Crustaceans, crab, alaska king, raw",
                      "Mollusks, clam, mixed species, raw",  "Seaweed, wakame, raw",
                    "Cream cheese, full fat, block"]
# Define the diet preference
diet_preference = "Any"# You can set this to "Vegetarian", "Non-Vegetarian", or "Any" based on user input

# Step 8: Get recommendations
recommendations = recommend_recipes(user_nutrients, user_ingredients, diet_preference)

# Step 9: Display results
for recipe in recommendations:
    print(f"Name: {recipe['Name']}\nImage: {recipe['Images']}\nInstructions: {recipe['RecipeInstructions']}\n{'-'*50}")

Name: Shrimp and Gruyere Cheesecake
Image: character(0)
Instructions: c("cheese (about 5 oz.)", "1 Tsp.", "pepper   --GARNISHES IF DESIRED:--   Red pepper strips   Whole cooked shrimp   Basil leaves   --TOMATO SAUCE:--   1/4 cup chopped onion   1 Clove crushed garlic   1 Tbsp.", "olive oil   2 (14 Oz.) cans whole tomatoes  1 1/2 Tsp.", "dried Italian seasoning   1 bay leaf  Serve with tomato sauce, recipe to follow.", "Combine cracker crumbs and 1/4 cup  melted butter.", "Press into bottom of 9 inch springform pan.", "Chop cooked and  cleaned shrimp.", "Saute peppers, onion and garlic in 3 tablespoons melted butter  for 4-5 minutes until tender, add chopped shrimp and saute another minute.  Drain well and set aside. Beat cream cheese and mayonnaise at high speed until  light and fluffy, add eggs, one at a time, beating well after each addition.", 
"Gradually add milk beating on low speed just until blended.", "Stir in shrimp  mixture, shredded Gruyere or Swiss and 1 teaspoon pepper.", 

In [17]:
df[df['Name']=="Chicken with Lemongrass"]

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,AggregatedRating,ReviewCount,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions,DietaryCategory,IngredientEmbedding
1423,2873,Chicken with Lemongrass,148316,- Carla -,,PT0S,PT0S,1999-09-26T04:14:00Z,"The lemongrass, really compliments this easy and nutritious recipe. Serve with Asian noodles or steamed white rice.",character(0),Chicken,"c(""Poultry"", ""Meat"", ""Asian"", ""< 15 Mins"")","c(""2"", ""1"", ""1"", ""2"", ""1/2"", ""2"", ""2"", ""1"", ""1"", ""1"", ""2"", ""1/2"", ""2"", ""1/4"")","c(""onion"", ""garlic"", ""boneless chicken breasts"", ""pepper"", ""lemongrass"", ""fish sauce"", ""sugar"", ""curry paste"", ""curry powder"", ""red chile"", ""water"", ""coconut milk"", ""peanuts"", ""cilantro leaf"")",5.0,3.0,583.4,43.7,17.0,92.8,1519.9,14.4,2.6,6.7,35.9,2.0,,"c(""Heat oil and fry onion with garlic for 1 minute. Add chicken, black pepper, lemon grass, fish sauce, sugar, curry and chili and stir fry for 4 minutes."", ""Add chicken, black pepper, lemon grass, fish sauce, sugar, curry and chili pepper and stir fry 5 minutes."", ""Add water and continue to stir fry an additional 8 to 10 minutes, or until chicken is just about cooked through."", ""Remove smashed Lemongrass."", ""Add the coconut milk and simmer, stirring, until sauce it thickened."", ""Serve over Asian noodles or steamed white rice."", \n""Garnish with sprinkled chopped nuts and the freshly chopped cilantro."")",Non-Vegetarian,"[tensor(-0.0834), tensor(-0.0402), tensor(-0.0157), tensor(0.0585), tensor(0.0039), tensor(0.1192), tensor(0.0315), tensor(-0.0134), tensor(-0.0264), tensor(-0.0746), tensor(0.0814), tensor(-0.0984), tensor(-0.0244), tensor(-0.0784), tensor(0.0474), tensor(-0.0249), tensor(0.0796), tensor(0.0142), tensor(-0.0466), tensor(-0.1246), tensor(0.0185), tensor(0.0067), tensor(0.0434), tensor(0.0030), tensor(0.0257), tensor(0.0559), tensor(0.0307), tensor(-0.0120), tensor(-0.0529), tensor(-0.0739), tensor(-0.0130), tensor(-0.0169), tensor(0.0769), tensor(-0.0120), tensor(-0.0110), tensor(-0.0008), tensor(-0.0466), tensor(-0.0752), tensor(0.0801), tensor(0.0389), tensor(0.0129), tensor(-0.0003), tensor(0.0856), tensor(0.0178), tensor(0.0353), tensor(0.0129), tensor(-0.0460), tensor(0.0702), tensor(0.0027), tensor(-0.0231), tensor(-0.0726), tensor(0.0024), tensor(-0.0760), tensor(0.0249), tensor(0.0525), tensor(-0.0622), tensor(-0.0434), tensor(-0.0755), tensor(0.0384), tensor(0.0606), tensor(0.0209), tensor(-0.0337), tensor(-0.0049), tensor(0.0420), tensor(-0.0757), tensor(-0.0245), tensor(-0.0088), tensor(0.0777), tensor(0.0416), tensor(0.0040), tensor(-0.0592), tensor(0.0148), tensor(-0.0131), tensor(0.0678), tensor(-0.0235), tensor(0.0522), tensor(0.0515), tensor(-0.0738), tensor(0.0143), tensor(-0.0693), tensor(-0.0386), tensor(0.0655), tensor(0.0010), tensor(0.0960), tensor(-0.0053), tensor(0.0842), tensor(-0.0381), tensor(0.0109), tensor(0.0027), tensor(0.0016), tensor(0.0594), tensor(-0.0331), tensor(0.0113), tensor(-0.0075), tensor(-0.0164), tensor(0.0367), tensor(-0.0067), tensor(-0.1618), tensor(-0.0288), tensor(-0.0007), ...]"


# End

In [16]:
diet_preference = st.sidebar.selectbox("Dietary Preference:", ["Any", "Vegetarian", "Non-Vegetarian"])



non_veg_ingredients = ["chicken", "beef", "pork", "fish", "shrimp", "lamb", "eggs", "egg"]

df["DietaryCategory"] = df["RecipeIngredientParts"].apply(
    lambda ingredients: "Non-Vegetarian" if any(re.search(rf"\b{item}\b", str(ingredients).lower()) for item in non_veg_ingredients) else "Vegetarian"
)

# Display the updated DataFrame
print(df)


def recommend_recipes(nutrients, ingredients, diet_preference):
    """Recommend recipes based on user nutrients, ingredients, and dietary preference."""

    # Filter dataset based on dietary preference
    if diet_preference != "Any":
        filtered_df = df[df["DietaryCategory"] == diet_preference].copy()
    else:
        filtered_df = df.copy()

    # Encode input ingredients
    input_ingredient_embedding = model.encode(" ".join(ingredients), convert_to_tensor=True)

    # Compute cosine similarity
    ingredient_similarities = util.pytorch_cos_sim(
        torch.stack(filtered_df["IngredientEmbedding"].tolist()), input_ingredient_embedding
    ).squeeze().numpy()

    # Normalize nutrient similarity
    nutrient_columns = ["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent", 
                        "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]
    
    df_nutrients = filtered_df[nutrient_columns].fillna(0)
    nutrient_array = df_nutrients.to_numpy()

    # Compute Euclidean distance
    input_nutrient_array = np.array([nutrients[col] for col in nutrient_columns]).reshape(1, -1)
    nutrient_distances = np.linalg.norm(nutrient_array - input_nutrient_array, axis=1)
    nutrient_similarities = 1 / (1 + nutrient_distances)

    # Compute final scores
    final_scores = (0.6 * nutrient_similarities) + (0.4 * ingredient_similarities)

    # Rank recipes and return top results
    filtered_df["SimilarityScore"] = final_scores
    top_recipes = filtered_df.sort_values(by="SimilarityScore", ascending=False).head(5)

    return top_recipes[["Name", "Images", "RecipeInstructions"]].to_dict(orient="records")


recommendations = recommend_recipes(user_nutrients, user_ingredients, diet_preference)



NameError: name 'st' is not defined

In [None]:
# Define non-vegetarian keywords
non_veg_keywords = set([
    # Meat & Poultry
    "chicken", "beef", "pork", "mutton", "lamb", "turkey", "duck", "quail", "goat", "veal",
    "rabbit", "boar", "venison", "bison", "kangaroo", "goose", "pheasant", "pigeon", "elk",

    # Processed Meat Products
    "bacon", "ham", "sausage", "pepperoni", "salami", "chorizo", "pastrami", "prosciutto",
    "mortadella", "hot dog", "jerky", "liverwurst", "blood sausage", "scrapple",

    # Seafood
    "fish", "tuna", "salmon", "trout", "cod", "haddock", "mackerel", "sardine", "anchovy",
    "herring", "catfish", "bass", "snapper", "grouper", "halibut", "swordfish", "mahi mahi",
    "flounder", "eel", "shark", "sturgeon", "tilapia", "tuna steaks", "swordfish steaks",

    # Shellfish
    "shrimp", "prawns", "crab", "lobster", "crawfish", "squid", "octopus", "scallops",
    "mussels", "clams", "oysters", "abalone", "conch",

    # Animal-Based Ingredients
    "eggs", "gelatin", "lard", "suet", "tallow", "bone broth", "fish sauce", "oyster sauce",
    "shrimp paste", "anchovy paste", "worcestershire sauce", "caviar", "roe", "squid ink",

    # Organ Meats (Offal)
    "liver", "kidney", "heart", "brain", "tripe", "sweetbreads", "tongue", "gizzards"
])

# Function to classify recipes
def classify_recipe(row):
    """
    Classifies a recipe as 'Vegetarian' or 'Non-Vegetarian' based on:
    - `RecipeIngredientParts`
    - `RecipeCategory`
    """
    # Extract ingredient list
    ingredients = str(row["RecipeIngredientParts"]).lower().replace('"', '').replace("c(", "").replace(")", "")
    ingredient_list = [ing.strip() for ing in ingredients.split(",")]

    # Extract category list
    categories = str(row["RecipeCategory"]).lower().replace('"', '').replace("c(", "").replace(")", "")
    category_list = [cat.strip() for cat in categories.split(",")]

    # Check for non-veg keywords in ingredients or category
    if any(any(non_veg in item for non_veg in non_veg_keywords) for item in ingredient_list + category_list):
        return "Non-Veg"

    return "Veg"

# Apply classification
df["DietaryCategory"] = df.apply(classify_recipe, axis=1)