In [None]:
import json
import faiss
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer

# Read the raw recipe dump; the individual records sit under payload -> data.
with open("final_all_recipes.json", "r", encoding="utf-8") as source_file:
    data = json.load(source_file)

# Flatten each raw record into the columns used downstream.
# "Recipe_title", "description" and "url" are indexed directly, so a record
# missing any of them raises KeyError; every other field falls back to "".
recipes = [
    {
        "title": entry["Recipe_title"],
        "description": entry["description"],
        "url": entry["url"],
        "img_url": entry.get("img_url", ""),
        "youtube_video": entry.get("youtube_video", ""),
        "Sub_region": entry.get("Sub_region", ""),
        "Calories": entry.get("Calories", ""),
        "Protein": entry.get("Protein (g)", ""),
        "Time": entry.get("total_time", ""),
    }
    for entry in data["payload"]["data"]
]

# Keep a CSV copy of the flattened table next to the notebook.
df = pd.DataFrame(recipes)
df.to_csv("recipes_cleaned.csv", index=False)

print(f"Processed {len(df)} recipes.")

Processed 532 recipes.


In [None]:
# Sentence-embedding model used to vectorise one descriptive string per recipe.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Coerce the free-text columns to strings before concatenating. With pandas
# string concatenation a single null (None/NaN) operand makes the whole
# result for that row NaN, which the encoder cannot process.
title_text = df["title"].fillna("").astype(str)
description_text = df["description"].fillna("").astype(str)
region_text = df["Sub_region"].fillna("").astype(str)

# One "Field: value | Field: value" string per recipe; this is the text that
# gets embedded and searched against.
df["text_for_embedding"] = (
    "Title: " + title_text + " | Description: " + description_text +
    " | Sub_region: " + region_text + " | Calories: " + df["Calories"].astype(str) +
    " | Protein: " + df["Protein"].astype(str) + "g | Time: " + df["Time"].astype(str) + " minutes"
)

embeddings = embed_model.encode(df["text_for_embedding"].tolist(), convert_to_numpy=True)

# Flat L2 index sized to the embedding width; vectors are added in DataFrame
# row order, so FAISS ids line up with positional indices into df.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

faiss.write_index(index, "recipes_faiss.index")

# Persist the DataFrame (including the text_for_embedding column) so search
# hits can be mapped back to recipe metadata.
with open("recipes_metadata.pkl", "wb") as f:
    pickle.dump(df, f)

print("FAISS index and metadata saved!")

FAISS index and metadata saved!


# Model

In [18]:
import re
import torch
import faiss
import os
import pickle
from sentence_transformers import CrossEncoder, SentenceTransformer

# Recipe metadata written by the index-building cell.
# NOTE: pickle.load can execute arbitrary code; only load a file that this
# notebook itself produced.
with open("recipes_metadata.pkl", "rb") as metadata_file:
    df = pickle.load(metadata_file)

# Vector index saved alongside the metadata.
index = faiss.read_index("recipes_faiss.index")

# Bi-encoder for embedding queries, cross-encoder for rescoring candidates.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

In [19]:
def safe_float(value, default=0.0):
    """Convert ``value`` to ``float``, falling back to ``default`` on failure.

    Args:
        value: Anything accepted by ``float()`` (number, numeric string, ...).
        default: Value returned when ``value`` cannot be converted.

    Returns:
        ``float(value)``, or ``default`` if the conversion raises
        ``TypeError``, ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(value)
    # Only conversion failures are swallowed; a bare ``except`` would also
    # trap KeyboardInterrupt/SystemExit and hide unrelated errors.
    except (TypeError, ValueError, OverflowError):
        return default

def search_recipe(query, k=3, top_n=50):
    """Return the ``k`` recipes that best match a free-text ``query``.

    Retrieval runs in two stages:

    1. The query, enriched with any recognised constraint phrases, is embedded
       and used to fetch up to ``top_n`` nearest neighbours from the FAISS
       index.
    2. Each candidate's ``"title | description"`` text is scored against the
       original query by the cross-encoder, and candidates are sorted by that
       score, highest first.

    Constraint phrases ("<N> g protein", "within <N> minutes",
    "in <word> region") are only added to the embedded query text as hints;
    they are not enforced as hard filters on the results.

    Relies on the module-level ``index``, ``df``, ``embed_model`` and
    ``cross_encoder`` objects.

    Args:
        query: Free-text search string.
        k: Maximum number of recipes to return.
        top_n: Number of nearest neighbours fetched before re-ranking.

    Returns:
        A list of recipe dicts (rows of ``df``), best match first. It may hold
        fewer than ``k`` items, and is empty when ``k`` or ``top_n`` is not
        positive or the index yields no candidates.
    """
    if k <= 0 or top_n <= 0:
        return []

    protein_match = re.search(r'(\d+)\s*(g|grams?)\s+protein', query, re.I)
    protein_req = float(protein_match.group(1)) if protein_match else None

    time_match = re.search(r'within (\d+) minutes', query, re.I)
    time_req = float(time_match.group(1)) if time_match else None

    region_match = re.search(r'in (\w+) region', query, re.I)
    region_req = region_match.group(1) if region_match else None

    # Compare against None so that an explicit 0 is still treated as a
    # stated constraint rather than being dropped as falsy.
    structured_parts = []
    if protein_req is not None:
        structured_parts.append(f"{protein_req}g protein")
    if time_req is not None:
        structured_parts.append(f"{time_req} minutes")
    if region_req is not None:
        structured_parts.append(f"{region_req} cuisine")

    structured_query = " | ".join(["Recipe search:"] + structured_parts + ["Original query:", query])

    query_embedding = embed_model.encode([structured_query], convert_to_numpy=True)

    # Distances are not needed: the final order comes from the cross-encoder.
    _, neighbour_ids = index.search(query_embedding, top_n)

    # FAISS pads the id array with -1 when the index holds fewer than top_n
    # vectors; df.iloc[-1] would silently return the last recipe, so skip them.
    candidates = [df.iloc[idx].to_dict() for idx in neighbour_ids[0] if idx >= 0]
    if not candidates:
        return []

    cross_encoder_inputs = [(query, f"{c['title']} | {c['description']}") for c in candidates]
    cross_scores = cross_encoder.predict(cross_encoder_inputs)

    # Stable sort on the score only, so tied candidates keep retrieval order.
    ranked = sorted(zip(cross_scores, candidates), key=lambda pair: pair[0], reverse=True)

    return [recipe for _, recipe in ranked[:k]]

In [22]:
# Example search: fetch the three best matches and print a summary of each.
query = "high protein Indian drink under 10 minutes"
results = search_recipe(query, k=3)

for recipe in results:
    # Assemble the whole summary first; the trailing "\n" on the last entry
    # leaves a blank line between consecutive recipes.
    summary_lines = [
        f"Recipe: {recipe['title']}",
        f"Description: {recipe['description']}",
        f"Image: {recipe['img_url']}",
        f"URL: {recipe['url']}",
        f"Region: {recipe['Sub_region']}",
        f"Calories: {recipe['Calories']}",
        f"Protein: {recipe['Protein']}g",
        f"Time: {recipe['Time']} minutes",
        f"YouTube: {recipe['youtube_video']}\n",
    ]
    print("\n".join(summary_lines))

Recipe: Mango Lassi II
Description: An Indian yogurt drink - smooth, creamy, and absolutely heavenly!
Image: https://images.media-allrecipes.com/userphotos/560x315/199894.jpg
URL: https://cosylab.iiitd.edu.in/recipedb2/#/recipe/4682
Region: Indian
Calories: 482.0
Protein: 26.761g
Time: 5 minutes
YouTube: https://www.youtube.com/results?search_query=How+to+make+Mango+Lassi+II

Recipe: Mango Lassi I
Description: A cooling drink made with mango and yogurt, Eastern Indian Lassi.
Image: https://images.media-allrecipes.com/userphotos/250x250/770743.jpg
URL: https://cosylab.iiitd.edu.in/recipedb2/#/recipe/4601
Region: Indian
Calories: 85.0
Protein: 9.8545g
Time: 7 minutes
YouTube: https://www.youtube.com/results?search_query=How+to+make+Mango+Lassi+I

Recipe: Indian Lassi
Description: Learn what a lassi drink is with this basic recipe for the popular Indian beverage. You can adjust the amount of yogurt or water for a thicker or thinner consistency. Garnish with fresh mint if desired.
Image: h