In [8]:
import pandas as pd
import re, json
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import ast
from pathlib import Path

In [2]:
from pathlib import Path
def load_recipes():
    base = Path.cwd() / "recipes.csv"
    recipes = []
    try:
        df = pd.read_csv(base)
        for _, row in df.iterrows():
            recipes.append({
                "title": row["Title"],
                "ingredients": row["Ingredients"],
                "instructions": row["Instructions"],
                "image": row.get("Image_Name", ""),
                "cleaned_ingredients": row.get("Cleaned_Ingredients", "")
            })
        print(f"Loaded {len(recipes)} recipes")
    except Exception as e:
        print("Error loading recipes:", e)
    return recipes

RECIPES = load_recipes()

Loaded 13501 recipes


In [4]:
# LOAD PRODUCTS

def load_hemkop_jsons(folder: Path):
    products = []
    for file in folder.glob("hemkop_*.json"):
        try:
            with open(file, encoding="utf-8") as f:
                data = json.load(f)
                for item in data:
                    name = item.get("title")
                    nutrition = item.get("nutrition", {})
                    url = item.get("url", "")
                    products.append({
                        "name": name,
                        "store": "Hemköp",
                        "url": url,
                        "price": item.get("price"),
                        "nutrition": nutrition
                    })
        except Exception as e:
            print(f"Error reading {file}: {e}")
    return products

def load_ica_jsons(folder: Path):
    products = []
    for file in folder.glob("ica_*.json"):
        try:
            with open(file, encoding="utf-8") as f:
                data = json.load(f)
                for item in data:
                    name = item.get("title")
                    nutrition = item.get("nutrition", {})
                    url = item.get("url", "")
                    products.append({
                        "name": name,
                        "store": "Ica",
                        "url": url,
                        "price": item.get("price"),
                        "nutrition": nutrition
                    })
        except Exception as e:
            print(f"Error reading {file}: {e}")
    return products

def load_all_products():
    base = Path.cwd() / "data"
    hemkop_data = load_hemkop_jsons(base)
    ica_data = load_ica_jsons(base)
    products = hemkop_data + ica_data
    print(f"Loaded {len(products)} products ({len(hemkop_data)} Hemköp, {len(ica_data)} ICA)")
    return products

PRODUCTS = load_all_products()
product_names = [p["name"] for p in PRODUCTS]

Loaded 3424 products (805 Hemköp, 2619 ICA)


In [36]:
len(product_names)

4852

In [51]:
product_names

['Potatis Mjölig Klass 1',
 'Potatis Fast Klass 1',
 'Bakpotatis Klass 1',
 'Potatis Fast Klass 1',
 'Sötpotatis Klass 1',
 'Potatis Delikatess Klass 1',
 'Potatis Fast Klass 2',
 'Potatis Amandine Klass 1',
 'Potatis Mjölig Klass 2',
 'Potatis Smått&gott Klass 1',
 'Potatis Fast Eko Klass 2',
 'Potatis Delikatess Klass 1',
 'Potatis Fast Klass 1',
 'Potatis Kok Stor Klass 1',
 'Potatis Smått&gott Klass 1',
 'Potatis Mjölig Klass 1',
 'Sötpotatis Eko',
 'Potatis Fast Klass 2',
 'Potatis Mjölig Klass 2',
 'Kycklingkebab Fryst',
 'Kyckling Lårfilé Svensk',
 'Kycklingfilé Bröstfilé Sverige',
 'Kycklinglårfil Färsk Sverige',
 'Kycklingfilé Strimlad Sverige',
 'Majskyckling Hel Sverige',
 'Kycklingben Marinerade Sverige',
 'Kycklingben Färska Sverige',
 'Kyckling Bröstfilé Sverige',
 'Kyckling Hel Färsk Sverige',
 'Majskyckling Bröstfilé Sverige',
 'Kycklingklubba Sverige',
 'Kyckling Ben Frysta',
 'Kyckling Vingar Frysta',
 'Kycklinglår Frysta',
 'Kyckling Klubba Fryst',
 'Kyckling Köttbul

In [22]:

# 1. embed recipes
model = SentenceTransformer('all-MiniLM-L6-v2')
# model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
recipe_texts = []
for r in RECIPES:
    title = str(r.get("Title") or r.get("title") or "")  # 转为字符串，避免NaN
    ingredients = r.get("Ingredients") or r.get("ingredients") or ""
    
    # 如果 ingredients 是字符串形式的列表，需要转成 Python list
    if isinstance(ingredients, str):
        try:
            ingredients_list = ast.literal_eval(ingredients)  # 将 "['1 cup sugar', ...]" 转为列表
        except:
            ingredients_list = [ingredients]
    else:
        ingredients_list = ingredients
    
    ingredients_text = " ".join([str(i) for i in ingredients_list])
    recipe_texts.append(title + " " + ingredients_text)

recipes_embeddings = model.encode(recipe_texts, convert_to_numpy=True)

# 2. create FAISS index
d = recipes_embeddings.shape[1]
recipe_index = faiss.IndexFlatL2(d)
recipe_index.add(recipes_embeddings)
faiss.write_index(recipe_index, "recipes_index.faiss")
np.save("recipes_embeddings.npy", recipes_embeddings)


In [33]:
model = SentenceTransformer('KBLab/sentence-bert-swedish-cased')
product_embeddings = model.encode(product_names, convert_to_numpy=True, show_progress_bar=True)
faiss.normalize_L2(product_embeddings)

product_dim = product_embeddings.shape[1]
product_index = faiss.IndexFlatL2(product_dim)
product_index.add(product_embeddings)
faiss.write_index(product_index, "product_index.faiss")
np.save("product_embeddings.npy", product_embeddings)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/118 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/710 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/107 [00:00<?, ?it/s]

In [14]:
# 加载 FAISS 索引
recipe_index = faiss.read_index("recipes_index.faiss")

# 如果你保存了 embeddings，可以直接加载
recipe_embeddings = np.load("recipes_embeddings.npy")

In [27]:
def retrieve_recipes(query, top_k=5):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_emb = model.encode([query])
    distances, indices = recipe_index.search(query_emb, top_k)
    return [RECIPES[i] for i in indices[0]]

In [44]:
def retrieve_products(ingredients, top_k=5):
    query_emb = model.encode([ingredients])
    distances, indices = product_index.search(query_emb, top_k)
    return [PRODUCTS[i] for i in indices[0] if i < len(PRODUCTS)]

In [29]:
query = "give me some vegetarian recipes "
retrieved = retrieve_recipes(query, top_k=5)


In [44]:
context_text = "\n".join([f"- {r['title']}: {r.get('instructions', '')}, {r.get('ingredients', [])}" for r in retrieved])
context_text

'- Vegetarian Buffalo “Meatballs” With Blue Cheese Dip: Arrange a rack in center of oven and preheat to 400°F. Coat a rimmed baking sheet with oil.\nPulse garlic in a food processor until finely chopped. Add celery, beans, and mushrooms and pulse until coarsely chopped. Transfer to a large bowl. Stir in egg, panko, and 3/4 tsp. salt. Using your hands, roll tablespoonfuls of bean mixture into balls. Transfer to prepared baking sheet, packing them snuggly. Roast veggie balls, turning halfway through, until firm and cooked through, 25–30 minutes.\nMeanwhile, cook butter, hot sauce, and maple syrup in a small saucepan over medium heat until butter is melted. Stir until smooth; set aside.\nWhisk sour cream, buttermilk, blue cheese, pepper, 1 Tbsp. plus 2 tsp. chives, and remaining 1/4 tsp. salt in a medium bowl. Top with remaining 1 tsp. chives.\nTransfer veggie balls to a large bowl. Toss with hot sauce mixture and serve with blue cheese dip alongside.\nVeggie balls can be formed, not cook

In [17]:
def slugify(title: str) -> str:
    s = re.sub(r"[^a-zA-Z0-9]+", "-", title.lower()).strip("-")
    return s[:80]

# Precompute IDs for recipes
for r in RECIPES:
    r["id"] = slugify(str(r["title"] if "title" in r else r.get("Title", "")) or str(r.get("title","")))
    if not r["id"]:
        r["id"] = slugify(r.get("Title","untitled"))

In [18]:
def _rec_title(r):
    return r.get("title") or r.get("Title") or "Untitled"

def parse_ingredients_field(val):
    """Recipes CSV stores ingredients as a python-ish list string."""
    if isinstance(val, list):
        return val
    s = str(val)
    try:
        out = ast.literal_eval(s)
        if isinstance(out, list):
            return [str(x) for x in out]
    except Exception:
        pass
    # fallback: split by commas
    return [x.strip() for x in s.split(",") if x.strip()]

def split_instructions(s: str):
    raw = str(s).replace("\r\n", "\n").replace("\r", "\n")
    parts = [p.strip(" \t") for p in raw.split("\n") if p.strip()]
    if len(parts) <= 1:
        # fallback to sentence-ish split
        parts = [p.strip() for p in re.split(r"\.\s+", raw) if p.strip()]
    return parts


In [37]:
import numpy as np

def recipe_detail_payload(r, sim_threshold: float = 0.3):
    model = SentenceTransformer('KBLab/sentence-bert-swedish-cased')
    title = _rec_title(r)
    ingredients = parse_ingredients_field(r.get("ingredients") or r.get("Ingredients",""))
    steps = split_instructions(r.get("instructions") or r.get("Instructions",""))

    ing_texts = [ing.lower() for ing in ingredients]
    ing_embeddings = model.encode(ing_texts, convert_to_numpy=True, show_progress_bar=False)
    faiss.normalize_L2(ing_embeddings)

    product_embeddings = np.load("product_embeddings.npy")
    sims = ing_embeddings @ product_embeddings.T  # shape = (num_ingredients, num_products)

    # --- 用向量匹配找到每个 ingredient 的最佳产品 ---
    mapped_links = []
    for i, ing in enumerate(ingredients):
        best_idx = int(np.argmax(sims[i]))
        best_score = float(sims[i, best_idx])

        if best_score < sim_threshold:
            mapped_links.append({
                "ingredient": ing,
                "product_name": None,
                "store": None,
                "price": None,
                "url": None
            })
            continue
        
        best_prod = PRODUCTS[best_idx]
        mapped_links.append({
            "ingredient": ing,
            "product_name": best_prod["name"],
            "store": best_prod.get("store"),
            "price": best_prod.get("price"),
            "url": best_prod.get("url", ""),
            "similarity": round(best_score, 3)
        })

    return {
        "id": r["id"],
        "title": title,
        "ingredients": ingredients,
        "steps": steps,
        "where_to_buy": mapped_links
    }


In [38]:
retrieved[0]

{'title': 'Vegetarian Buffalo “Meatballs” With Blue Cheese Dip',
 'ingredients': '[\'1 tablespoon vegetable oil\', \'2 garlic cloves\', \'1 celery stalk, coarsely chopped\', \'1 (15-ounce) can white beans, rinsed, drained\', \'4 ounces button mushrooms\', \'1 large egg\', \'1 cup panko (Japanese breadcrumbs)\', \'1 teaspoon kosher salt, divided\', \'5 tablespoons unsalted butter\', "1/2 cup hot pepper sauce, preferably Frank\'s", \'2 tablespoons pure maple syrup\', \'1/2 cup sour cream\', \'1/4 cup buttermilk\', \'1/4 cup crumbled blue cheese\', \'1/2 teaspoon freshly ground black pepper\', \'2 tablespoons chopped chives, divided\']',
 'instructions': 'Arrange a rack in center of oven and preheat to 400°F. Coat a rimmed baking sheet with oil.\nPulse garlic in a food processor until finely chopped. Add celery, beans, and mushrooms and pulse until coarsely chopped. Transfer to a large bowl. Stir in egg, panko, and 3/4 tsp. salt. Using your hands, roll tablespoonfuls of bean mixture into 

In [39]:
recipe_detail_payload(retrieved[0])

{'id': 'vegetarian-buffalo-meatballs-with-blue-cheese-dip',
 'title': 'Vegetarian Buffalo “Meatballs” With Blue Cheese Dip',
 'ingredients': ['1 tablespoon vegetable oil',
  '2 garlic cloves',
  '1 celery stalk, coarsely chopped',
  '1 (15-ounce) can white beans, rinsed, drained',
  '4 ounces button mushrooms',
  '1 large egg',
  '1 cup panko (Japanese breadcrumbs)',
  '1 teaspoon kosher salt, divided',
  '5 tablespoons unsalted butter',
  "1/2 cup hot pepper sauce, preferably Frank's",
  '2 tablespoons pure maple syrup',
  '1/2 cup sour cream',
  '1/4 cup buttermilk',
  '1/4 cup crumbled blue cheese',
  '1/2 teaspoon freshly ground black pepper',
  '2 tablespoons chopped chives, divided'],
 'steps': ['Arrange a rack in center of oven and preheat to 400°F. Coat a rimmed baking sheet with oil.',
  'Pulse garlic in a food processor until finely chopped. Add celery, beans, and mushrooms and pulse until coarsely chopped. Transfer to a large bowl. Stir in egg, panko, and 3/4 tsp. salt. Usin

In [40]:
retrieved

[{'title': 'Vegetarian Buffalo “Meatballs” With Blue Cheese Dip',
  'ingredients': '[\'1 tablespoon vegetable oil\', \'2 garlic cloves\', \'1 celery stalk, coarsely chopped\', \'1 (15-ounce) can white beans, rinsed, drained\', \'4 ounces button mushrooms\', \'1 large egg\', \'1 cup panko (Japanese breadcrumbs)\', \'1 teaspoon kosher salt, divided\', \'5 tablespoons unsalted butter\', "1/2 cup hot pepper sauce, preferably Frank\'s", \'2 tablespoons pure maple syrup\', \'1/2 cup sour cream\', \'1/4 cup buttermilk\', \'1/4 cup crumbled blue cheese\', \'1/2 teaspoon freshly ground black pepper\', \'2 tablespoons chopped chives, divided\']',
  'instructions': 'Arrange a rack in center of oven and preheat to 400°F. Coat a rimmed baking sheet with oil.\nPulse garlic in a food processor until finely chopped. Add celery, beans, and mushrooms and pulse until coarsely chopped. Transfer to a large bowl. Stir in egg, panko, and 3/4 tsp. salt. Using your hands, roll tablespoonfuls of bean mixture in

In [41]:
parse_ingredients_field(retrieved)

[{'title': 'Vegetarian Buffalo “Meatballs” With Blue Cheese Dip',
  'ingredients': '[\'1 tablespoon vegetable oil\', \'2 garlic cloves\', \'1 celery stalk, coarsely chopped\', \'1 (15-ounce) can white beans, rinsed, drained\', \'4 ounces button mushrooms\', \'1 large egg\', \'1 cup panko (Japanese breadcrumbs)\', \'1 teaspoon kosher salt, divided\', \'5 tablespoons unsalted butter\', "1/2 cup hot pepper sauce, preferably Frank\'s", \'2 tablespoons pure maple syrup\', \'1/2 cup sour cream\', \'1/4 cup buttermilk\', \'1/4 cup crumbled blue cheese\', \'1/2 teaspoon freshly ground black pepper\', \'2 tablespoons chopped chives, divided\']',
  'instructions': 'Arrange a rack in center of oven and preheat to 400°F. Coat a rimmed baking sheet with oil.\nPulse garlic in a food processor until finely chopped. Add celery, beans, and mushrooms and pulse until coarsely chopped. Transfer to a large bowl. Stir in egg, panko, and 3/4 tsp. salt. Using your hands, roll tablespoonfuls of bean mixture in