In [1]:
import pandas as pd
import pathlib
from recipe_preprocessor import import_recipes
from sentence_transformers import SentenceTransformer
from annoy import AnnoyIndex

Prepare the sentence-embedding model (MiniLM, loaded through `sentence-transformers`):

In [2]:
# Name of the pretrained sentence-transformers model to load.
transformer_model: str = "all-MiniLM-L12-v2"

# Instantiate the encoder from that model name.
# NOTE(review): `model` is not referenced by any other cell visible in this
# section — confirm it is used further down before keeping this cell.
model: SentenceTransformer = SentenceTransformer(transformer_model)

Load the recipe data.

In [3]:
# Location of the recipes CSV, relative to the notebook's working directory.
recipe_path: pathlib.Path = pathlib.Path("./data/recipes.csv")
# Load the recipes through the project's preprocessor.
# NOTE(review): import_recipes is project-local; presumably it parses the CSV
# and cleans columns — check recipe_preprocessor for the exact schema.
recipes: pd.DataFrame = import_recipes(recipe_path)

In [70]:
# Pull the "RecipeIngredientParts" column out of the frame as its underlying array.
# NOTE(review): not referenced by any other cell visible in this section.
ingredients = recipes["RecipeIngredientParts"].values

In [77]:
# Nutrition columns that make up a recipe's macro vector. The order matters:
# it fixes what each position of every "Macros" list means, and any query
# vector compared against them must follow the same order.
macronutrient_cols = [
    "Calories",
    "FatContent",
    "SaturatedFatContent",
    "CholesterolContent",
    "SodiumContent",
    "CarbohydrateContent",
    "FiberContent",
    "SugarContent",
    "ProteinContent",
]


def macro_grouper(row):
    """Return one row's macronutrient values as a list, in macronutrient_cols order.

    Per-row equivalent of the vectorised assignment below. Defined with
    ``def`` instead of binding a lambda to a name, so tracebacks carry a real
    function name and any cell that calls ``macro_grouper`` keeps working.
    """
    return [row[col] for col in macronutrient_cols]


# Build the column in a single vectorised pass rather than calling Python code
# once per row via DataFrame.apply(axis=1): select the nine columns, convert
# to a 2-D array, and turn each array row into a plain list. Assignment is
# positional, so row i of the frame receives list i.
recipes["Macros"] = recipes[macronutrient_cols].to_numpy().tolist()

In [80]:
# Sanity check: the first recipe's macro vector should have one entry per
# macronutrient column. `.iloc[0]` selects the first row by position; a bare
# `[0]` on a Series is a label lookup and only hits the first row when the
# index happens to contain the label 0 at position 0.
len(recipes["Macros"].iloc[0])

9

In [82]:
# Pair every recipe's index label with its macro vector. The label is reused
# as the Annoy item id, so ids returned by a query can be looked up with
# recipes.loc[...].
# NOTE(review): Annoy item ids must be non-negative integers — assumes the
# recipes index satisfies that; confirm if import_recipes changes the index.
embedding_indexes = zip(recipes["Macros"].index, recipes["Macros"])

# Derive the vector dimension from the column list instead of hard-coding 9,
# so the index cannot silently drift from the definition of "Macros".
vec_size = len(macronutrient_cols)
# Number of trees in the forest; more trees trade build time and file size
# for query precision.
n_trees = 10

# NOTE(review): the "angular" metric compares direction only, so a recipe with
# the same macro proportions but ten times the calories counts as a near
# neighbour. Confirm that is intended; "euclidean" would respect magnitudes.
index = AnnoyIndex(vec_size, "angular")

for item_id, macro_vector in embedding_indexes:
    index.add_item(item_id, macro_vector)

index.build(n_trees)
# Persist the built index; kept as the cell's last expression so the result
# of save() is displayed.
index.save("testIndexMacros.ann")

True

In [84]:
# Re-open the saved index from disk into a fresh AnnoyIndex created with the
# same dimension (vec_size) and metric ("angular") that were used to build it.
# This rebinds `index`, replacing the in-memory index from the build cell.
index = AnnoyIndex(vec_size, "angular")
index.load("testIndexMacros.ann")

True

In [4]:
# Peek at the macro vector stored at row position 100 (positional, not by label).
macros_column = recipes["Macros"]
macros_column.iloc[100]

<pandas.core.indexing._iLocIndexer at 0x7f46bc7e83b0>

In [92]:
# Hand-written query vector, one value per entry of macronutrient_cols and in
# that order: Calories, Fat, SaturatedFat, Cholesterol, Sodium, Carbohydrate,
# Fiber, Sugar, Protein.
testEmbedding = [55.0, 2.0, 0.0, 0.0, 7.0, 8.0, 1.0, 5.0, 1.5]

# Ask the index for the ten nearest item ids, then translate each id back to
# a recipe name. Item ids are the recipes index labels, hence the .loc lookup.
neighbor_ids = index.get_nns_by_vector(testEmbedding, 10)
recipe_names = recipes["Name"]
vectors = [recipe_names.loc[item_id] for item_id in neighbor_ids]

In [93]:
# Display the recipe names retrieved for the test query (despite its name,
# `vectors` holds names, not embeddings).
vectors

['Korean Sesame Seed Cookies',
 'Roasted Cherry or Grape Tomatoes',
 'Portuguese Beef and Onions (Bifes De Cebolada)',
 "Julie's Rhubarb Bars or Crisp",
 'Peanut Butter, Banana, Chocolate Chip Brownies (Vegan or Not)',
 'Roasted Tomato, Pepper, and Red Onion Soup',
 'Raw Apple Crumble (No Bake)',
 'Baked Vegetable Ratatouille',
 'Maple-Pecan Granola',
 'Siena Cake - Panforte de Siena']