In [None]:
import chromadb
from pathlib import Path
import json
from sentence_transformers import SentenceTransformer
from chromadb.utils import embedding_functions
import uuid

In [None]:
# Locate the project's data folders relative to the kernel's working
# directory, which is expected to be .../src/data_processing.
current_dir = Path.cwd()
project_root = current_dir.parent.parent  # two levels above the working directory

# Input and output locations under <project_root>/data.
raw_data_folder = Path(project_root, "data", "raw")
output_parent = Path(project_root, "data", "processed")

In [None]:
def load_3a2m_recipe_json(path):
    """Read a JSON file and return its decoded content.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file. The rest of this
        notebook iterates over the result and indexes each item by
        ``"recipe_name"`` and ``"directions"``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Pin the encoding: without it ``open`` falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII recipe text when the
    # locale is not UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
def get_recipes_from_data(data):
    """Turn recipe records into parallel lists of document texts and metadata.

    Parameters
    ----------
    data : iterable
        Recipe records. Each record is indexed by the keys
        ``"recipe_name"`` and ``"directions"``.

    Returns
    -------
    tuple[list[str], list[dict]]
        ``(documents, metadatas)`` in input order. Each document is the
        text ``"Recipe: <name>\\nDirections: <directions>"`` and each
        metadata entry is ``{"name": <name>}``.

    Raises
    ------
    KeyError
        If a dict record lacks one of the two keys.
    """
    # Build text and metadata together in a single pass so that one-shot
    # iterables (e.g. generators) are consumed exactly once.
    pairs = [
        (
            f"Recipe: {entry['recipe_name']}\n"
            f"Directions: {entry['directions']}",
            {"name": entry["recipe_name"]},
        )
        for entry in data
    ]

    documents = [text for text, _ in pairs]
    metadatas = [meta for _, meta in pairs]

    # Progress report: how many entries of each kind were produced.
    print(len(documents), " Documents Created.")
    print(len(metadatas), " Metadatas Created.")
    return documents, metadatas

In [None]:
# Checkpoint name shared by the standalone model and the Chroma embedding
# function, kept in one place so the two cannot drift apart.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
# Number of documents sent to Chroma per call.
# NOTE(review): Chroma is understood to cap the size of a single write; 1000
# is a conservative choice — confirm against the installed version.
INSERT_BATCH_SIZE = 1000

# NOTE(review): `model` is not referenced by any cell visible here; it is kept
# because later cells may rely on it.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

chroma_client = chromadb.Client()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDING_MODEL_NAME
)

# Attach the embedding function explicitly. Previously it was constructed but
# never handed to the collection, so the collection did not use it.
collection = chroma_client.get_or_create_collection(
    name="documents",
    embedding_function=sentence_transformer_ef,
)

recipe_data = load_3a2m_recipe_json(output_parent / "3a2m_recipe_data.json")
recipe_documents, recipe_metadata = get_recipes_from_data(recipe_data)

# Deterministic ids (position + text) combined with `upsert` make this cell
# idempotent: re-running it in the same kernel overwrites the existing
# entries instead of inserting a second copy under fresh random ids.
# Including the position keeps ids unique when two recipes share a text.
recipe_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{position}:{text}"))
    for position, text in enumerate(recipe_documents)
]

# Write in bounded batches; an empty dataset results in no calls at all.
for start in range(0, len(recipe_documents), INSERT_BATCH_SIZE):
    stop = start + INSERT_BATCH_SIZE
    collection.upsert(
        ids=recipe_ids[start:stop],
        documents=recipe_documents[start:stop],
        metadatas=recipe_metadata[start:stop],
    )

In [None]:
# Sample lookup: send one free-text query to the collection and request
# three results for it.
query = "How to make pasta"
results = collection.query(n_results=3, query_texts=[query])