In [None]:
import csv

# Load sample data (a restaurant menu of items).
# newline='' is what the csv module asks for, so that line breaks embedded in quoted fields are parsed correctly.
with open('menu_items.csv', newline='') as file:
    lines = csv.reader(file)

    # Skip the header row. The None default keeps an empty file from raising StopIteration.
    next(lines, None)

    # Store the name of the menu items in this array. In Chroma, a "document" is a string i.e. name, sentence, paragraph, etc.
    documents = []

    # Store the corresponding menu item IDs in this array.
    metadata = []

    # Each "document" needs a unique ID. This is like the primary key of a relational database. We'll start at 1 and increment from there.
    ids = []

    # csv.reader yields an empty list for a blank line; drop those so that indexing line[1] cannot fail on them.
    data_rows = (row for row in lines if row)

    # enumerate(start=1) supplies the running ID, so no counter named `id` shadows the builtin.
    for item_number, line in enumerate(data_rows, start=1):
        documents.append(line[1])                  # column 1 is used as the document text
        metadata.append({"item_id": line[0]})      # column 0 is kept as the item_id metadata
        ids.append(str(item_number))               # IDs are stored as strings

In [None]:
import chromadb
from chromadb.utils import embedding_functions

# Instantiate the chromadb client. Data is stored on disk: a folder named 'chromadb_data' is created
# relative to the current working directory (the path below is relative).
# Only the persistent client is created here; a separate chromadb.Client() assigned to the same name
# would be overwritten immediately and never used.
chroma_client = chromadb.PersistentClient(path="./chromadb_data")

In [9]:
# Select the embedding model to use.
# List of model names can be found here https://www.sbert.net/docs/pretrained_models.html
sentence_transformers_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)

# Create the collection, aka vector database. Or, if the collection already exists, reuse it.
# get_or_create_collection is used instead of create_collection because the client persists to disk:
# on a second run the collection is already there and create_collection would raise.
# The embedding function is passed in both cases so that the same model is used for documents and queries.
collection = chroma_client.get_or_create_collection(
    name="menu_items_collection",
    embedding_function=sentence_transformers_ef
)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [10]:
# Write the menu items into the collection.
# upsert (rather than add) keeps this cell safe to re-run against the on-disk store: IDs that are
# already present are updated with the current document/metadata, and new IDs are inserted.
collection.upsert(
    documents=documents,
    metadatas=metadata,
    ids=ids
)

In [12]:
# Search the collection for "chocolate cake", asking for 5 results,
# and print the documents that come back.
search_terms = ["chocolate cake"]
returned_fields = ["documents", "metadatas", "distances"]
query_results = collection.query(
    query_texts=search_terms,
    n_results=5,
    include=returned_fields,
)
matched_documents = query_results["documents"]
print(matched_documents)

[['Fried Cake', 'Grilled Cake', 'Grilled Cake', 'Roasted Cake', 'Roasted Cake']]


In [13]:
# Search the collection for "donut", asking for 5 results,
# and print the documents that come back.
search_terms = ["donut"]
returned_fields = ["documents", "metadatas", "distances"]
query_results = collection.query(
    query_texts=search_terms,
    n_results=5,
    include=returned_fields,
)
matched_documents = query_results["documents"]
print(matched_documents)

[['Fried Smoothie', 'Fried Smoothie', 'Fried Cake', 'Spicy Cake', 'Spicy Cake']]


In [14]:
# Search the collection for "Chicken", asking for 5 results,
# and print the documents that come back.
search_terms = ["Chicken"]
returned_fields = ["documents", "metadatas", "distances"]
query_results = collection.query(
    query_texts=search_terms,
    n_results=5,
    include=returned_fields,
)
matched_documents = query_results["documents"]
print(matched_documents)

[['Fried Chicken', 'Sweet Chicken', 'Sweet Chicken', 'Spicy Chicken', 'Crispy Chicken']]
