In [1]:
import chromadb
from chromadb.utils import embedding_functions

CHROMA_DATA_PATH = "chroma_data/"
EMBED_MODEL = "all-MiniLM-L6-v2"
COLLECTION_NAME = "demo_docs"

client = chromadb.PersistentClient(path=CHROMA_DATA_PATH)

embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBED_MODEL
)

collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_func,
    metadata={"hnsw:space": "cosine"},
)

documents = [
    "The latest iPhone model comes with impressive features and a powerful camera.",
    "Exploring the beautiful beaches and vibrant culture of Bali is a dream for many travelers.",
    "Einstein's theory of relativity revolutionized our understanding of space and time.",
    "Traditional Italian pizza is famous for its thin crust, fresh ingredients, and wood-fired ovens.",
    "The American Revolution had a profound impact on the birth of the United States as a nation.",
    "Regular exercise and a balanced diet are essential for maintaining good physical health.",
    "Leonardo da Vinci's Mona Lisa is considered one of the most iconic paintings in art history.",
    "Climate change poses a significant threat to the planet's ecosystems and biodiversity.",
    "Startup companies often face challenges in securing funding and scaling their operations.",
    "Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'",
]

genres = [
    "technology",
    "travel",
    "science",
    "food",
    "history",
    "fitness",
    "art",
    "climate change",
    "business",
    "music",
]

collection.add(
    documents=documents,
    ids=[f"id{i}" for i in range(len(documents))],
    metadatas=[{"genre": g} for g in genres]
)

query_results = collection.query(
    query_texts=["Find me some delicious food!"],
    n_results=1,
)

  from tqdm.autonotebook import tqdm, trange
Add of existing embedding ID: id0
Add of existing embedding ID: id1
Add of existing embedding ID: id2
Add of existing embedding ID: id3
Add of existing embedding ID: id4
Add of existing embedding ID: id5
Add of existing embedding ID: id6
Add of existing embedding ID: id7
Add of existing embedding ID: id8
Add of existing embedding ID: id9
Insert of existing embedding ID: id0
Insert of existing embedding ID: id1
Insert of existing embedding ID: id2
Insert of existing embedding ID: id3
Insert of existing embedding ID: id4
Insert of existing embedding ID: id5
Insert of existing embedding ID: id6
Insert of existing embedding ID: id7
Insert of existing embedding ID: id8
Insert of existing embedding ID: id9


In [2]:
query_results.keys()

dict_keys(['ids', 'distances', 'metadatas', 'embeddings', 'documents', 'uris', 'data', 'included'])

In [3]:
query_results["documents"]

[['Traditional Italian pizza is famous for its thin crust, fresh ingredients, and wood-fired ovens.']]

In [4]:
query_results["ids"]

[['id3']]

In [5]:
query_results["distances"]

[[0.7638264262281599]]

In [6]:
query_results["metadatas"]

[[{'genre': 'food'}]]

In [7]:
query_results = collection.query()

ValueError: You must provide one of query_embeddings, query_texts, query_images, or query_uris.

In [None]:
query_results = collection.query(
    query_texts=["Teach me about history", "What's going on in the world?"],
    include=["documents", "distances"],
    n_results=2
)

In [None]:
query_results["documents"][0]

["Einstein's theory of relativity revolutionized our understanding of space and time.",
 'The American Revolution had a profound impact on the birth of the United States as a nation.']

In [None]:
query_results["distances"][0]

[0.6265883071778791, 0.6904193085134411]

In [None]:
query_results["documents"][1]

["Climate change poses a significant threat to the planet's ecosystems and biodiversity.",
 "Einstein's theory of relativity revolutionized our understanding of space and time."]

In [None]:
query_results["distances"][1]

[0.8002944098021354, 0.8882106777357496]

In [None]:
collection.query(
    query_texts=["Teach me about music history"],
    n_results=1
)

{'ids': [['id2']],
 'distances': [[0.7625819595952272]],
 'metadatas': [[{'genre': 'science'}]],
 'embeddings': None,
 'documents': [["Einstein's theory of relativity revolutionized our understanding of space and time."]],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [None]:
collection.query(
    query_texts=["Teach me about music history"],
    where={"genre": {"$eq": "music"}},
    n_results=1
)

{'ids': [['id9']],
 'distances': [[0.8186329970286632]],
 'metadatas': [[{'genre': 'music'}]],
 'embeddings': None,
 'documents': [["Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'"]],
 'uris': None,
 'data': None,
 'included': ['metadatas', 'documents', 'distances']}

In [None]:
query_results = collection.query(
    query_texts=["Teach me about music history"],
    where={"genre": {"$in": ["music", "history"]}},
    n_results=2
)

In [None]:
query_results["documents"]

[["Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'",
  'The American Revolution had a profound impact on the birth of the United States as a nation.']]

In [None]:
query_results["distances"]

[[0.8186329970286632, 0.8200413863890195]]

In [None]:
collection.update(
    ids=["id1", "id2"],
    documents=["The new iPhone is awesome!", "Bali has beautiful beaches"],
    metadatas=[{"genre": "tech"}, {"genre": "beaches"}]
)

In [None]:
query_results = collection.get(ids=["id1", "id2"])

In [None]:
query_results["documents"]

['The new iPhone is awesome!', 'Bali has beautiful beaches']

In [None]:
query_results["metadatas"]

[{'genre': 'tech'}, {'genre': 'beaches'}]