# storage

A project to explore using ChromaDB databases.

Based on the Real Python tutorial.

In [1]:
import chromadb
from chromadb.utils import embedding_functions


In [30]:
# Directory handed to the persistent Chroma client as its storage path.
CHROMA_DATA_PATH = "chroma_data/"

# Model name passed to the sentence-transformer embedding function.
EMBED_MODE = "all-MiniLM-L6-v2"

# Name of the collection the documents are stored in.
COLLECTION_NAME = "demo_docs2"

In [31]:
# Chroma client whose data is persisted under CHROMA_DATA_PATH.
client = chromadb.PersistentClient(
    path=CHROMA_DATA_PATH,
)

In [32]:
# Embedding function built from the model named by EMBED_MODE; it is handed
# to the collection so documents and queries are embedded the same way.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBED_MODE,
)

In [33]:
# Open the collection, creating it only if it does not exist yet. The client
# is persistent, so a plain `create_collection` fails when this cell (or the
# whole notebook) is run a second time against the same data directory.
collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_func,  # type: ignore
    # Use cosine distance for the collection's HNSW index.
    metadata={"hnsw:space": "cosine"},
)

In [34]:
# Sample corpus: one sentence per topic. Entries are paired by position with
# `genres` below when both lists are passed to the collection.
documents = [
    "The latest iPhone model comes with impressive features and a powerful camera.",
    "Exploring the beautiful beaches and vibrant culture of Bali is a dream for many travelers.",
    "Einstein's theory of relativity revolutionized our understanding of space and time.",
    "Traditional Italian pizza is famous for its thin crust, fresh ingredients, and wood-fired ovens.",
    "The American Revolution had a profound impact on the birth of the United States as a nation.",
    "Regular exercise and a balanced diet are essential for maintaining good physical health.",
    "Leonardo da Vinci's Mona Lisa is considered one of the most iconic paintings in art history.",
    "Climate change poses a significant threat to the planet's ecosystems and biodiversity.",
    "Startup companies often face challenges in securing funding and scaling their operations.",
    "Beethoven's Symphony No. 9 is celebrated for its powerful choral finale, 'Ode to Joy.'",
]

# Genre label for each entry of `documents`, in the same order; stored as the
# "genre" metadata field of the corresponding document.
genres = [
    "technology",
    "travel",
    "science",
    "food",
    "history",
    "fitness",
    "art",
    "climate change",
    "business",
    "music",
]

# Store every document with a positional id ("id0", "id1", ...) and its genre
# as metadata. `upsert` is used rather than `add` so that re-running this cell
# against the persistent collection rewrites the same ids instead of trying
# to insert records that already exist.
collection.upsert(
    documents=documents,
    ids=[f"id{i}" for i in range(len(documents))],
    metadatas=[{"genre": g} for g in genres],
)

In [38]:
# Run two free-text queries against the collection, asking for the two
# closest documents per query and returning their text and distances.
query_results = collection.query(
    n_results=2,
    include=["documents", "distances"],
    query_texts=[
        "Teach me about history",
        "What's going on in the world?",
    ],
)

In [39]:
# Documents returned for the first query text ("Teach me about history").
query_results["documents"][0]

["Einstein's theory of relativity revolutionized our understanding of space and time.",
 'The American Revolution had a profound impact on the birth of the United States as a nation.']

In [40]:
# Distances for the first query's matches, in the same order as the documents
# above; the collection was configured with "hnsw:space": "cosine".
query_results["distances"][0]

[0.6265882955482529, 0.6904193397408608]