In [1]:
from pathlib import Path

import chromadb
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize Sentence-Transformers model
# embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # A lightweight model for embeddings

# Save the model so it can be used locally later
# embedding_model_path = "./models/all-MiniLM-L6-v2"
# embedding_model.save(embedding_model_path)

In [3]:
# Load the embedding model from the local copy when it is present.
# On a fresh checkout the directory does not exist yet, so download the
# model by name once and save it there for later (offline) runs.
embedding_model_path = "./models/all-MiniLM-L6-v2"
if Path(embedding_model_path).is_dir():
    embedding_model = SentenceTransformer(embedding_model_path)
else:
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # A lightweight model for embeddings
    embedding_model.save(embedding_model_path)

In [4]:
# Initialize the ChromaDB client; PersistentClient is pointed at the
# ./chromadb directory (relative to the notebook's working directory).
client = chromadb.PersistentClient(path="./chromadb")

In [5]:
# Start from an empty "hello-world" collection on every run.
# delete_collection() raises when the collection does not exist (for example
# on a brand-new database), so make sure it exists before dropping it.
COLLECTION_NAME = "hello-world"
client.get_or_create_collection(COLLECTION_NAME)
client.delete_collection(COLLECTION_NAME)
# Re-create the collection, now empty
collection = client.get_or_create_collection(COLLECTION_NAME)

In [6]:
# Sample corpus: the documents, one id per document, and their embeddings
texts = [
    "ChromaDB is open-source.",
    "ChromaDB is a vector database.",
    "Sentence-Transformers generate embeddings.",
]
ids = ["doc1", "doc2", "doc3"]
embeddings_texts = embedding_model.encode(texts)

In [7]:
# Store every text together with its precomputed embedding under the matching id
collection.add(ids=ids, documents=texts, embeddings=embeddings_texts)

In [8]:
# Encode the question with the same model that encoded the documents
query_text = "What is ChromaDB?"
query_embedding = embedding_model.encode(query_text)

In [9]:
# Ask the collection for the 2 stored entries closest to the query embedding
results = collection.query(query_embeddings=query_embedding, n_results=2)

In [10]:
# Display the raw query response (ids, documents, metadatas, distances)
results

{'ids': [['doc2', 'doc1']],
 'embeddings': None,
 'documents': [['ChromaDB is a vector database.', 'ChromaDB is open-source.']],
 'uris': None,
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.5258619857407117, 0.658771426842463]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [11]:
# Fetch stored records directly by id; note the output below lists doc1
# before doc2, i.e. not in the order the ids were requested.
collection.get(ids=["doc2", "doc1"])

{'ids': ['doc1', 'doc2'],
 'embeddings': None,
 'documents': ['ChromaDB is open-source.', 'ChromaDB is a vector database.'],
 'uris': None,
 'data': None,
 'metadatas': [None, None],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}