In [67]:
from sentence_transformers import SentenceTransformer
import chromadb

In [68]:
# Step 1: load the pretrained "all-MiniLM-L6-v2" Sentence Transformer checkpoint.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [69]:
# Example corpus: these are the texts that get embedded and stored below.
sentences: list[str] = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]

In [70]:
# Step 2: encode every sentence into its embedding vector (one row per sentence).
embeddings = model.encode(sentences=sentences)
print(embeddings)

[[ 0.01919573  0.12008536  0.15959834 ... -0.00536287 -0.08109501
   0.05021337]
 [-0.01869041  0.04151874  0.07431539 ...  0.00486599 -0.06190439
   0.03187513]
 [ 0.136502    0.08227324 -0.02526162 ...  0.08762044  0.03045843
  -0.01075751]]


In [71]:
# 3. Calculate the embedding similarities
# The embeddings are compared against themselves, giving one score for every
# pair of sentences (a 3x3 tensor for the three sentences above).
similarities = model.similarity(embeddings, embeddings)
print(similarities)

tensor([[1.0000, 0.6660, 0.1046],
        [0.6660, 1.0000, 0.1411],
        [0.1046, 0.1411, 1.0000]])


In [72]:
# Create a Chroma client with default settings.
# NOTE(review): no storage path is configured, so this is presumably an
# in-memory client whose data is lost on kernel restart — confirm in the Chroma docs.
chroma_client = chromadb.Client()

In [73]:
# Get the collection named "test_collection", creating it on first use.
# Get-or-create (rather than create) keeps this cell re-runnable in the same kernel.
collection = chroma_client.get_or_create_collection(name="test_collection")

In [74]:
# Store the sentences together with the embeddings precomputed by `model`.
#
# upsert (rather than add) keeps this cell idempotent: the collection is
# obtained with get_or_create_collection, so on a re-run these ids already
# exist and should simply be overwritten with the current values.
#
# The ids are derived from the number of sentences ("1", "2", ...) so they
# always line up with `sentences` / `embeddings` if the corpus is edited.
collection.upsert(
    documents=sentences,
    embeddings=embeddings,
    ids=[str(position) for position in range(1, len(sentences) + 1)],
)

In [75]:
# Sanity check: print a sample of what the collection now holds
# (ids, embeddings, documents, metadatas).
print(collection.peek())

{'ids': ['1', '2', '3'], 'embeddings': array([[ 0.01919573,  0.12008536,  0.15959834, ..., -0.00536287,
        -0.08109501,  0.05021337],
       [-0.01869041,  0.04151874,  0.07431539, ...,  0.00486599,
        -0.06190439,  0.03187513],
       [ 0.136502  ,  0.08227324, -0.02526162, ...,  0.08762044,
         0.03045843, -0.01075751]], shape=(3, 384)), 'documents': ['The weather is lovely today.', "It's so sunny outside!", 'He drove to the stadium.'], 'uris': None, 'included': ['metadatas', 'documents', 'embeddings'], 'data': None, 'metadatas': [None, None, None]}


In [76]:
# Embed the query with the same SentenceTransformer `model` that produced the
# stored vectors. Passing query_texts instead would make Chroma embed the query
# with the collection's own embedding function, a different code path from
# `model`, so query and document vectors would not be guaranteed comparable.
query_embeddings = model.encode(["What did he do today?"])

# Retrieve the two stored sentences closest to the query embedding.
results = collection.query(
    query_embeddings=query_embeddings,
    n_results=2,
)

print(results)

{'ids': [['3', '1']], 'embeddings': None, 'documents': [['He drove to the stadium.', 'The weather is lovely today.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None]], 'distances': [[1.088059902191162, 1.4133124351501465]]}
