In [1]:
from sentence_transformers import SentenceTransformer
import chromadb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Step 1: instantiate the pretrained "all-MiniLM-L6-v2" Sentence Transformer.
# The same `model` object is reused below for encoding and for similarity.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [3]:
# Example sentences to encode. The first two are about the weather and the
# third is about something else; their order matters because later cells pair
# each sentence with an embedding row and an id by position.
sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]

In [4]:
# Step 2: turn every sentence into an embedding vector with model.encode().
# The result holds one row per input sentence, in the same order.
embeddings = model.encode(sentences=sentences)

# Printed (not displayed) so the output is the truncated plain-text array.
print(embeddings)

[[ 0.01919574  0.12008538  0.15959832 ... -0.00536287 -0.08109498
   0.05021341]
 [-0.01869034  0.04151865  0.0743155  ...  0.00486602 -0.06190437
   0.03187511]
 [ 0.136502    0.0822732  -0.02526161 ...  0.0876204   0.03045844
  -0.01075749]]


In [5]:
# Step 3: score every embedding against every other embedding (and itself),
# giving a square matrix with one row and one column per sentence.
# NOTE(review): the metric is whatever the model's similarity function is
# configured to use - presumably cosine for this model; confirm.
all_embeddings = embeddings
similarities = model.similarity(all_embeddings, all_embeddings)
print(similarities)

tensor([[1.0000, 0.6660, 0.1046],
        [0.6660, 1.0000, 0.1411],
        [0.1046, 0.1411, 1.0000]])


In [6]:
# Create the Chroma client used by the remaining cells.
# NOTE(review): with no arguments this is presumably Chroma's in-memory
# (non-persistent) client, so the collection vanishes with the kernel - confirm.
chroma_client = chromadb.Client()

In [7]:
# Fetch the "test_collection" collection, creating it on first use so that
# re-running this cell does not fail when the collection already exists.
collection = chroma_client.get_or_create_collection("test_collection")

In [8]:
# Store each sentence together with its precomputed embedding.
# The ids are derived from `sentences` ("1", "2", "3", ... in order) instead of
# being hard-coded, so documents, embeddings and ids always have the same
# length even if the sentence list above is edited.
sentence_ids = [str(position) for position in range(1, len(sentences) + 1)]

collection.add(
    documents=sentences,
    embeddings=embeddings,
    ids=sentence_ids,
)

In [9]:
# Sanity check: look at the first few stored records to confirm that the ids,
# documents and embeddings were written to the collection.
preview = collection.peek()
print(preview)

{'ids': ['1', '2', '3'], 'embeddings': array([[ 0.01919574,  0.12008538,  0.15959832, ..., -0.00536287,
        -0.08109498,  0.05021341],
       [-0.01869034,  0.04151865,  0.0743155 , ...,  0.00486602,
        -0.06190437,  0.03187511],
       [ 0.136502  ,  0.0822732 , -0.02526161, ...,  0.0876204 ,
         0.03045844, -0.01075749]]), 'documents': ['The weather is lovely today.', "It's so sunny outside!", 'He drove to the stadium.'], 'uris': None, 'included': ['metadatas', 'documents', 'embeddings'], 'data': None, 'metadatas': [None, None, None]}


In [10]:
# Embed the query with the same SentenceTransformer `model` that produced the
# stored document embeddings. Passing `query_texts` instead would have Chroma
# embed the text with the collection's own embedding function, which is only
# comparable to the stored vectors if it happens to be the same model.
query_embeddings = model.encode(["What did he do today?"])

# Retrieve the two stored sentences closest to the query embedding.
results = collection.query(
    query_embeddings=query_embeddings,
    n_results=2,
)

print(results)

{'ids': [['3', '1']], 'embeddings': None, 'documents': [['He drove to the stadium.', 'The weather is lovely today.']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None]], 'distances': [[1.088059902191162, 1.4133127927780151]]}
