In [24]:
from sentence_transformers import SentenceTransformer

# Sentences to embed
texts = [
    "What is a vector database?",
    "How do embeddings work?",
    "AI and ML applications",
]

# Pre-trained sentence-embedding model, referenced by name
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every sentence in `texts` in a single call
embeddings = model.encode(texts)

In [25]:
# Show the raw embedding vectors produced for `texts`
print(embeddings)

[[ 0.02323129 -0.03152992 -0.11742654 ...  0.00061008  0.03184174
  -0.05963086]
 [ 0.01472877 -0.08033437 -0.00036405 ...  0.04816851  0.07535779
  -0.02965575]
 [-0.04623601 -0.07980984  0.02070424 ...  0.0248969  -0.03231503
  -0.07868269]]


In [28]:
import chromadb

# Open (or create) the on-disk Chroma store under ./chroma_data
client = chromadb.PersistentClient(path="./chroma_data")

# The store is persistent, so on any re-run of this cell the collection
# already exists and create_collection() would raise. Fetching it when
# present (and creating it otherwise) keeps the cell re-runnable.
collection = client.get_or_create_collection(name="text_search_ex")

# upsert() rather than add(): inserts the records on the first run and
# overwrites the same ids on later runs, so the cell stays idempotent.
collection.upsert(
    ids=["1", "2", "3"],
    documents=texts,
    metadatas=[{"source": "doc1"}, {"source": "doc2"}, {"source": "doc3"}],
    embeddings=embeddings,
)

In [29]:
# Embed the query text with the same model that encoded `texts`
query_text = "Explain vector databases."
query_embedding = model.encode([query_text])

# Ask the collection for the two closest stored entries and print the raw response
results = collection.query(
    n_results=2,
    query_embeddings=query_embedding,
)
print("Results:", results)

Results: {'ids': [['1', '2']], 'embeddings': None, 'documents': [['What is a vector database?', 'How do embeddings work?']], 'uris': None, 'data': None, 'metadatas': [[{'source': 'doc1'}, {'source': 'doc2'}]], 'distances': [[0.14219467561685617, 1.306303851125166]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}


In [30]:
# Embed a second query text with the same model that encoded `texts`
query_text = "What is AI/ML?"
query_embedding = model.encode([query_text])

# Ask the collection for the two closest stored entries and print the raw response
results = collection.query(
    n_results=2,
    query_embeddings=query_embedding,
)
print("Results:", results)

Results: {'ids': [['3', '1']], 'embeddings': None, 'documents': [['AI and ML applications', 'What is a vector database?']], 'uris': None, 'data': None, 'metadatas': [[{'source': 'doc3'}, {'source': 'doc1'}]], 'distances': [[0.5748007323879616, 1.4356092238719511]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}
