In [1]:
from anaconda_ai.integrations.langchain import AnacondaQuantizedModelEmbeddings
from anaconda_ai import get_default_client

In [2]:
# Obtain the default anaconda_ai client; later cells use it to list models
# (client.models.list()) and to create the vector database (client.vector_db.create()).
client = get_default_client()

In [3]:
# Keep only the catalog entries whose metadata says they were trained for
# sentence similarity; this notebook treats those as the embedding models.
embedding_models = list(
    filter(
        lambda summary: summary.metadata.trainedFor.value == "sentence-similarity",
        client.models.list(),
    )
)
# Bare last expression so the notebook renders the filtered list.
embedding_models

[ModelSummary(id='BAAI/bge-large-en-v1.5', name='bge-large-en-v1.5', metadata=ModelMetadata(numParameters=334090000, contextWindowSize=512, trainedFor=<TrainedFor.sentence_similarity: 'sentence-similarity'>, description='', files=[ModelQuantization(id='f2488172ac43d2e77b5d11ee24b503fdc819b6d68ea6679b87b814a2c088be15', modelFileName='bge-large-en-v1.5_Q4_K_M.gguf', method='Q4_K_M', sizeBytes=215769504, maxRamUsage=215115366, isDownloaded=True, localPath=PosixPath('/Users/mehmetsarica/.ai-navigator-alpha/models/BAAI/bge-large-en-v1.5/bge-large-en-v1.5_Q4_K_M.gguf')), ModelQuantization(id='3a8bfb92402faa24979dbca75397311c7a472a29d780e4aa1667950acbe17684', modelFileName='bge-large-en-v1.5_Q5_K_M.gguf', method='Q5_K_M', sizeBytes=245653920, maxRamUsage=244999782, isDownloaded=False, localPath=None), ModelQuantization(id='81c26006c344352df7f1aac14cad7eb6b4143eba2967c0f7afb5a067535760be', modelFileName='bge-large-en-v1.5_Q6_K.gguf', method='Q6_K', sizeBytes=277406112, maxRamUsage=276761149, i

In [4]:
# Build the LangChain embeddings object for the Q4_K_M quantization of
# bge-small-en-v1.5; it is passed to PGVector as `embeddings` further down.
embeddings_model = AnacondaQuantizedModelEmbeddings(model_name="bge-small-en-v1.5_Q4_K_M.gguf")

Output()

In [None]:
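# Optional smoke test: uncomment to embed a sample query and confirm the embeddings model responds.
# embeddings_model.embed_query('test')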

In [5]:
# Ask the client to create the vector database. The returned mapping carries
# the connection settings read by the next cell: host, port, database, user, password.
vectordb = client.vector_db.create()
# Display the connection settings.
# NOTE(review): the rendered output includes the password in clear text; clear
# this output before sharing if non-default credentials are ever used.
vectordb

{'running': True,
 'host': 'localhost',
 'port': 5435,
 'database': 'ai-navigator-vector-db',
 'user': 'postgres',
 'password': 'postgres'}

In [6]:
from langchain_postgres import PGVector

from urllib.parse import quote

# Unpack the connection settings reported by the vector DB service.
host = vectordb['host']
port = vectordb['port']
database = vectordb['database']
user = vectordb['user']
password = vectordb['password']

# Percent-encode the credentials before embedding them in the URL: characters
# such as '@', ':', '/', '#' or '%' inside a user name or password would
# otherwise be parsed as URL delimiters and produce a wrong connection target.
# With the default 'postgres'/'postgres' credentials the URL is unchanged.
connection = (
    f"postgresql+psycopg://{quote(str(user), safe='')}:{quote(str(password), safe='')}"
    f"@{host}:{port}/{database}"
)
collection_name = "test_collection"

# Vector store backed by the database above; documents added to it are embedded
# with `embeddings_model` and grouped under `collection_name`.
# use_jsonb=True is passed through as in the langchain_postgres examples
# (presumably stores metadata as JSONB — confirm against the PGVector docs).
vector_store = PGVector(
    embeddings=embeddings_model,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

In [7]:
from langchain_core.documents import Document

# Sample corpus: ten short sentences, each tagged with a location and a topic.
# Every row below is (page_content, location, topic); the metadata "id" is the
# row's 1-based position in the table.
docs = [
    Document(
        page_content=text,
        metadata={"id": number, "location": place, "topic": subject},
    )
    for number, (text, place, subject) in enumerate(
        [
            ("wild birds gather at the lake", "lake", "wildlife"),
            ("fish can be spotted swimming in the lake", "lake", "wildlife"),
            ("fresh vegetables are sold at the farmers market", "farmers market", "groceries"),
            ("the farmers market features local honey products", "farmers market", "groceries"),
            ("the historical exhibit showcases ancient artifacts", "heritage center", "history"),
            ("a photography display is featured at the heritage center", "heritage center", "history"),
            ("a new bakery opened downtown on Park Avenue", "Park Avenue", "dining"),
            ("the science club meets at the research center", "research center", "education"),
            ("the research center hosts monthly astronomy nights", "research center", "education"),
            ("a yoga class for seniors is available at the wellness pavilion", "wellness pavilion", "fitness"),
        ],
        start=1,
    )
]

# Insert the documents, reusing each document's metadata id as its store id.
# The call is the cell's last expression, so the returned ids are displayed.
# NOTE(review): the ids passed here are integers; LangChain's vector-store API
# appears to type ids as strings — confirm whether they should be str() first.
vector_store.add_documents(docs, ids=[entry.metadata["id"] for entry in docs])

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [8]:
# Ask the vector store for the 3 stored documents most similar to the query text "animal".
results = vector_store.similarity_search("animal", k=3)

# Print the raw result list first, then one line per hit showing its text and metadata.
print('results', results)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

results [Document(id='2', metadata={'id': 2, 'topic': 'wildlife', 'location': 'lake'}, page_content='fish can be spotted swimming in the lake'), Document(id='1', metadata={'id': 1, 'topic': 'wildlife', 'location': 'lake'}, page_content='wild birds gather at the lake'), Document(id='5', metadata={'id': 5, 'topic': 'history', 'location': 'heritage center'}, page_content='the historical exhibit showcases ancient artifacts')]
* fish can be spotted swimming in the lake [{'id': 2, 'topic': 'wildlife', 'location': 'lake'}]
* wild birds gather at the lake [{'id': 1, 'topic': 'wildlife', 'location': 'lake'}]
* the historical exhibit showcases ancient artifacts [{'id': 5, 'topic': 'history', 'location': 'heritage center'}]
