# Postgres + PGVECTOR examples

In [23]:
from langchain_core.globals import set_debug, set_verbose
import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

set_debug(True)
set_verbose(False)

DEBUG:root:test


In [24]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [25]:
from langchain_postgres import PGVector

# See docker command above to launch a postgres instance with pgvector enabled.
connection = "postgresql+psycopg://postgres:postgres@localhost:5432/postgres"  # Uses psycopg3!
collection_name = "my_docs"


vector_store = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

In [26]:
from langchain_core.documents import Document

docs = [
    Document(
        page_content="there are cats in the pond",
        metadata={"id": 1, "location": "pond", "topic": "animals"},
    ),
    Document(
        page_content="ducks are also found in the pond",
        metadata={"id": 2, "location": "pond", "topic": "animals"},
    ),
    Document(
        page_content="fresh apples are available at the market",
        metadata={"id": 3, "location": "market", "topic": "food"},
    ),
    Document(
        page_content="the market also sells fresh oranges",
        metadata={"id": 4, "location": "market", "topic": "food"},
    ),
    Document(
        page_content="the new art exhibit is fascinating",
        metadata={"id": 5, "location": "museum", "topic": "art"},
    ),
    Document(
        page_content="a sculpture exhibit is also at the museum",
        metadata={"id": 6, "location": "museum", "topic": "art"},
    ),
    Document(
        page_content="a new coffee shop opened on Main Street",
        metadata={"id": 7, "location": "Main Street", "topic": "food"},
    ),
    Document(
        page_content="the book club meets at the library",
        metadata={"id": 8, "location": "library", "topic": "reading"},
    ),
    Document(
        page_content="the library hosts a weekly story time for kids",
        metadata={"id": 9, "location": "library", "topic": "reading"},
    ),
    Document(
        page_content="a cooking class for beginners is offered at the community center",
        metadata={"id": 10, "location": "community center", "topic": "classes"},
    ),
]

vector_store.add_documents(docs, ids=[doc.metadata["id"] for doc in docs])

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'idempotency_key': 'stainless-python-retry-b7a74615-0343-4a15-9f4a-9680a99ee3ee', 'post_parser': <function Embeddings.create.<locals>.parser at 0x1157bc720>, 'json_data': {'input': [[19041, 527, 19987, 304, 279, 36670], [1072, 14895, 527, 1101, 1766, 304, 279, 36670], [72408, 41776, 527, 2561, 520, 279, 3157], [1820, 3157, 1101, 31878, 7878, 85138], [1820, 502, 1989, 31324, 374, 27387], [64, 51067, 31324, 374, 1101, 520, 279, 24925], [64, 502, 11033, 8221, 9107, 389, 4802, 6825], [1820, 2363, 6469, 20628, 520, 279, 6875], [1820, 6875, 18939, 264, 17496, 3446, 892, 369, 6980], [64, 17677, 538, 369, 47950, 374, 9076, 520, 279, 4029, 4219]], 'model': 'text-embedding-3-large', 'encoding_format': 'base64'}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings
DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None tim

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [27]:
results = vector_store.similarity_search(
    "where are cats", k=10, filter={"id": {"$in": [1, 5, 2, 9]}}
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'idempotency_key': 'stainless-python-retry-22556b27-08ee-41dc-81db-a98f7d1b3112', 'post_parser': <function Embeddings.create.<locals>.parser at 0x16157e480>, 'json_data': {'input': [[2940, 527, 19987]], 'model': 'text-embedding-3-large', 'encoding_format': 'base64'}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16a0f23d0>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x161bd3da0> server_hostname='api.openai.com' timeout=None
DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._back

* there are cats in the pond [{'id': 1, 'topic': 'animals', 'location': 'pond'}]
* ducks are also found in the pond [{'id': 2, 'topic': 'animals', 'location': 'pond'}]
* the library hosts a weekly story time for kids [{'id': 9, 'topic': 'reading', 'location': 'library'}]
* the new art exhibit is fascinating [{'id': 5, 'topic': 'art', 'location': 'museum'}]


In [28]:
#embedding tradicional
print(embeddings.embed_query(text="cats"))

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'idempotency_key': 'stainless-python-retry-35f218b2-c055-4dd9-9c86-7d0c7d7c8207', 'post_parser': <function Embeddings.create.<locals>.parser at 0x16157c400>, 'json_data': {'input': [[38552]], 'model': 'text-embedding-3-large', 'encoding_format': 'base64'}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16a01a450>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x161bd3da0> server_hostname='api.openai.com' timeout=None
DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._backends.sync.S

[-0.05065784975886345, 0.021062159910798073, -0.010355295613408089, -0.0019056620076298714, 0.03001118078827858, 0.011505884118378162, -0.02870078757405281, -0.02034304104745388, 0.03758588805794716, 0.0278378464281559, -0.03675490617752075, -0.01337558962404728, -0.013703187927603722, -0.009780000895261765, -0.028445102274417877, 0.030426669865846634, -0.015437060967087746, -0.004071005154401064, 0.004146912135183811, 0.008988971821963787, -0.001224497682414949, -0.01632397249341011, 0.016355933621525764, 0.00777046661823988, 0.02932402305305004, 0.0031121817883104086, 0.009004952386021614, -0.0018916791304945946, -0.014670001342892647, 0.0052175987511873245, 0.03627549484372139, 0.05535608530044556, 0.033846475183963776, 0.014901717193424702, 0.0038033337332308292, -0.024657748639583588, 0.013902942650020123, -0.007474829442799091, 0.020327061414718628, -0.026239806786179543, -0.01826559007167816, -0.04279549419879913, -0.05142490938305855, -0.01466201152652502, -0.03365470841526985,

### Busca com score de similaridade

In [22]:
print(type(vector_store))

results = vector_store.similarity_search_with_score(query="cats", k=3)


for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

<class 'langchain_postgres.vectorstores.PGVector'>
* [SIM=0.554739] there are cats in the pond [{'id': 1, 'topic': 'animals', 'location': 'pond'}]
* [SIM=0.807236] ducks are also found in the pond [{'id': 2, 'topic': 'animals', 'location': 'pond'}]
* [SIM=0.830963] the book club meets at the library [{'id': 8, 'topic': 'reading', 'location': 'library'}]
