# PGVector
##### References
- [https://python.langchain.com/docs/integrations/vectorstores/pgvector/](https://python.langchain.com/docs/integrations/vectorstores/pgvector/)

In [1]:
!pip install langchain_postgres


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Run PostgreSQL in a Docker container

```bash
docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16
```

# Prepare the HuggingFace embedding model

In [47]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

In [48]:
# Embedding model shared by the vector store for indexing and querying.
# NOTE(review): the E5 model family is documented as expecting "query: " /
# "passage: " prefixes on its inputs; no prefix is added here — confirm
# whether that matters for the retrieval quality of this demo.
embedding_model_name = "intfloat/multilingual-e5-base"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Prepare the PostgreSQL Client

- On Debian/Ubuntu you may need to install the PostgreSQL client development headers before running the code below:
  ```bash
  sudo apt-get install libpq-dev python3-dev
  ```

In [49]:
from langchain_core.documents import Document
from langchain_postgres import PGVector
from langchain_postgres.vectorstores import PGVector

In [50]:
import os

# See the docker command above to launch a postgres instance with pgvector enabled.
# The connection URL can be overridden through the PGVECTOR_CONNECTION environment
# variable, so real credentials never have to be written into the notebook. The
# fallback matches the user/password/port used by that local demo container.
DEFAULT_CONNECTION = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"  # Uses psycopg3!
connection = os.environ.get("PGVECTOR_CONNECTION", DEFAULT_CONNECTION)
collection_name = "my_docs"

In [51]:
# Create the vector store handle from the pieces prepared above: the embedding
# model, the collection name and the database connection URL.
# use_jsonb=True presumably selects a JSONB column for document metadata —
# confirm against the PGVector API reference.
vector_store = PGVector(
    connection=connection,
    collection_name=collection_name,
    embeddings=embeddings,
    use_jsonb=True,
)

# Manage vector store

## Add items to vector store


In [52]:
# Sample corpus: four short Vietnamese sentences. Each row is
# (id, text, location, topic); the id/location/topic values end up in the
# document metadata, which the filtered queries later in the notebook use.
sample_rows = [
    (1, "Những chú chó đang chạy ngoài đồng", "pond", "animals"),
    (2, "Đàn vịt đang bơi dưới ao", "pond", "animals"),
    (3, "Trái cây tươi chứa nhiều chất dinh dưỡng", "market", "food"),
    (4, "Sữa bò là thực phẩm không tốt cho trẻ nhỏ", "market", "food"),
]

docs = [
    Document(
        page_content=text,
        metadata={"id": doc_id, "location": location, "topic": topic},
    )
    for doc_id, text, location, topic in sample_rows
]

In [53]:
# Reuse each document's metadata "id" as its identifier in the vector store.
# NOTE(review): these ids are ints, while the recorded query results show the
# stored ids as strings (e.g. id='1') — confirm whether they should be passed
# as str here.
doc_ids = [doc.metadata["id"] for doc in docs]
vector_store.add_documents(docs, ids=doc_ids)

[1, 2, 3, 4]

## Delete items from vector store

In [54]:
# Example only: uncomment the line below to delete the document stored under id "3".
# Presumably left disabled so that all four sample documents stay available to the
# queries in the following section.
# vector_store.delete(ids=["3"])

# Query vector store

In [55]:
# Search for "chó" among documents whose metadata "id" is in the listed set;
# ids 7 and 9 are not among the four sample documents defined earlier.
id_filter = {"id": {"$in": [1, 2, 7, 9]}}
results = vector_store.similarity_search("chó", k=10, filter=id_filter)

# Print one bullet per hit: its text followed by its metadata.
for hit in results:
    print(f"* {hit.page_content} [{hit.metadata}]")

* Những chú chó đang chạy ngoài đồng [{'id': 1, 'topic': 'animals', 'location': 'pond'}]
* Đàn vịt đang bơi dưới ao [{'id': 2, 'topic': 'animals', 'location': 'pond'}]


In [56]:
# Filter on two metadata fields at once ("id" and "location").
# NOTE(review): multiple top-level keys are presumably AND-ed together —
# confirm against the PGVector filtering documentation.
pond_or_market_filter = {
    "id": {"$in": [1, 5, 2, 9]},
    "location": {"$in": ["pond", "market"]},
}
vector_store.similarity_search("ducks", k=10, filter=pond_or_market_filter)

[Document(id='1', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='Những chú chó đang chạy ngoài đồng'),
 Document(id='2', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='Đàn vịt đang bơi dưới ao')]

In [57]:
# Same kind of id filter, this time wrapped in an explicit "$and" list
# (which holds a single clause here).
all_ids_clause = {"id": {"$in": [1, 2, 3, 4]}}
and_filter = {"$and": [all_ids_clause]}
vector_store.similarity_search("fruit", k=10, filter=and_filter)

[Document(id='3', metadata={'id': 3, 'topic': 'food', 'location': 'market'}, page_content='Trái cây tươi chứa nhiều chất dinh dưỡng'),
 Document(id='4', metadata={'id': 4, 'topic': 'food', 'location': 'market'}, page_content='Sữa bò là thực phẩm không tốt cho trẻ nhỏ'),
 Document(id='1', metadata={'id': 1, 'topic': 'animals', 'location': 'pond'}, page_content='Những chú chó đang chạy ngoài đồng'),
 Document(id='2', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='Đàn vịt đang bơi dưới ao')]

In [58]:
# Fetch the single best match for "cats" together with its score.
# NOTE(review): the label reads "SIM", but the returned score is presumably a
# distance under PGVector's default strategy (lower = closer) — confirm.
results = vector_store.similarity_search_with_score(query="cats", k=1)
for doc, score in results:
    # ".3f" rounds to three decimal places; a bare "3f" only sets a minimum
    # field width of 3 and leaves the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.228845] Những chú chó đang chạy ngoài đồng [{'id': 1, 'topic': 'animals', 'location': 'pond'}]


## Query by turning the vector store into a retriever

In [59]:
# Expose the vector store as a retriever that uses the "mmr" search type and
# is asked for one document per query (k=1), then run a sample query.
mmr_search_kwargs = {"k": 1}
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_kwargs,
)
retriever.invoke("kitty")

[Document(id='2', metadata={'id': 2, 'topic': 'animals', 'location': 'pond'}, page_content='Đàn vịt đang bơi dưới ao')]