# PgVector

In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so local library edits take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

Run a PostgreSQL database with the pgvector extension in a Docker container:
  ```bash
  docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector
  ```

Import dependencies

In [2]:
import os
from typing import List

from haystack import component, Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.utils import Secret
from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore

Create the `PgvectorDocumentStore`

In [3]:
# Read the connection string from the PG_CONN_STR environment variable instead
# of embedding it in the notebook as a token secret: token-based secrets cannot
# be serialized by Haystack, and an env var lets each user point the notebook at
# their own database. `setdefault` only fills in the value matching the local
# Docker container started above when the variable is not already set.
os.environ.setdefault(
    "PG_CONN_STR",
    "postgresql://postgres:postgres@localhost:5432/postgres",
)

document_store = PgvectorDocumentStore(
    connection_string=Secret.from_env_var("PG_CONN_STR"),
    # Must match the length of the embeddings written to the store (768 here).
    embedding_dimension=768,
    vector_function="cosine_similarity",
    # Start from a fresh table on every run so the notebook is re-runnable.
    recreate_table=True,
    search_strategy="hnsw",
)


In [4]:
# Index six sample documents. Each row is (content, type, domain, fill value);
# the fill value is repeated 768 times to form a placeholder embedding.
# NOTE: constant vectors all point in the same direction, so these embeddings
# are only suitable for the metadata-filter examples below, not for comparing
# documents by cosine similarity.
document_store.write_documents(
    [
        Document(
            content=text,
            meta={"type": kind, "domain": domain},
            embedding=[fill] * 768,
        )
        for text, kind, domain, fill in [
            ("Is there a way to prevent the model from overfitting?", "question", "ai", 0.1),
            ("How can I reduce the overfitting of my model?", "question", "ai", 0.2),
            ("How to avoid overfitting in my model?", "question", "ai", 0.3),
            ("To avoid overfitting, you can use techniques like early stopping and dropout.", "answer", "ai", 0.4),
            ("Kubernetes is a portable, extensible, open-source platform for managing containerized workloads and services, that facilitates both declarative configuration and automation.", "answer", "cloud", 0.5),
            ("Harry Potter is a series of seven fantasy novels written by British author, J. K. Rowling.", "answer", "books", 0.6),
        ]
    ]
)

6

In [5]:
# No filters are passed, so this lists the documents currently in the store;
# used here to confirm that the write above succeeded.
document_store.filter_documents()

[Document(id=663f2832336e54688a5a8e54e14ccb818a3a0cc19a838642b3911e7b1f9108c7, content: 'Is there a way to prevent the model from overfitting?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=d995dcbb3d8b0a11b9d6831e5b09db13f7596489dfd5169e738ebf0feb73c9df, content: 'How can I reduce the overfitting of my model?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=f56644a79652d6915efbfa0d7813a17856071e9e3a91df8215651344a0444232, content: 'How to avoid overfitting in my model?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=1c7b7d13d0617eb87918e50892d86fb4e48f0a7be1a0296f1c819e8822d4bd77, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=31793cbf330659fe8a8fa147b70a78a96924f76784e52176d97912cac5caeb40, content: 'Kubernetes is a portable, ext

Filter the documents based on the domain field

In [10]:
# Single comparison filter: keep documents whose `domain` metadata equals "ai".
filters = {"field": "meta.domain", "operator": "==", "value": "ai"}
results = document_store.filter_documents(filters=filters)

# Bare last expression so the notebook displays the matching documents.
results

[Document(id=663f2832336e54688a5a8e54e14ccb818a3a0cc19a838642b3911e7b1f9108c7, content: 'Is there a way to prevent the model from overfitting?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=d995dcbb3d8b0a11b9d6831e5b09db13f7596489dfd5169e738ebf0feb73c9df, content: 'How can I reduce the overfitting of my model?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=f56644a79652d6915efbfa0d7813a17856071e9e3a91df8215651344a0444232, content: 'How to avoid overfitting in my model?', meta: {'type': 'question', 'domain': 'ai'}, embedding: vector of size 768),
 Document(id=1c7b7d13d0617eb87918e50892d86fb4e48f0a7be1a0296f1c819e8822d4bd77, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'}, embedding: vector of size 768)]

In [11]:
# Compound filter: both conditions must hold (logical AND), i.e. documents
# in the "ai" domain that are also of type "answer".
filters = {
    "operator": "AND",
    "conditions": [
        {
            "field": "meta.domain",
            "operator": "==",
            "value": "ai",
        },
        {
            "field": "meta.type",
            "operator": "==",
            "value": "answer",
        },
    ],
}
results = document_store.filter_documents(filters=filters)

# Bare last expression so the notebook displays the matching documents.
results

[Document(id=1c7b7d13d0617eb87918e50892d86fb4e48f0a7be1a0296f1c819e8822d4bd77, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'}, embedding: vector of size 768)]