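Getting Started
* docker pull ankane/pgvector
* docker-compose up -d
* Provide OPENAI_API_KEY, DB_HOST, DB_PORT, DB_DATABASE, DB_USER and DB_PASSWORD through a `.env` file or the process environment (DB_DRIVER is optional and defaults to "psycopg").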

In [None]:
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector, DistanceStrategy
from langchain.docstore.document import Document

import os

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is missing. The previous
# self-assignment `os.environ[...] = os.getenv(...)` was a no-op when the key
# was set and raised an opaque TypeError when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in your .env file or environment."
    )

embeddings = OpenAIEmbeddings()

# Collect the database settings up front so that an unset variable is reported
# by name instead of being formatted into the connection URL as the text "None".
_db_settings = {
    name: os.getenv(name)
    for name in ("DB_HOST", "DB_PORT", "DB_DATABASE", "DB_USER", "DB_PASSWORD")
}
_missing_db_settings = [
    name for name, value in _db_settings.items() if value is None
]
if _missing_db_settings:
    raise RuntimeError(
        "Missing database settings: " + ", ".join(_missing_db_settings)
    )

# Connection URL consumed by the PGVector calls in this notebook.
connection_string = PGVector.connection_string_from_db_params(
    driver=os.environ.get("DB_DRIVER", "psycopg"),
    host=_db_settings["DB_HOST"],
    # The helper's signature expects an integer port; int() also rejects
    # a non-numeric DB_PORT here rather than at connect time.
    port=int(_db_settings["DB_PORT"]),
    database=_db_settings["DB_DATABASE"],
    user=_db_settings["DB_USER"],
    password=_db_settings["DB_PASSWORD"],
)

In [None]:
# Read the speech transcript from disk into LangChain documents.
# The encoding is passed explicitly so the load does not depend on the
# platform's default text encoding (assumes the data file is UTF-8).
loader = TextLoader('./data/state_of_the_union.txt', encoding='utf-8')
documents = loader.load()

In [None]:
# Break the loaded documents into smaller chunks (chunk_size=1000, no overlap
# between neighbouring chunks) before they are embedded.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Show how many documents went in and how many chunks came out, one per line.
print(len(documents), len(docs), sep="\n")

In [None]:
from typing import List, Tuple

# Name under which the chunks are stored in the vector store.
collection_name = 'state_of_the_union'

# Embed every chunk in `docs` and persist it through PGVector.
# NOTE(review): the original author's note says PGVector will try to create a
# table named after the collection, so the name should be unique and the
# database user needs permission to create tables -- confirm against the
# installed langchain version.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=connection_string,
    collection_name=collection_name,
)

In [None]:

# Natural-language question to search the vector store with.
query = "What did the president say about federal deficit?"

# Each result pairs a matching Document with its score.
docs_with_score: List[Tuple[Document, float]]
docs_with_score = db.similarity_search_with_score(query=query)

In [None]:
# Print each hit framed by an 80-character rule: score first, then the
# chunk text and its metadata.
rule = "-" * 80
for doc, score in docs_with_score:
    print(rule)
    print("Score: ", score)
    print(doc.page_content, doc.metadata, sep="\n")
    print(rule)

In [None]:
# Open the existing collection directly (no documents are added here) and
# compare embeddings using the cosine distance strategy.
store = PGVector(
    collection_name='state_of_the_union',
    connection_string=connection_string,
    embedding_function=embeddings,
    distance_strategy=DistanceStrategy.COSINE,
)

# Wrap the store as a retriever, passing k=1 through as a search argument.
retriever = store.as_retriever(search_kwargs=dict(k=1))

In [None]:

# Ask the retriever for documents relevant to the question. This is left as
# a bare expression so the notebook displays the result of the cell.
retriever.get_relevant_documents(
    query='What did the president say about federal deficit?',
)