In [6]:
## Load environment variables (e.g. the PGVECTOR_* settings read further down
## via os.environ) from a local .env file before anything else needs them.
## `load_dotenv()` is left as the last expression so its return value is shown.
from typing import List, Tuple
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.pgvector import PGVector
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document

In [8]:
# Read the raw text file into LangChain documents.
loader = TextLoader("test.txt")
documents = loader.load()

# Cut the documents into non-overlapping chunks, targeting 1000 characters each.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)

# Embedding model used both for indexing the chunks and for embedding queries.
# NOTE(review): presumably picks up the OpenAI API key from the environment
# loaded by load_dotenv() -- confirm.
embeddings = OpenAIEmbeddings()

In [9]:
import os

## Build the database connection string PGVector needs from PGVECTOR_*
## environment variables; each setting falls back to a local-Postgres default
## when the variable is not set.
##
## The result has the form:
##   postgresql+psycopg2://username:password@localhost:5432/database_name
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver=os.getenv("PGVECTOR_DRIVER", "psycopg2"),
    host=os.getenv("PGVECTOR_HOST", "localhost"),
    port=int(os.getenv("PGVECTOR_PORT", "5432")),  # env values are strings
    database=os.getenv("PGVECTOR_DATABASE", "postgres"),
    user=os.getenv("PGVECTOR_USER", "postgres"),
    password=os.getenv("PGVECTOR_PASSWORD", "postgres"),
)

In [10]:
# Embed the chunks and store them in Postgres under the "KBitem" collection.
# PGVector creates the storage it needs on first use, so the database user must
# be allowed to create tables, and the collection name should not clash with
# one that is already used for something else.
db = PGVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name="KBitem",
)

# Look up the stored chunks closest to the query; every hit is a
# (Document, score) pair.
query = "heaven"
docs_with_score: List[Tuple[Document, float]] = (
    db.similarity_search_with_score(query)
)

In [12]:
# Show each hit's score and metadata, framed by 80-character rules.
for doc, score in docs_with_score:
    print(
        "-" * 80,
        f"Score:  {score}",
        doc.metadata,
        "-" * 80,
        sep="\n",
    )

--------------------------------------------------------------------------------
Score:  0.644190907472073
{'source': 'test.txt'}
--------------------------------------------------------------------------------


In [14]:
# Re-index the same file with much smaller chunks (200 characters, no overlap).
loader = TextLoader('test.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# `from_documents` adds to an existing collection instead of replacing it, so
# without a reset the 200-character chunks would sit next to the chunks stored
# by the earlier cell (and by every previous run of this notebook), and searches
# would return a mix of old and new chunks.
# `pre_delete_collection=True` clears "KBitem" first so it holds only the
# chunks produced here.
# WARNING: this discards whatever is currently stored in the "KBitem" collection.
db = PGVector.from_documents(
    embedding=embeddings,
    documents=docs,
    collection_name="KBitem",
    connection_string=CONNECTION_STRING,
    pre_delete_collection=True,
)

In [20]:
# Search with a full sentence and show the text of every matching chunk.
query = "IF YOU DONT GO OVER THE TOP OF THE GOLF BALL YOU WONT SLICE IT. HERE IS A DRILL TO NOT SLICE YOUR GOLF BALL"
docs_with_score: List[Tuple[Document, float]] = (
    db.similarity_search_with_score(query)
)

# Each hit is printed as: rule, score, chunk text, rule.
for doc, score in docs_with_score:
    print(
        "-" * 80,
        f"Score:  {score}",
        doc.page_content,
        "-" * 80,
        sep="\n",
    )

--------------------------------------------------------------------------------
Score:  0.0
IF YOU DONT GO OVER THE TOP OF THE GOLF BALL YOU WONT SLICE IT. HERE IS A DRILL TO NOT SLICE YOUR GOLF BALL
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.8007276235701202
Genesis 1:1 In the beginning God created the heavens and the earth.
Genesis 1:2 And the earth was waste and void; and darkness was upon the face of the deep: and the Spirit of God moved upon the face of the waters.
Genesis 1:3 And God said, Let there be light: and there was light.
Genesis 1:4 And God saw the light, that it was good: and God divided the light from the darkness.
Genesis 1:5 And God called the light Day, and the darkness he called Night. And there was evening and there was morning, one day.
Genesis 1:6 And God said, Let there be a firmament in the midst of the waters, and let it divide the