In [1]:
import os

from dotenv import load_dotenv
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.vectorstores.pgvector import PGVector
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings


In [2]:
# Load variables from a .env file into the process environment before any client is built.
# NOTE(review): the OpenAI client created later presumably reads its API key from here — confirm.
load_dotenv()

True

In [3]:
# Read the source PDF (path is relative to the notebook's working directory).
PDF_PATH = "./biden.pdf"
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()




In [4]:
# Embedding model shared by the semantic splitter and both PGVector handles below.
# NOTE(review): built with library defaults; credentials are presumably taken from the environment — confirm.
embeddings = OpenAIEmbeddings()

In [5]:
# Split the loaded documents into chunks; SemanticChunker is driven by the embedding model.
text_splitter = SemanticChunker(embeddings=embeddings)
docs = text_splitter.split_documents(documents=documents)

In [6]:
# Report how many chunks the splitter produced and preview only the first two;
# echoing the whole list floods the notebook with page text and bloats the saved file.
print(f"{len(docs)} chunks")
docs[:2]

[Document(page_content='2/7/24, 8:43 PM Statement from President Joe Biden on Early Student Debt Cancellation for Borrowers Enrolled in SAVE | The White House\nhttps://www.whitehouse.gov/brieﬁng-room/statements-releases/2024/01/12/statement-from-president-joe-biden-on-early-student-debt-cancellation-for-borrowers-enr… 1/2JAN UAR Y 12, 2024\nStatement from President Joe\xa0Biden on Early\nStudent Debt Cancellation for Borrowers\nEnrolled in\xa0SAVE\nFrom Day One of my Administration, I vowed to ﬁx the student loan system\nand make sure higher education is a pathway to the middle class – not a\nbarrier to opportunity. Already, my Administration has cancelled student\ndebt for 3.6 million Americans through various actions – delivering\nlifechanging relief to students and families, and we created the most\naﬀordable student loan repayment plan ever: the SAVE plan. I am proud that my Administration is implementing one of the most\nimpactful provisions of the SAVE plan nearly six months ahea

In [9]:
# SQLAlchemy URL of the Postgres database used by PGVector. It can be overridden through
# the PGVECTOR_CONNECTION_STRING environment variable (for example from the .env file
# loaded at the top of the notebook), so a different host or credentials never have to be
# written into the notebook. The fallback is the original local, credential-free URL.
CONNECTION_STRING = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg2://@localhost:5432/tester_rag",
)

In [10]:
COLLECTION_NAME = "biden_studentloan"

# Embed every chunk and store it in the collection named above.
# pre_delete_collection=True drops any existing collection with this name first, so
# re-running this cell (or Restart & Run All) rebuilds the collection instead of
# appending another copy of every chunk, which would surface as duplicate search hits.
# WARNING: anything previously stored under COLLECTION_NAME is discarded.
db = PGVector.from_documents(
    embedding=embeddings,
    documents=docs,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    pre_delete_collection=True,
)

In [15]:
query = "Will we cancel student loan debt?"

# Similarity lookup; every hit is a (document, score) pair.
# NOTE(review): in the recorded run the scores ascend down the list, which suggests
# the score is a distance (lower = closer) — confirm against the PGVector docs.
docs_with_score = db.similarity_search_with_score(query=query)

In [16]:
# Print each hit as: rule, score line, chunk text, rule.
separator = "-" * 80
for doc, score in docs_with_score:
    print(separator, "Score:  " + str(score), doc.page_content, separator, sep="\n")

--------------------------------------------------------------------------------
Score:  0.13901053121771534
2/7/24, 8:43 PM Statement from President Joe Biden on Early Student Debt Cancellation for Borrowers Enrolled in SAVE | The White House
https://www.whitehouse.gov/brieﬁng-room/statements-releases/2024/01/12/statement-from-president-joe-biden-on-early-student-debt-cancellation-for-borrowers-enr… 2/2Court’s decision on our student debt relief plan, we are continuing to pursue
an alternative path to deliver student debt relief to as many borrowers as
possible as quickly as possible. I won’t back down from using every tool at
our disposal to get student loan borrowers the relief they need to reach their
dreams.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.1407938167074727
2/7/24, 8:43 PM Statement from President Joe Biden on Early Student Debt Cancellation fo

In [17]:
# Re-run the same query with max-marginal-relevance ranking.
# This rebinds docs_with_score, replacing the similarity-search hits from above.
docs_with_score = db.max_marginal_relevance_search_with_score(query=query)

In [18]:
# Print each max-marginal-relevance hit as: rule, score line, chunk text, rule.
separator = "-" * 80
for doc, score in docs_with_score:
    print(separator, "Score:  " + str(score), doc.page_content, separator, sep="\n")

--------------------------------------------------------------------------------
Score:  0.13906186739887116
2/7/24, 8:43 PM Statement from President Joe Biden on Early Student Debt Cancellation for Borrowers Enrolled in SAVE | The White House
https://www.whitehouse.gov/brieﬁng-room/statements-releases/2024/01/12/statement-from-president-joe-biden-on-early-student-debt-cancellation-for-borrowers-enr… 2/2Court’s decision on our student debt relief plan, we are continuing to pursue
an alternative path to deliver student debt relief to as many borrowers as
possible as quickly as possible. I won’t back down from using every tool at
our disposal to get student loan borrowers the relief they need to reach their
dreams.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.1408428285312915
2/7/24, 8:43 PM Statement from President Joe Biden on Early Student Debt Cancellation fo

### Handling additional connections to the database

In [19]:
# Second PGVector handle, built directly from the constructor and pointed at the same
# collection name and connection string as `db`.
store = PGVector(
    embedding_function=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

In [None]:
# Disabled example: add one more document through the `store` handle.
# NOTE(review): `Document` is not imported anywhere in this notebook, so the line below
# would raise NameError if uncommented as-is — import it before enabling.
# store.add_documents([Document(page_content="foo")])