In [1]:
import os

from langchain.document_loaders.pdf import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_postgres.vectorstores import PGVector

In [2]:
# LangChain Ollama wrapper configured for the "llama3" model.
# NOTE(review): presumably requires a reachable Ollama server with llama3
# pulled — confirm. It is not referenced by the retrieval cells visible here.
llm = Ollama(model="llama3")

In [3]:

# Embedding model shared by the indexing call (PGVector.from_documents) and the
# query-side PGVector store. The model is named explicitly instead of relying
# on the library default, so the vector dimension written to Postgres cannot
# change silently if that default is ever updated.
# Previously considered alternatives: GPT4AllEmbeddings(model_name="llama3"),
# HuggingFaceHubEmbeddings().
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

  from tqdm.autonotebook import tqdm, trange


In [4]:

# Splitter producing chunks of at most 1024 characters (length_function=len)
# with an 80-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024, chunk_overlap=80, length_function=len, is_separator_regex=False
)

loader = PDFPlumberLoader("dummies/alice.pdf")
# Use load() rather than load_and_split(): the latter already runs a default
# text splitter over the pages, so the documents would be split twice with two
# different configurations. The configured splitter below is the only one that
# should decide chunk boundaries.
documents = loader.load()
chunks = text_splitter.split_documents(documents)

In [5]:
# Sanity check: number of chunks on the first line, number of loaded
# documents on the second.
print(len(chunks), len(documents), sep="\n")

205
80


In [6]:
# Database DSN. It can be overridden through the PGVECTOR_CONNECTION_STRING
# environment variable so real credentials never need to be committed with the
# notebook; the fallback is the local demo database used so far.
connection_string = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg://postgres:postgres@localhost:5432/rag-demo",
)
# NOTE(review): the collection name looks like a leftover from another demo
# (the indexed file is dummies/alice.pdf) — consider renaming.
collection_name = "state_of_union_vectors"

# Embed every chunk and write it to the collection. pre_delete_collection=True
# makes the cell idempotent: without it, each re-run appends the same chunks
# again and similarity searches start returning duplicate hits.
db = PGVector.from_documents(
    embedding=embeddings,
    documents=chunks,
    collection_name=collection_name,
    connection=connection_string,
    pre_delete_collection=True,
)

In [7]:
# Smoke-test retrieval on the freshly built store: ask for the five best
# matches (with scores) and report how many came back.
query = "Who is alice"
results = db.similarity_search_with_score(query=query, k=5)
print(f"{len(results)}")

5


In [8]:
# Open the same collection through the PGVector constructor (rather than the
# from_documents factory), reusing the connection string, collection name and
# embedding model defined earlier in the notebook.
vector_store = PGVector(
    connection=connection_string,
    collection_name=collection_name,
    embeddings=embeddings,
    create_extension=True,
    use_jsonb=True,
)

In [14]:
from typing import List, Tuple

from langchain_core.documents import Document

# One search hit: the matched Document paired with its float score.
ScoredDocument = Tuple[Document, float]

# Run the same query against the constructor-built store, with an explicit
# type on the result for readability.
results: List[ScoredDocument] = vector_store.similarity_search_with_score(
    query=query, k=5
)

In [15]:
# Display the first (Document, score) pair from the search results.
results[0]

(Document(page_content='dear!’\nI shall only look up and say ‘Who am I then? Tell me that first, and\nthen, if I like being that person, I’ll come up: if not, I’ll stay\ndown\nhere till I’m somebody else’—but, oh dear!” cried Alice, with a sudden\nburst of tears, “I do wish they _would_ put their heads down! I am so\n_very_ tired of being all alone here!”\nAs she said this she looked down at her hands, and was surprised to\nsee\nthat she had put on one of the Rabbit’s little white kid gloves while', metadata={'page': 7, 'Title': 'alice', 'source': 'dummies/alice.pdf', 'Creator': 'TextEdit', 'ModDate': "D:20240420211427Z00'00'", 'Producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'file_path': 'dummies/alice.pdf', 'total_pages': 80, 'CreationDate': "D:20240420211427Z00'00'"}),
 0.5143931056739934)