In [None]:
from dotenv import load_dotenv 
import os

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key (None when the variable is not set)
openai_key = os.getenv('OPENAI_API_KEY')

# Report only whether the key was found; printing the key itself would store
# the secret in the notebook's saved output.
print("OPENAI_API_KEY loaded." if openai_key else "OPENAI_API_KEY is not set.")

In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the Pasvalys PDF, relative to the notebook's working directory.
file_path = "./data/Pasvalys.pdf"

# Build a PDF loader for that path and read its contents.
loader = PyPDFLoader(file_path)
pasvalys_documents = loader.load()

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the Birzai PDF, relative to the notebook's working directory.
file_path = "./data/birzai.pdf"

loader = PyPDFLoader(file_path)

birzai_documents = loader.load()

# NOTE: this rebinds `print` to rich's version for every cell that runs after this one.
from rich import print

# Show a short summary and a single sample instead of dumping every loaded
# document, which floods the cell output and bloats the saved notebook.
print(f"Loaded {len(birzai_documents)} documents from {file_path}")
print(birzai_documents[:1])

In [None]:
# Combine the Pasvalys and Birzai documents into a single collection.
merged_documents = pasvalys_documents + birzai_documents

# Report the size of the merged collection rather than printing every document.
print(f"Merged {len(merged_documents)} documents.")

In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the merged documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(merged_documents)

# The inputs are the merged PDF documents (not a blog post), so say so.
print(f"Split {len(merged_documents)} documents into {len(all_splits)} sub-documents.")

In [None]:
from langchain_openai import OpenAIEmbeddings

from langchain_core.vectorstores import InMemoryVectorStore

# Embedding model for the chunks; it is given the API key read from the environment.
embeddings = OpenAIEmbeddings(
    api_key=openai_key,
    model="text-embedding-3-large",
)

# In-memory vector store built on the embedding model above.
vector_store = InMemoryVectorStore(embeddings)

# Add every chunk to the store and keep the value it returns.
document_ids = vector_store.add_documents(documents=all_splits)


In [None]:
from langchain import hub

# Fetch the "rlm/rag-prompt" template from the hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values so its wording can be inspected.
example_messages = prompt.invoke(
    {
        "context": "(context goes here)",
        "question": "(question goes here)",
    }
).to_messages()

# The rest of this cell expects the rendered prompt to be a single message.
assert len(example_messages) == 1
print(example_messages[0].content)

In [None]:
# `langchain.llms` is a deprecated import path; take the same class from the
# `langchain_openai` package that this notebook already uses for embeddings.
from langchain_openai import OpenAI

llm = OpenAI(api_key=openai_key)

question = "Please compare Pasvalys and Birzai."

# Retrieve the stored chunks most similar to the question and join their text
# into one context string, separated by blank lines.
retrieved_docs = vector_store.similarity_search(question)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

# Fill the RAG prompt with the question and retrieved context, then ask the model.
promptAnswer = prompt.invoke({"question": question, "context": docs_content})
answer = llm.invoke(promptAnswer)
print(answer)