In [42]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Load environment variables via python-dotenv before the clients below are created.
load_dotenv()

# Chat model used to generate the final answer; its key is read from OPENAI_API_KEY.
llm = ChatOpenAI(
    model="gpt-4.1-2025-04-14",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Embedding model used to vectorise document chunks and queries.
# NOTE(review): no API key is passed here — presumably it is picked up from the
# environment; confirm which variable the Google client expects.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [49]:
from langchain_community.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# The sample PDF is expected at ./data/sample.pdf relative to the current working directory.
pdf_path = os.path.join(os.getcwd(), "data", "sample.pdf")

# Load the PDF into a list of documents.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Split the loaded documents into smaller, overlapping chunks.
# chunk_size and chunk_overlap are hyperparameters; lengths are measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len,
)
chunks = text_splitter.split_documents(documents)

# Embed the chunks with the embedding model and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(chunks, embedding_model)

# Expose the store as a retriever that runs a similarity search with k=10.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)


In [50]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# String output parser applied as the last step of the chain to parse the LLM's response.
parser = StrOutputParser()
# Prompt text with two placeholders, {context} and {question}.
# It instructs the model to answer from the supplied context only and to reply with a
# fixed fallback sentence when the context is insufficient.
# The literal below is sent to the model as-is (including its leading indentation),
# so edit it with care.
prompt_template = """
        Answer the question based on the context provided below. 
        If the context does not contain sufficient information, respond with: 
        "I do not have enough information about this."

        Context: {context}

        Question: {question}

        Answer:"""

def get_formatted_doc(documents):
    """Merge retrieved documents into a single context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values, in order, joined by a blank line.
        An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = (doc.page_content for doc in documents)
    return separator.join(page_texts)

# Prompt for the question-answering task, filled from the "context" and "question" inputs.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# RAG chain: the input question is passed through unchanged as "question", while
# "context" is produced by running the retriever and formatting its documents.
# The filled prompt then goes to the LLM and the reply is parsed to a string.
rag_chain = (
    {
        "context": retriever | get_formatted_doc,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | parser
)


In [51]:
# Run the RAG chain on a sample question; the returned answer string is the cell output.
rag_chain.invoke(
    "How did Llama 2 perform against it peers?"
)

'Llama 2 performed competitively against its peers on standard academic benchmarks. Across code generation (Human-Eval, MBPP), world knowledge (NaturalQuestions, TriviaQA), and other grouped academic tasks, Llama 2 consistently outperformed previous Llama 1 models and showed improved results over other open-source models like MosaicML Pretrained Transformer (MPT) and Falcon. Larger Llama 2 models (such as 70B) achieved higher benchmark scores than smaller models and were often on par with, or close to, the performance of ChatGPT in certain evaluations.\n\nOverall, the results indicate that Llama 2 represents a significant step forward compared to earlier open-source models, especially as model size increases, and can match or closely approach the performance of leading commercial models on various tasks. However, its proficiency in languages other than English remains limited.'