In [31]:
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic import hub

load_dotenv()

True

In [32]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [33]:
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

In [34]:
loader = PyPDFLoader("important Placement Questions.pdf", extract_images=True)
docs = loader.load()


text_splitter = RecursiveCharacterTextSplitter(chunk_size=25, chunk_overlap=15)
splits = text_splitter.split_documents(docs)

In [35]:
print(f"Number of pages loaded: {len(docs)}")
if len(docs) > 0:
    print(f"Sample content from page 1: {docs[0].page_content[:100]}")
else:
    print("!!! ERROR: No text found in PDF. Check if the file path is correct.")

Number of pages loaded: 10
Sample content from page 1: grow_pins
EssentialInterviewQuestionsforPython,Java,DataStructures,DBMS,andWebTechnologies.
Masterth


In [36]:
print(f"Number of chunks created: {len(splits)}")
if len(splits) == 0:
    print("!!! ERROR: Text splitter produced 0 chunks. Try reducing chunk_size.")

Number of chunks created: 912


In [37]:
if len(splits) > 0:
    vectorstore = Chroma.from_documents(
        documents=splits, 
        embedding=embeddings,
        persist_directory="./chroma_db"
    )
    print("Successfully created vector store.")
else:
    raise ValueError("The splits list is empty! Ensure your PDF loader actually found text.")

Successfully created vector store.


In [38]:
prompt = hub.pull("langchain-ai/retrieval-qa-chat")
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(vectorstore.as_retriever(), combine_docs_chain)

In [39]:
response = rag_chain.invoke({"input": "What are the main points of this document in python?"})
response=rag_chain.invoke({"input": "What are the important placement questions mentioned in the document?"})
print(response["answer"])


The document only contains the repeated phrase ",FirstOut)datastructure.I" and does not mention any important placement questions.
