<a href="https://colab.research.google.com/github/jainmadhur07/llamaindex-rag-app/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain langchain-google-genai langchain-community langchain-core



In [2]:
import getpass
import os
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [3]:
# Credentials and LangSmith tracing configuration.
# No secret is stored in the notebook: a value already present in the
# environment is kept as-is, and a missing one is requested without echo.
def _ensure_secret(name, prompt):
    """Return os.environ[name], prompting for it when it is unset or blank."""
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(prompt)
    return os.environ[name]


_ensure_secret('GOOGLE_API_KEY', 'Google API key: ')

os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
# Tracing is only switched on when a LangSmith key is actually available;
# enabling it with a blank key just produces authentication errors.
if _ensure_secret('LANGCHAIN_API_KEY', 'LangSmith API key (leave blank to skip tracing): '):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
else:
    os.environ['LANGCHAIN_TRACING_V2'] = 'false'

In [8]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.6.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.6.0-py3-none-any.whl (304 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/304.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.2/304.2 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.6.0


In [9]:
from google.colab import files

# Ask the user to pick a file; the result is a mapping keyed by file name.
uploaded = files.upload()
if not uploaded:
    # A cancelled upload leaves nothing to index, so stop with a clear message
    # instead of an IndexError further down.
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")

# Only the first uploaded file is used.
filename = next(iter(uploaded))
print(f"Uploaded file: {filename}")

# Load the PDF
print("Loading PDF...")
loader1 = PyPDFLoader(filename)
pages = loader1.load()
print(f"Loaded {len(pages)} pages from PDF")

Saving NCERT-Class-12-Physics-Part-1.pdf to NCERT-Class-12-Physics-Part-1 (1).pdf
Uploaded file: NCERT-Class-12-Physics-Part-1 (1).pdf
Loading PDF...
Loaded 291 pages from PDF


In [10]:
pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.12-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting fastapi==0.115.9 (from chromadb)
  Downloading fastapi-0.115.9-py3-none-any.whl.metadata (27 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-4.4.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.34.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)
  Downloading opentelemetry_instrumentation_fastapi-0.55b0-py3-none-any.whl.metadata (2.2 kB)
Collecting opentelemetry-sdk>=1.2.0 (fr

In [11]:
# Cut the loaded pages into overlapping chunks before indexing.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=800,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(pages)

# Embed every chunk with the Google embedding model and index it in Chroma.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Each query is configured to fetch k=10 chunks from the store.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Chat model used to write the answers; temperature is fixed at 0.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0)

In [12]:
def ask_question(question):
    """Answer ``question`` from passages retrieved out of the indexed PDF.

    The module-level ``retriever`` supplies the matching chunks; their text is
    joined into a single context string, placed in a prompt together with the
    question, and sent to the module-level ``llm``.

    Args:
        question: The question text entered by the user.

    Returns:
        The ``content`` attribute of the response returned by ``llm.invoke``.
    """
    # Fetch the matching chunks and merge their text, separated by blank lines.
    matches = retriever.invoke(question)
    context = "\n\n".join(match.page_content for match in matches)

    # The prompt wording and layout are part of the model input; keep verbatim.
    prompt = f"""
    Based on the following context from the physics textbook, answer the question clearly and accurately.

    Context:
    {context}

    Question: {question}

    Answer:
    """

    return llm.invoke(prompt).content

In [13]:
def main():
    """Run an interactive question/answer loop on standard input/output.

    Every non-blank line the user enters is passed to ``ask_question`` and the
    answer is printed. The loop ends when the user types ``quit`` (any letter
    case, surrounding whitespace ignored) or when input is closed or
    interrupted. An exception raised while answering is reported and the loop
    carries on with the next question.
    """
    print("PDF Q&A System Ready!")
    print("Ask questions about your physics textbook. Type 'quit' to exit.\n")

    while True:
        try:
            question = input("Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input was closed or the prompt was interrupted: leave cleanly
            # rather than surfacing a traceback.
            print("\nGoodbye!")
            break

        if question.lower() == 'quit':
            print("Goodbye!")
            break

        if not question:
            # A blank line is not a question; skip the retrieval and LLM call.
            continue

        try:
            answer = ask_question(question)
            print(f"\nAnswer: {answer}\n")
            print("-" * 50)
        except Exception as e:
            # Keep the session alive after a failed question.
            print(f"Error: {e}")

In [None]:
# Start the interactive Q&A loop when this code runs as the main module.
if __name__ == "__main__":
    main()

PDF Q&A System Ready!
Ask questions about your physics textbook. Type 'quit' to exit.

