In [81]:
# Import necessary modules for URL handling, warnings, file path manipulation, and pretty printing
import urllib
import warnings
from pathlib import Path as p  # Import Path class for file and directory manipulation
from pprint import pprint  # For printing data structures in a readable format

# Importing libraries for data handling and machine learning
import pandas as pd  # For data manipulation and analysis
from langchain import PromptTemplate  # For creating prompt templates in LangChain
from langchain.chains.question_answering import load_qa_chain  # To load a Question Answering chain
from langchain.document_loaders import PyPDFLoader  # To load and process PDF documents
from langchain.text_splitter import RecursiveCharacterTextSplitter  # To split text into manageable chunks
from langchain.vectorstores import Chroma  # For managing and querying vector-based document embeddings
from langchain.chains import RetrievalQA  # To create a question-answering chain with a retriever

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Note: Restart the Python kernel if issues arise with LangChain imports.

In [82]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

model = ChatGoogleGenerativeAI(model="gemini-1.5-flash",google_api_key=GOOGLE_API_KEY,
                             temperature=0.2,convert_system_message_to_human=True)

In [83]:

pdf_loader = PyPDFLoader("./Github_Handbook.pdf")
pages = pdf_loader.load_and_split()
print(pages[3].page_content)

4
Why version control? (part 2)
 Your program worked well enough yesterday
 You made a lot of improvements last night...
...but you haven't gotten them to work yet
 You need to turn in your program now
 Has this ever happened to you?


In [84]:
len(pages)

31

RAG Pipeline: Embedding + Gemini (LLM)

In [85]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [86]:

text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)

In [87]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",google_api_key=GOOGLE_API_KEY)

In [88]:

vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k":5})

In [89]:

qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True

)

In [90]:

# question = "Who is Newton?"
# result = qa_chain({"query": question})
# print(f"{result["result"]}\n")

In [91]:


# result["source_documents"]

In [92]:

template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)# Run chain
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [94]:
question = "Who is James Clear"
result = qa_chain({"query": question})
result["result"]

'James Clear is an author whose work has appeared in the New York Times, Time, and Entrepreneur, and on CBS This Morning.  His website, jamesclear.com, receives millions of visitors each month. He is the creator of The Habits Academy. Thanks for asking!'

2025-01-14 18:22:26.351 
  command:

    streamlit run /Users/ankitpokhrel/Documents/RAG_Gemini/myenv/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [96]:
import subprocess

# Run the Streamlit app from Jupyter Notebook
subprocess.run(["streamlit", "run", "app.py"])


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8502
  Network URL: http://192.168.1.5:8502

  For better performance, install the Watchdog module:

  $ xcode-select --install
  $ pip install watchdog
            


KeyboardInterrupt: 