In [None]:
# API Keys Configuration
# Fill in both values before running the notebook, and keep real keys out of
# version control.
YOUR_LANGCHAIN_API_KEY: str = ""
YOUR_GOOGLE_API_KEY: str = ""

In [None]:
import os
import bs4
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import MessagesPlaceholder

In [None]:
# LangSmith settings: switch tracing on, point at the hosted endpoint, and
# group runs under the "RAG" project.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "RAG"

# Export the keys held in the configuration variables. Quoting the variable
# names would export the literal placeholder text rather than the key, so the
# variables are referenced directly. An empty variable leaves any value that
# is already present in the environment untouched.
for env_name, configured_key in (
    ("GOOGLE_API_KEY", YOUR_GOOGLE_API_KEY),
    ("LANGCHAIN_API_KEY", YOUR_LANGCHAIN_API_KEY),
):
    if configured_key:
        os.environ[env_name] = configured_key
    elif not os.environ.get(env_name):
        # Neither the notebook nor the environment supplies this key.
        print(f"⚠️ {env_name} is not set; fill in the API keys cell or export it before running.")

In [51]:
import warnings
warnings.filterwarnings('ignore')

In [77]:
# Embedding backend switch: True selects the local SentenceTransformer model,
# False selects the Gemini embedding model.
USE_LOCAL_EMBEDDINGS = False

# Each backend is imported inside its own branch, so only the selected
# integration is imported when the cell runs.
if not USE_LOCAL_EMBEDDINGS:
    from langchain_google_genai import GoogleGenerativeAIEmbeddings
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    print("✅ Using Gemini embeddings")
else:
    from langchain_community.embeddings import SentenceTransformerEmbeddings
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    print("✅ Using local embeddings (all-MiniLM-L6-v2)")

✅ Using Gemini embeddings


In [53]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used both for the direct smoke test and inside the RAG chain.
# NOTE(review): convert_system_message_to_human presumably folds the system
# prompt into the human turn — confirm against the langchain-google-genai docs.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    convert_system_message_to_human=True,
)

In [54]:
# Sanity check: send one plain question straight to the chat model (no
# retrieval involved) and print only the reply's content.
rag_explanation = model.invoke("hi, what is RAG?")
print(rag_explanation.content)

Hi! RAG is a really important and popular technique for making Large Language Models (LLMs) like ChatGPT even more powerful and reliable.

RAG stands for **Retrieval-Augmented Generation**.

Let's break down what that means and why it's so useful:

---

### What Problem Does RAG Solve?

LLMs are incredibly good at generating human-like text, but they have a few limitations:

1.  **Knowledge Cutoff:** Their knowledge is limited to the data they were trained on. If something happened after their last training update, they won't know about it.
2.  **Hallucinations:** Sometimes, if an LLM doesn't know the answer, it might confidently "make up" information that sounds plausible but is factually incorrect.
3.  **Lack of Specificity:** They might not have deep, specialized knowledge about a very specific domain (e.g., your company's internal policies, a niche scientific field).
4.  **No Source Citation:** They can't tell you *where* they got their information from, making it hard to verify.



In [63]:
# Restrict HTML parsing to elements carrying the post's content, title and
# header classes.
post_sections = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
source_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
loader = WebBaseLoader(web_paths=source_urls, bs_kwargs={"parse_only": post_sections})

In [64]:
# Run the loader. The result is indexable and its items expose `page_content`
# (both are relied on by the preview cell that follows).
doc = loader.load()

In [78]:
# Report how many documents were loaded, then show the first 200 characters
# of the first one.
loaded_count = len(doc)
print(f"Loaded {loaded_count} document(s)")
first_doc_preview = doc[0].page_content[:200]
print(f"First document preview: {first_doc_preview}...")

Loaded 1 document(s)
First document preview: 

      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a ...


In [66]:
# Chunking parameters handed to the splitter: the chunk size and the overlap
# between neighbouring chunks.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

In [67]:
# Split the loaded document(s) into chunks with the splitter configured above.
splits = text_splitter.split_documents(doc)

In [79]:
# Report the number of chunks produced, then show the first 200 characters
# of the first chunk.
chunk_count = len(splits)
print(f"Split into {chunk_count} chunks")
first_chunk_preview = splits[0].page_content[:200]
print(f"First chunk preview: {first_chunk_preview}...")

Split into 63 chunks
First chunk preview: LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool con...


In [None]:
# Build a Chroma vector store from the chunks, using the embedding backend
# selected earlier in the notebook.
vectorstore = Chroma.from_documents(splits, embedding=embeddings)

In [71]:
# Expose the vector store as a retriever; no arguments are passed, so the
# store's default retrieval settings apply.
retriever = vectorstore.as_retriever()
retriever  # bare last expression: displays the retriever's repr as the cell output

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000013DE8832650>, search_kwargs={})

In [72]:
# System instructions for the answering model. Adjacent string literals are
# concatenated verbatim, so every sentence fragment carries its own closing
# punctuation and trailing space; without them the sentences run together
# (e.g. "know.Use three"). The {context} placeholder is where the prompt
# template inserts the retrieved documents.
system_prompt = (
    "You are an assistant for question answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

In [73]:
# Two-message prompt: the system message carries the instructions held in
# system_prompt; the human message carries the user's {input}.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
chat_prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [None]:
# Assemble the pipeline in two steps: a documents chain built from the chat
# model and prompt, then a retrieval chain that pairs it with the retriever.
question_answering_chain = create_stuff_documents_chain(llm=model, prompt=chat_prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

In [76]:
# Ask one question end to end; the returned dict is shown as the cell output.
mrkl_question = {"input": "What is MRKL?"}
rag_chain.invoke(mrkl_question)

{'input': 'What is MRKL?',
 'context': [Document(id='ca0ad523-4120-4f77-8998-a04529dee03b', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='MRKL (Karpas et al. 2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can wo