# Retrieval Augmented Generation (RAG) 
  * Before learn about rag you must have to learn about agents,So must follow this docs \
https://docs.langchain.com/oss/python/langchain/agents

In [23]:
import os 
import getpass 

os.environ['LANGCHAIN_TRACING_V2']='true'

if not os.environ.get('LANGCHAIN_API_KEY'): 
    os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('Enter your langsmith api key')

os.environ['LANGCHAIN_PROJECT'] = 'Build a RAG agent with LangChain'

In [24]:
if not os.environ.get('GEMINI_API_KEY'): 
    os.environ['GEMINI_API_KEY']=getpass.getpass('Enter your Gemini api key:') 

In [35]:
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings

model = ChatGoogleGenerativeAI(model='gemini-2.5-flash',
                               temperature=1.0, 
                               max_tokens=None, 
                               timeout=None, 
                               max_retries=2,)

# embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001') 


In [36]:
from langchain_huggingface import HuggingFaceEmbeddings 

model_kwargs = {'device': 'cuda'}
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=model_kwargs
)

In [37]:
from langchain_chroma import Chroma

db_path = './chroma_db' 
vector_store = Chroma(
    collection_name='example_collection',
    embedding_function=embeddings,
    persist_directory=db_path
)

In [38]:
from langchain_community.document_loaders import WebBaseLoader 
import bs4 

URL = 'https://lilianweng.github.io/posts/2023-06-23-agent/' 

bs4_strainer = bs4.SoupStrainer(
    class_ = ('post-content','post-title','post-header') 
) 
loader = WebBaseLoader( 
    web_path=URL,
    bs_kwargs={'parse_only':bs4_strainer}, 
) 
docs = loader.load() 

In [39]:
print(docs[0].page_content[:500]) 
print('len of whole docs: ',len(docs[0].page_content))



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In
len of whole docs:  43047


In [40]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter( 
    chunk_size=500,
    chunk_overlap=200,
    add_start_index=True
)

splits = text_splitter.split_documents(docs)
print(splits[0].page_content)
len(splits)

LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


146

In [41]:
vector_store.add_documents(splits)

['211b3452-0032-468a-81dd-3a9769c08fda',
 'e9772f39-86f5-46ba-9a8f-2c29d73633db',
 '8086dede-88e3-4d22-bb06-2485e249c9c0',
 '0514944e-51cc-41ed-86d8-ca07250938e1',
 '075187f5-e2f6-4209-b0a7-8e53164b8594',
 '201e9980-2c58-47af-af42-5b9170275fe2',
 '357f113d-02ec-4bf2-9bf5-bcd67a3c9192',
 '651d23aa-6115-44c1-bce1-31ef4ac2b5ae',
 'c24156a6-d499-462a-b5bb-a23bc4f176da',
 '6fadf71d-e593-4229-8cc5-46d7ac8614b2',
 '3826d394-71dc-4c2a-ae67-0ca316899069',
 'eff76687-f547-4aac-b9c9-48859b26f2ec',
 '7ec06796-17c5-4150-9e8b-4008e4eb771c',
 '370313ef-8d34-4f8f-96e7-6cb7ad54c8eb',
 '6db45be7-0d6f-4579-b714-e9996dcdbe13',
 '3a403d26-47d3-4acf-a0b8-cd2be26e3b04',
 '4a70c9d9-72a0-463a-81b1-4b1616b7d487',
 '315e9e10-73ff-4b0b-b5b3-229e43de6604',
 '62cd9e30-a125-451d-a712-ad1bb240f94e',
 '43b505e8-09f0-4df7-a5af-5774a89ad425',
 'a4f58576-8609-40ee-b205-391a09342727',
 'ef18f4a8-5b71-4e3a-b499-acac84eab37d',
 'e693709d-95f8-4643-b2e1-0649d6687b06',
 '1e52e59b-0979-41c1-abb6-134639351796',
 '2ed756f1-6064-

In [42]:
from langchain.tools import tool 

@tool(response_format='content_and_artifact') 
def retrieve_context(query: str): 
    """Retrieve information to help answer a query""" 
    retrieved_docs = vector_store.similarity_search(query,k=2) 
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent:{doc.page_content}") 
        for doc in retrieved_docs 
    )
    return serialized,retrieved_docs

In [43]:
from langchain.agents import create_agent 

tools = [retrieve_context]

prompt= ( 
    'you have access to a tool that retrieves context from a blog post.' 
    'use the tool to help answer user queries.' 
) 
agent = create_agent(model,tools,system_prompt=prompt)

In [46]:
query = ( 
    'what is the standard method for task decomposition?\n\n' 
    'once you get the answer, look up common extensions of that method.'
)

for event in agent.stream( 
    {'messages':[{'role':'user','content':query}]}, 
    stream_mode='values',
): 
    message = event['messages'][-1]
    
    # Check if it is the AI's final answer (not a tool call)
    if message.type == 'ai' and not message.tool_calls:
        print(message.content[0]['text'])

The standard method for task decomposition often involves using a Large Language Model (LLM) with simple prompting, such as asking "Steps for XYZ.\n1." or "What are the subgoals for achieving XYZ?".

Common extensions or alternative methods for task decomposition, as mentioned in the provided context, include:
*   Using task-specific instructions (e.g., providing the prompt "Write a story outline." for the task of writing a novel).
*   Incorporating human inputs to guide the decomposition process.


In [47]:
from langchain.agents.middleware import dynamic_prompt,ModelRequest 

@dynamic_prompt 
def prompt_with_context(request: ModelRequest) -> str: 
    """ Inject context into state messages.""" 
    last_query = request.state['messages'][-1].text 
    retrieved_docs = vector_store.similarity_search(last_query) 

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs) 

    system_message = ( 
    'you are a helpful assistant. use the following context in your response.' 
    f'\n\n{docs_content}' 
    ) 
    return system_message 

    agent = create_agent(model,tools=[],middleware=[prompt_with_context])

In [48]:
query = 'what is task decomposition?' 

for step in agent.stream( 
    {'messages':[{'role':'user','content':query}]} ,
    stream_mode = 'values', 
): 
    step['messages'][-1].pretty_print()


what is task decomposition?
Tool Calls:
  retrieve_context (38519822-ca6e-4401-911e-fe1f7aef8315)
 Call ID: 38519822-ca6e-4401-911e-fe1f7aef8315
  Args:
    query: what is task decomposition?
Name: retrieve_context

Source: {'start_index': 2578, 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
Content:Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

Source: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 2578}
Content:Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

[{'type': 'text', 'text': 'Task decomposition is the process of bre