### Agentic RAG

In [1]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.tools import tool
from langchain_community.utilities import GoogleSerperAPIWrapper

In [6]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

In [7]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Use .get() so a missing variable yields None and falls through to the explicit
# check below; indexing os.environ directly would raise a bare KeyError before
# the descriptive ValueError could ever be reached.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Fail fast, with a clear message, when the key is either unset or empty.
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable not set")


In [2]:
@tool("GoogleSearch")
def search(query_string: str):
    """
    Useful to search for any kinds of information and
    when you need to search the internet for any kinds of information, use this tool.
    Prefer this tool when you search for long queries.
    Should not be used for Article search or Topic Search.
    You should use this only when you need to get real-time information about a topic.
    """
    # The docstring above is what the agent sees as this tool's description,
    # so its wording is part of the tool's behaviour and is kept verbatim.
    # A new Serper wrapper is created on each call and asked to run the query.
    return GoogleSerperAPIWrapper().run(query_string)

In [3]:

# Wikipedia API wrapper shared by the tool below.
# (presumably limits lookups to one result trimmed to 1000 characters — confirm
# against the WikipediaAPIWrapper docs)
api_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=1000,
)

# Wikipedia lookup tool exposed to the agent.
# NOTE(review): the tool name is misspelled ("Wikiepdia"); it is left unchanged
# here because it is the identifier the model uses when calling the tool.
wiki = WikipediaQueryRun(
    api_wrapper=api_wrapper,
    name="WikiepdiaSearch",
    description="Use this tool when you want to analyze for information on Wikipedia by Terms, Keywords or any Topics.",
)

In [4]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [8]:
# Load the page served at the LangSmith documentation URL.
loader = WebBaseLoader("https://docs.smith.langchain.com")
docs = loader.load()

# Split the loaded documents into 1000-character chunks with a 200-character overlap.
documents = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

# Embed the chunks with OpenAI embeddings, index them in FAISS,
# and expose the index through the retriever interface.
vectordatabase = FAISS.from_documents(documents, embedding=OpenAIEmbeddings())
retriever = vectordatabase.as_retriever()

In [9]:
from langchain.tools.retriever import create_retriever_tool

# Wrap the retriever as an agent tool; the description steers the model to use
# it for LangSmith-related questions.
retriever_tool = create_retriever_tool(
    retriever,
    name="langsmith_search",
    description="search for information about langsmith. for any questions related to langsmith, you must use this tool",
)

# Display the registered tool name as the cell output.
retriever_tool.name

'langsmith_search'

In [10]:
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

In [11]:
# arXiv API wrapper, configured with the same limits as the Wikipedia wrapper
# (top_k_results=1, doc_content_chars_max=1000).
arxiv_wrapper = ArxivAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=1000,
)

# arXiv query tool built on that wrapper; name and description are left at the library defaults.
arxiv = ArxivQueryRun(
    api_wrapper=arxiv_wrapper,
)

In [12]:
# All tools made available to the agent, in their original order.
tools = [
    arxiv,
    search,
    wiki,
    retriever_tool,
]

# Echo the list so the cell output shows each tool's configuration.
tools

[ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=1000)),
 StructuredTool(name='GoogleSearch', description='Useful to search for any kinds of information and\nwhen you need to search the internet for any kinds of information, use this tool.\nPrefer this tool when you search for long queries.\nShould not be used for Article search or Topic Search.\nYou should use this only when you need to get real-time information about a topic.', args_schema=<class 'langchain_core.utils.pydantic.GoogleSearch'>, func=<function search at 0x0000024C14A9FCE0>),
 WikipediaQueryRun(name='WikiepdiaSearch', description='Use this tool when you want to analyze for information on Wikipedia by Terms, Keywords or any Topics.', ap

In [13]:
# Chat model that drives the agent; the API key is the one validated earlier in the notebook.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model="gpt-4o",
    temperature=0.1,
    max_tokens=2000,
)

In [14]:
from langchain import hub

# Pull the "hwchase17/openai-functions-agent" prompt from the LangChain hub.
# NOTE(review): `prompt` is not passed to the agent in the cells visible here —
# confirm whether this pull is still needed.
prompt = hub.pull("hwchase17/openai-functions-agent")
# Show the prompt's message templates as the cell output.
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [15]:
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage, SystemMessage

# Build the prebuilt ReAct-style agent graph from the chat model and the tool list.
agent_executor = create_react_agent(model=llm, tools=tools)

In [16]:
# Stream the agent's run for a LangSmith question, printing each streamed
# item followed by a separator line.
for stream in agent_executor.stream({"messages": [HumanMessage(content="tell me about langsmith")]}):
    print(stream, "***********", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_DLbXAd2seU8RySZK7Mr7suBi', 'function': {'arguments': '{"query":"langsmith"}', 'name': 'langsmith_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 272, 'total_tokens': 288, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_cbf1785567', 'id': 'chatcmpl-CRbWcnGNnFTF6HOLb3sQZcMH157P8', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--870b57d0-617b-44c4-a367-c1ecd925a54a-0', tool_calls=[{'name': 'langsmith_search', 'args': {'query': 'langsmith'}, 'id': 'call_DLbXAd2seU8RySZK7Mr7suBi', 'type': 'tool_call'}], usage_metadata={'input_tokens': 272, 'output_tokens': 16, 'total_to

In [17]:
# Stream the agent's run for a question about a paper identified by its id,
# printing each streamed item followed by a separator line.
for stream in agent_executor.stream(
    {"messages": [HumanMessage(content="whats the paper 2412.16446 talk about it?")]}
):
    print(stream, "***********", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_WWJwieOFDwoooPYRIL54FEuJ', 'function': {'arguments': '{"query":"2412.16446"}', 'name': 'arxiv'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 281, 'total_tokens': 299, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_cbf1785567', 'id': 'chatcmpl-CRbX7Q5gDVKZfkIPn6WCH2tDMhTuE', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--dae1117d-1c31-44f1-a339-0102ddf5ce93-0', tool_calls=[{'name': 'arxiv', 'args': {'query': '2412.16446'}, 'id': 'call_WWJwieOFDwoooPYRIL54FEuJ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 281, 'output_tokens': 18, 'total_tokens': 299, 'input_t

In [18]:
# Stream the agent's run for a short topic-style query, printing each streamed
# item followed by a separator line.
for stream in agent_executor.stream({"messages": [HumanMessage(content="Indian Constitution")]}):
    print(stream, "***********", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_FmdmD77XzNohjhBubhgQd516', 'function': {'arguments': '{"query":"Indian Constitution"}', 'name': 'WikiepdiaSearch'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 269, 'total_tokens': 288, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_cbf1785567', 'id': 'chatcmpl-CRbYclUI1vKNI6RaJzRYrw3Cd5ypU', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--1e365fdd-0da5-409c-aced-ef25b9d91779-0', tool_calls=[{'name': 'WikiepdiaSearch', 'args': {'query': 'Indian Constitution'}, 'id': 'call_FmdmD77XzNohjhBubhgQd516', 'type': 'tool_call'}], usage_metadata={'input_tokens': 269, 'output_toke

In [19]:
# Stream the agent's run for a current-events question, printing each streamed
# item followed by a separator line.
for stream in agent_executor.stream(
    {"messages": [HumanMessage(content="Who won the cricket world cup in the Year 2025 in WTC?")]}
):
    print(stream, "***********", sep="\n")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_BOwnqJKwvKpOsNP0mkGccGuW', 'function': {'arguments': '{"query_string":"2025 Cricket World Cup winner WTC"}', 'name': 'GoogleSearch'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 283, 'total_tokens': 305, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_cbf1785567', 'id': 'chatcmpl-CRbZ2GOacHR1teLSNYGAzgWSlsqwm', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--a72b59ac-dba9-4385-bfdb-65f9cc012475-0', tool_calls=[{'name': 'GoogleSearch', 'args': {'query_string': '2025 Cricket World Cup winner WTC'}, 'id': 'call_BOwnqJKwvKpOsNP0mkGccGuW', 'type': 'tool_call'}], usage_metadat