#### Agentic RAG

In [1]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [2]:
from langchain_core.tools import tool
from langchain_community.utilities import GoogleSerperAPIWrapper

In [None]:
@tool("GoogleSearch")
def search(query_string: str):
    """
    Useful to search for any kinds of information and
    when you need to search the internet for any kinds of information, use this tool.
    Prefer this tool when you search for long queries.
    Should not be used for Article search.
    """
    # The docstring above is what the agent receives as this tool's description
    # (it shows up in the `description=` field of the StructuredTool repr), so
    # its wording is left exactly as written.
    # A new Serper wrapper is created on every call; it gets its own name so it
    # does not shadow the tool function `search`.
    serper_client = GoogleSerperAPIWrapper()
    return serper_client.run(query_string)

In [4]:

# Wikipedia client configured with top_k_results=1 and a 1000-character content cap.
api_wrapper = WikipediaAPIWrapper(doc_content_chars_max=1000, top_k_results=1)

# NOTE(review): "WikiepdiaSearch" looks like a misspelling of "WikipediaSearch".
# It is kept verbatim because this string is the tool name the model is given
# and calls; renaming it would change what the agent sees.
wiki = WikipediaQueryRun(
    api_wrapper=api_wrapper,
    name="WikiepdiaSearch",
    description="Use this tool when you want to search for information on Wikipedia by Terms, Keywords or any Topics.",
)

In [5]:
# Display the name under which the Wikipedia tool is registered.
wiki.name

'WikiepdiaSearch'

In [6]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [7]:
# Load .env before creating any OpenAI client. OpenAIEmbeddings() below is given
# no explicit key, and the load_dotenv() call visible in this notebook sits in a
# later cell, so on a fresh kernel this cell would otherwise run before .env has
# been read. load_dotenv() leaves variables that are already set untouched.
from dotenv import load_dotenv

load_dotenv()

# Fetch the page at docs.smith.langchain.com as LangChain documents.
loader = WebBaseLoader("https://docs.smith.langchain.com")
docs = loader.load()

# Split the page into 1000-character chunks with 200 characters of overlap.
documents = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
).split_documents(docs)

# Embed the chunks into a FAISS index and expose it through the retriever interface.
vectordatabase = FAISS.from_documents(documents, OpenAIEmbeddings())
retriever = vectordatabase.as_retriever()

In [8]:
# Display the retriever's repr to confirm it wraps the FAISS vector store.
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000016390718820>, search_kwargs={})

In [9]:
from langchain.tools.retriever import create_retriever_tool

# Wrap the FAISS-backed retriever as a named tool the agent can call.
# The description is the routing hint shown to the model, so it is kept verbatim.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="langsmith_search",
    description="search for information about langsmith. for any questions related to langsmith, you must use this tool",
)

# Display the registered tool name.
retriever_tool.name

'langsmith_search'

In [10]:
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

In [11]:
# arXiv client configured with top_k_results=1 and a 1000-character content cap,
# the same limits used for the Wikipedia wrapper.
arxiv_wrapper = ArxivAPIWrapper(
    doc_content_chars_max=1000,
    top_k_results=1,
)

# No name/description overrides: the tool keeps the defaults that ArxivQueryRun provides.
arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

In [12]:
# Display the default name of the arXiv tool.
arxiv.name

'arxiv'

In [13]:
# Every tool the agent is allowed to call; the original ordering is preserved.
tools = [
    arxiv,           # arXiv paper lookup
    search,          # Google search via Serper
    wiki,            # Wikipedia lookup
    retriever_tool,  # LangSmith docs retriever
]
tools

[ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=1000)),
 StructuredTool(name='GoogleSearch', description='Useful to search for any kinds of information and\nwhen you need to search the internet for any kinds of information, use this tool.\nPrefer this tool when you search for long queries.', args_schema=<class 'langchain_core.utils.pydantic.GoogleSearch'>, func=<function search at 0x00000163F5A70430>),
 WikipediaQueryRun(name='WikiepdiaSearch', description='Use this tool when you want to search for information on Wikipedia by Terms, Keywords or any Topics.', api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\jdram\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages

In [14]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

In [15]:
# Read variables from a .env file into the process environment before the key lookup below.
load_dotenv()

# os.environ[...] (rather than .get) raises KeyError right here when the key is missing,
# instead of handing None to the client.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-3.5-turbo-0125",
    temperature=0.1,
    max_tokens=2000,
)

In [16]:
from langchain import hub

# Pull the "hwchase17/openai-functions-agent" prompt from the LangChain hub.
prompt = hub.pull("hwchase17/openai-functions-agent")
# Display the message templates that make up the pulled prompt.
prompt.messages



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [17]:
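# Disabled alternative built on langchain.agents; the LangGraph agent created
# further down in this cell is the one that actually runs.
# from langchain.agents import create_react_agent, AgentExecutor

# agent = create_react_agent(llm = llm, tools = tools, prompt=prompt)
# agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)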

from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage, SystemMessage

# Build the LangGraph prebuilt ReAct agent from the chat model and the tool list.
# NOTE(review): the hub `prompt` pulled in the previous cell is not passed here,
# so the agent is created without it. Confirm that is intended.
agent_executor = create_react_agent(model=llm, tools=tools)

In [18]:
# LangSmith question; in the recorded run the model routes it to `langsmith_search`.
question = HumanMessage(content="tell me about langsmith")

# Print every streamed update, each followed by a separator line.
for event in agent_executor.stream({"messages": [question]}):
    print(event)
    print("***********")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_hOxBxEf9ywgHAiC1BFlX5DlL', 'function': {'arguments': '{"query":"langsmith"}', 'name': 'langsmith_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 252, 'total_tokens': 269, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BChT9s0j8WIBwLT5n4uQfKdbtDRdF', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-624922c7-5bd9-4afb-ae88-45f204e9cf98-0', tool_calls=[{'name': 'langsmith_search', 'args': {'query': 'langsmith'}, 'id': 'call_hOxBxEf9ywgHAiC1BFlX5DlL', 'type': 'tool_call'}], usage_metadata={'input_tokens': 252, 'output_tokens': 17, 'total_tokens': 269, 'input_token_details': {'a

In [19]:
# Paper-ID question; in the recorded run the model routes it to the `arxiv` tool.
question = HumanMessage(content="whats the paper 2412.16446 talk about it?")

# Print every streamed update, each followed by a separator line.
for event in agent_executor.stream({"messages": [question]}):
    print(event)
    print("***********")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_zpoG07UmFsc6HqzhHJKJlEz9', 'function': {'arguments': '{"query":"2412.16446"}', 'name': 'arxiv'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 261, 'total_tokens': 280, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BChTCSjQPOyPvFOzPZOVG1aLnAU3r', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-896dad32-95ec-4fc7-aca0-cbdab10ac072-0', tool_calls=[{'name': 'arxiv', 'args': {'query': '2412.16446'}, 'id': 'call_zpoG07UmFsc6HqzhHJKJlEz9', 'type': 'tool_call'}], usage_metadata={'input_tokens': 261, 'output_tokens': 19, 'total_tokens': 280, 'input_token_details': {'audio': 0, 'cache_rea

In [20]:
# Bare topic with no explicit question; in the recorded run the model calls
# arxiv, GoogleSearch and WikiepdiaSearch in a single turn.
question = HumanMessage(content="Machine Learning")

# Print every streamed update, each followed by a separator line.
for event in agent_executor.stream({"messages": [question]}):
    print(event)
    print("***********")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_urhGwJujEjyvud3nlAKSLdbz', 'function': {'arguments': '{"query": "Machine Learning"}', 'name': 'arxiv'}, 'type': 'function'}, {'id': 'call_m6UIK7qWWMkesNmwZhOTJ0Vk', 'function': {'arguments': '{"query_string": "Machine Learning"}', 'name': 'GoogleSearch'}, 'type': 'function'}, {'id': 'call_nXzqff055BR439R45zX83Idp', 'function': {'arguments': '{"query": "Machine Learning"}', 'name': 'WikiepdiaSearch'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 66, 'prompt_tokens': 249, 'total_tokens': 315, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BChTG0fXOefZR7JwWFfoh3D9IF6td', 'finish_reason': 'tool_calls', 'logprobs':

In [21]:
# General-knowledge question; in the recorded run the model routes it to `WikiepdiaSearch`.
question = HumanMessage(content="Who won the world cup in the Year 2020 in Cricket?")

# Print every streamed update, each followed by a separator line.
for event in agent_executor.stream({"messages": [question]}):
    print(event)
    print("***********")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_2NkEZDIzgSXpMahqA3U2RkjA', 'function': {'arguments': '{"query":"2020 ICC T20 World Cup winner"}', 'name': 'WikiepdiaSearch'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 261, 'total_tokens': 287, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BChTMwFblTwoT9StSh7ILkLG4YYSu', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a9668d8d-fd6b-499d-aaaa-8020bee730ed-0', tool_calls=[{'name': 'WikiepdiaSearch', 'args': {'query': '2020 ICC T20 World Cup winner'}, 'id': 'call_2NkEZDIzgSXpMahqA3U2RkjA', 'type': 'tool_call'}], usage_metadata={'input_tokens': 261, 'output_tokens': 26, 'total_to