# Search engine with Tools & Agents

In [26]:
from langchain_community.tools import ArxivQueryRun, WikipediaQueryRun
from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper

In [27]:
# Wikipedia tool: the API wrapper carries the query settings
# (top_k_results=3, doc_content_chars_max=1000) and the tool wraps it
# so it can be invoked directly or handed to an agent.
api_wrapper_wiki = WikipediaAPIWrapper(
    top_k_results=3,
    doc_content_chars_max=1000,
)
tool_wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)

# Show the tool's name.
tool_wiki.name

'wikipedia'

In [28]:
# Smoke-test the Wikipedia tool on its own, outside of any agent.
wiki_query = "What is precalculus in math?"
tool_wiki.invoke(wiki_query)

'Page: New Math\nSummary: New Mathematics or New Math was a dramatic but temporary change in the way mathematics was taught in American grade schools, and to a lesser extent in European countries and elsewhere, during the 1950s–1970s.\n\n\n\nPage: Mathematics education in the United States\nSummary: Mathematics education in the United States varies considerably from one state to the next, and even within a single state. However, with the adoption of the Common Core Standards in most states and the District of Columbia beginning in 2010, mathematics content across the country has moved into closer agreement for each grade level. The SAT, a standardized university entrance exam, has been reformed to better reflect the contents of the Common Core. However, many students take alternatives to the traditional pathways, including accelerated tracks. As of 2023, twenty-seven states require students to pass three math courses before graduation from high school (grades 9 to 12, for students typi

In [29]:
# arXiv tool: same wrapper-plus-tool pattern as the Wikipedia tool,
# configured with top_k_results=1 and doc_content_chars_max=250.
api_wrapper_arxiv = ArxivAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=250,
)
tool_arxiv = ArxivQueryRun(api_wrapper=api_wrapper_arxiv)

# Show the tool's name.
tool_arxiv.name

'arxiv'

In [30]:
# Smoke-test the arXiv tool on its own, outside of any agent.
arxiv_query = "What is transformer?"
tool_arxiv.invoke(arxiv_query)

'Published: 2022-05-12\nTitle: Predicting Human Psychometric Properties Using Computational Language Models\nAuthors: Antonio Laverghetta Jr., Animesh Nighojkar, Jamshidbek Mirzakhalov, John Licato\nSummary: Transformer-based language models (LMs) contin'

In [31]:
# Built-in tools collected so far; `tools` is reassigned further down once
# the retriever tool exists.
tools = [
    tool_wiki,
    tool_arxiv,
]

In [32]:
## Custom tools[RAG Tool]
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [33]:
# Build a FAISS-backed retriever over the LangSmith docs landing page.
#
# OpenAIEmbeddings() is constructed in this cell, which runs before the later
# cell that calls load_dotenv(). Load the .env file here as well so a fresh
# "Restart & Run All" does not depend on credentials left over from an earlier
# kernel session. load_dotenv() does not override variables that are already
# set, so calling it again later is harmless.
# NOTE(review): presumably the OpenAI key is supplied via the environment/.env — confirm.
from dotenv import load_dotenv

load_dotenv()

# Fetch the page and wrap it as LangChain documents.
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()

# Split into chunks (chunk_size=1000, chunk_overlap=200) before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Embed the chunks and index them in FAISS.
vectordb = FAISS.from_documents(documents, OpenAIEmbeddings())

# Expose the index through the retriever interface and display it.
retriever = vectordb.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x125d3fc10>, search_kwargs={})

In [34]:
# Sanity-check the retriever: run one query and display only the first hit.
dataset_hits = retriever.invoke("how to upload a dataset")
dataset_hits[0]

Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!', 'language': 'en'}, page_content='description="A sample dataset in LangSmith.")client.create_examples(    inputs=[        {"postfix": "to LangSmith"},        {"postfix": "to Evaluations in LangSmith"},    ],    outputs=[        {"output": "Welcome to LangSmith"},        {"output": "Welcome to Evaluations in LangSmith"},    ],    dataset_id=dataset.id,)# Define your evaluatordef exact_match(run, example):    return {"score": run.outputs["output"] == example.outputs["output"]}experiment_results = evaluate(    lambda input: "Welcome " + input[\'postfix\'], # Your AI system goes here    data=dataset_name, # The d

In [35]:
from langchain.tools.retriever import create_retriever_tool

# Wrap the retriever as a named tool so the agent can call it like the others.
retriever_tool = create_retriever_tool(
    retriever,
    name="langsmith-search",
    description="Search any information about Langsmith",
)

# Show the tool's name.
retriever_tool.name

'langsmith-search'

In [36]:
# Final tool set handed to the agent: the two built-in tools plus the
# LangSmith retriever tool.
tools = [
    tool_wiki,
    tool_arxiv,
    retriever_tool,
]
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/Users/kamaraj/scm/ai/genai-langchain/.venv/lib/python3.11/site-packages/wikipedia/__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=1000)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250)),
 Tool(name='langsmith-search', description='Search any information about Langsmith', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x1170acb80>, retriever=VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x125d3fc10>, 

In [37]:
## Run all these tools with an agent and an LLM
## Tools + LLM --> AgentExecutor

from langchain_groq import ChatGroq
from dotenv import load_dotenv
import openai
import os

# Pull settings from a local .env file into the process environment.
# NOTE(review): the Groq API key is presumably supplied this way — confirm.
load_dotenv()

# Chat model that drives the agent.
GROQ_MODEL_NAME = "gemma2-9b-it"
llm = ChatGroq(model_name=GROQ_MODEL_NAME)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x1375d46d0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x1375e0610>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [38]:
## Prompt template
from langchain import hub

# Pull the agent prompt from the hub and display the messages it is made of.
PROMPT_HANDLE = "hwchase17/openai-functions-agent"
prompt = hub.pull(PROMPT_HANDLE)
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [39]:
## Agent
from langchain.agents import create_openai_tools_agent

# Combine the chat model, the tool list and the hub prompt into one agent runnable.
agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
agent

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages

In [40]:
## Agent executor
from langchain.agents import AgentExecutor

# The executor runs the agent with the tools it may call; verbose=True prints
# the chain trace (tool invocations and observations) seen in the outputs below.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag

In [41]:
# A LangSmith question; the recorded run answers it via the `langsmith-search` tool.
langsmith_question = "Tell me about Langsmith"
agent_executor.invoke({"input": langsmith_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith-search` with `{'query': 'Langsmith'}`


[0m[38;5;200m[1;3mGet started with LangSmith | 🦜️🛠️ LangSmith

Skip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith​PythonTypeScriptpip install -U langsmithyarn add langsmith2. Create an API key​To create an API key head to the Settings page. Then click Create API Key.3. Set up your environment​Shellexport LANGCHAIN_TRACING_V2=trueexport LANGCHAIN_API_KEY=<your-api-key># The below examples use the OpenAI API, though it's not necessary in generalexport OPENAI_API_KEY

{'input': 'Tell me about Langsmith',
 'output': 'LangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!'}

In [42]:
# A general-knowledge question; the recorded run answers it via the `wikipedia` tool.
ml_question = "What is machine learning"
agent_executor.invoke({"input": ml_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'machine learning'}`


[0m[36;1m[1;3mPage: Machine learning
Summary: Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions. Quick progress in the field of deep learning, beginning in 2010s, allowed neural networks to surpass many previous approaches in performance.
ML finds application in many fields, including natural language processing, computer vision, speech recognition, email filtering, agriculture, and medicine. The application of ML to business problems is known as predictive analytics.
Statistics and mathematical optimization (mathematical programming) methods comprise the foundations of machine learning. Data mining is a related field of study, focusing on exploratory data analysis (EDA)

{'input': 'What is machine learning',
 'output': 'Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data, and thus perform tasks without explicit instructions.  \n'}

In [43]:
# A paper-id question meant for the `arxiv` tool.
# NOTE: in the recorded run the model passed the id as '1706.037762' (an extra
# digit), so the tool found nothing; the question itself contains the id as written here.
paper_question = "What's the paper 1706.03762 about?"
agent_executor.invoke({"input": paper_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1706.037762'}`


[0m[33;1m[1;3mNo good Arxiv Result was found[0m[32;1m[1;3mThat's strange.  Could you double check the ID?  It looks like there might be a typo. 
[0m

[1m> Finished chain.[0m


{'input': "What's the paper 1706.03762 about?",
 'output': "That's strange.  Could you double check the ID?  It looks like there might be a typo. \n"}

## Adding in memory
This agent is stateless: it does not remember previous interactions. To give it memory, we need to pass in the previous messages as `chat_history`. Note: the key must be called `chat_history` because the prompt we are using declares a `MessagesPlaceholder` with that variable name. If we used a different prompt, we could change the variable name.

In [44]:
# First message of the conversation, so there are no earlier turns yet:
# chat_history is passed explicitly as an empty list.
first_turn = {"input": "hi! my name is bob", "chat_history": []}
agent_executor.invoke(first_turn)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Bob'}`


[0m



  lis = BeautifulSoup(html).find_all('li')


[36;1m[1;3mPage: B.o.B
Summary: Bobby Ray Simmons Jr. (born November 15, 1988), known professionally as B.o.B, is an American rapper, singer, and record producer. Raised in Decatur, Georgia, Simmons signed with producer Jim Jonsin through his Rebel Rock Entertainment label in 2006. Two years later, he signed a joint venture recording contract with fellow Georgia rapper T.I.'s Grand Hustle Records, an imprint of Atlantic Records. Following his major-label deal, Simmons quickly achieved commercial success when his 2009 debut single, "Nothin' on You" (featuring Bruno Mars), peaked the U.S. Billboard Hot 100 chart and received three nominations—Record of the Year, Best Rap Song, and Best Rap/Sung Collaboration—at the 53rd Grammy Awards.
His 2010 follow-up singles, "Airplanes" (featuring Hayley Williams of Paramore) and "Magic" (featuring Rivers Cuomo), peaked at numbers two and ten on the Billboard Hot 100, respectively. All three preceded the release of Simmons' debut studio album The A

{'input': 'hi! my name is bob',
 'chat_history': [],
 'output': 'It looks like B.o.B\'s debut album was called "B.o.B Presents: The Adventures of Bobby Ray" and it was released in 2010.  '}

In [45]:
from langchain_core.messages import AIMessage, HumanMessage

# Replay the earlier exchange by hand so the agent can refer back to it.
previous_turns = [
    HumanMessage(content="hi! my name is bob"),
    AIMessage(content="Hello Bob! How can I assist you today?"),
]

agent_executor.invoke({"chat_history": previous_turns, "input": "what's my name?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Bob.[0m

[1m> Finished chain.[0m


{'chat_history': [HumanMessage(content='hi! my name is bob', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, response_metadata={})],
 'input': "what's my name?",
 'output': 'Your name is Bob.'}

In [46]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# One ChatMessageHistory per session id, held in process memory only.
# Previously a single history object was returned for every session id, so
# separate sessions would have shared one transcript.
session_histories = {}


def get_session_history(session_id):
    """Return the chat history for ``session_id``, creating an empty one on first use.

    Args:
        session_id: Identifier taken from ``config["configurable"]["session_id"]``.

    Returns:
        The ``ChatMessageHistory`` stored for that session.
    """
    if session_id not in session_histories:
        session_histories[session_id] = ChatMessageHistory()
    return session_histories[session_id]


SESSION_ID = "<foo>"

# Kept so the existing name `message_history` stays available; it is the history
# object of the session used in this notebook.
message_history = get_session_history(SESSION_ID)

# Wrap the executor so that messages are read from and appended to the
# per-session history automatically. The keys match the agent's input key and
# the prompt's `chat_history` placeholder.
agent_with_chat_history = RunnableWithMessageHistory(
    agent_executor,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

# The session id in the config selects which stored history is used.
agent_with_chat_history.invoke(
    {"input": "hi! I'm bob"},
    config={"configurable": {"session_id": SESSION_ID}},
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `langsmith-search` with `{'query': 'What is Langsmith?'}`


[0m[38;5;200m[1;3mGet started with LangSmith | 🦜️🛠️ LangSmith

Skip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudQuick startOn this pageGet started with LangSmithLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!1. Install LangSmith​PythonTypeScriptpip install -U langsmithyarn add langsmith2. Create an API key​To create an API key head to the Settings page. Then click Create API Key.3. Set up your environment​Shellexport LANGCHAIN_TRACING_V2=trueexport LANGCHAIN_API_KEY=<your-api-key># The below examples use the OpenAI API, though it's not necessary in generalexport OPENA

{'input': "hi! I'm bob",
 'chat_history': [],
 'output': 'Langsmith is a platform for building production-grade LLM applications. \n'}

In [47]:
# Follow-up turn: a session id must be supplied in the config. Reusing the id
# from the previous call means the earlier exchange is supplied as chat_history.
followup_config = {"configurable": {"session_id": "<foo>"}}
agent_with_chat_history.invoke({"input": "what's my name?"}, config=followup_config)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is bob.[0m

[1m> Finished chain.[0m


{'input': "what's my name?",
 'chat_history': [HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Langsmith is a platform for building production-grade LLM applications. \n', additional_kwargs={}, response_metadata={})],
 'output': 'Your name is bob.'}