## Building Advanced RAG Q&A Project With Multiple Data Sources With Langchain

In [1]:
from langchain_community.tools import WikipediaQueryRun 
from langchain_community.utilities import WikipediaAPIWrapper

## API wrapper
- this is basically use to query answer from wikipedia

In [3]:
api_wrapper=WikipediaAPIWrapper(top_k_results=1,doc_content_chars_max=200)
wiki=WikipediaQueryRun(api_wrapper=api_wrapper)

In [4]:
wiki.name

'wikipedia'

In [7]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [25]:
from dotenv import load_dotenv
import os
load_dotenv()
os.environ["OPENAI_API_BASE"] = os.getenv("OPENAI_API_BASE")
os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')

True

In [9]:
loader=WebBaseLoader('https://docs.smith.langchain.com/')
docs=loader.load()
documents=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200).split_documents(docs)

In [10]:
# 临时增加代理
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# 临时增加代理
os.environ['HTTP_PROXY'] = "http://127.0.0.1:7890"
os.environ['HTTPS_PROXY'] = "http://127.0.0.1:7890"

model_name = "BAAI/bge-large-zh-v1.5"
model_kwargs = {'device': 'cpu', "trust_remote_code":True}
encode_kwargs = {'normalize_embeddings': True}
hf_emb_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
# 去除增加代理
os.environ.pop('HTTP_PROXY')
os.environ.pop('HTTPS_PROXY')

## creating vector DB
vectordb=FAISS.from_documents(documents, hf_emb_model)
retriever=vectordb.as_retriever()
retriever

'http://127.0.0.1:7890'

'http://127.0.0.1:7890'

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7753fcee0710>, search_kwargs={})

In [15]:
from langchain_core.tools.retriever import create_retriever_tool
retriever_tool=create_retriever_tool(retriever,"langsmith_search","Search for information about Langsmith. For any questions about LangSmith, youmust use this tool")

In [16]:
retriever_tool.name

'langsmith_search'

## Arxiv Tool

In [17]:
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun


In [19]:
arxiv_wrapper=ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv=ArxivQueryRun(arxiv_wrapper=arxiv_wrapper)
arxiv.name

'arxiv'

## Combine All Tools

In [20]:
tools=[wiki,arxiv,retriever_tool]

In [21]:
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/home/chester/gitlab.chesterwang.com/ai_demo_4_interview/RAG/.venv/lib/python3.11/site-packages/wikipedia/__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=200)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=3, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=4000)),
 Tool(name='langsmith_search', description='Search for information about Langsmith. For any questions about LangSmith, youmust use this tool', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x7753a83a2480>, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddi

## Agents
- The core idea of agents is to use a language model to choose a sequence of actions to take. In chains, a sequence of actions is hardcoded (in code). In agents, a language model is used as a reasoning engine to determine which actions to take and in which order.
[Learn more](https://python.langchain.com/v0.1/docs/modules/agents/)

In [22]:
from langchain_openai import ChatOpenAI
# llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# llm = ChatOpenAI(model="kimi-k2-turbo-preview", temperature=0) # 会遇到httpcore的问题
# from langchain_community.llms import Ollama,ChatOllama
from langchain_ollama import ChatOllama
# llm = Ollama(model="deepseek-r1:1.5b",
#     base_url="http://localhost:11434"  # 明确指定 Ollama 服务地址
# )
llm = ChatOllama(model="qwen3:0.6b",
    base_url="http://localhost:11434"  # 明确指定 Ollama 服务地址
)

In [36]:
from langchain_core.prompts import load_prompt
# prompt = load_prompt("hwchase17/openai-functions-agent")
# prompt.messages
from langsmith import Client
client = Client(api_key=os.getenv("LANGSMITH_API_KEY"))
prompt = client.pull_prompt("hwchase17/openai-functions-agent", include_model=False)
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [37]:
from langchain_classic.agents import create_openai_tools_agent
#这里的tools参数主要提供工具的描述信息(因为内部要对)
agent=create_openai_tools_agent(llm,tools,prompt)

## Agent Executor

In [38]:
from langchain_classic.agents import AgentExecutor
#这里的tools参数主要具体的执行函数
agent_executor=AgentExecutor(agent=agent,tools=tools,verbose=True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag

In [39]:
response=agent_executor.invoke({"input":"What's the paper 1605.08386 about?"})
# 这里使用deepseek-r1:1.5b 的结果是总是不会调用相关工具。



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1605.08386'}`


[0m[33;1m[1;3mPublished: 2016-05-26
Title: Heat-bath random walks with Markov bases
Authors: Caprice Stanley, Tobias Windisch
Summary: Graphs on lattice points are studied whose edges come from a finite set of
allowed moves of arbitrary length. We show that the diameter of these graphs on
fibers of a fixed integer matrix can be bounded from above by a constant. We
then study the mixing behaviour of heat-bath random walks on these graphs. We
also state explicit conditions on the set of moves so that the heat-bath random
walk, a generalization of the Glauber dynamics, is an expander in fixed
dimension.[0m[32;1m[1;3mThe paper titled **"Heat-bath random walks with Markov bases"** was published on **2016-05-26**. It explores graphs whose edges correspond to moves on a finite set of allowed steps, specifically studied for their **diameter** and **mixing behavior**. The autho

In [84]:
print(response)

{'input': "What's the paper 1605.08386 about?", 'output': 'The paper with the identifier 1605.08386 is from the ACM Transactions on Algorithms. It is a research paper published by the Association for Computing Machinery (ACM). If you have more details or context about the paper, I can provide further information.'}


In [40]:
response=agent_executor.invoke({"input":"introduce the GPU to me?"})
# 这里使用deepseek-r1:1.5b 的结果是总是不会调用相关工具。



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mA GPU (Graphics Processing Unit) is a parallel processor used in computers to accelerate data processing tasks. It's responsible for rendering graphics, such as in games, video editing, or machine learning, by handling complex calculations that a CPU would take time to process. Unlike a CPU, which is slower, GPUs can handle multiple tasks simultaneously, making them ideal for applications that require significant computational power.[0m

[1m> Finished chain.[0m


In [41]:
print(response)

{'input': 'introduce the GPU to me?', 'output': "A GPU (Graphics Processing Unit) is a parallel processor used in computers to accelerate data processing tasks. It's responsible for rendering graphics, such as in games, video editing, or machine learning, by handling complex calculations that a CPU would take time to process. Unlike a CPU, which is slower, GPUs can handle multiple tasks simultaneously, making them ideal for applications that require significant computational power."}
