In [None]:
import os
os.environ["OPENAI_API_KEY"] = "你的openapi key"
os.environ["OPENAI_BASE_URL"] = "你的地址"

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "你的langsmith key"

In [None]:
pip install bs4

In [None]:
# 1.加载文档 使用 `WebBaseLoader` 类从指定来源加载内容，并生成 `Document` 对象（依赖 `bs4` 库）。
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

print(f"Total characters: {len(docs[0].page_content)}")

USER_AGENT environment variable not set, consider setting it to identify your requests.


Total characters: 43130


In [5]:
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [6]:
# 分割
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 66 sub-documents.


In [None]:
# 3.进行向量化和存储
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS


def create_vector_store(chunks,save_path=''):
    # 初始化 OpenAI 嵌入模型
    embedding = OpenAIEmbeddings(model="text-embedding-ada-002",chunk_size=1000)
    # 创建FAISS向量库
    vector_store = FAISS.from_documents(documents=chunks,embedding=embedding)
    
    # 保存到本地（可选）
    if save_path:
        vector_store.save_local(save_path)
        print(f"Vector store saved to {save_path}")
    return vector_store

vector_store = create_vector_store(all_splits,'./docs')


Vector store saved to ./docs


In [None]:
print(vector_store)
print(f"已加载 {len(vector_store)} 个文档块")

In [7]:
# 如果已经有embedding文件，直接加载，不要重新处理了，节省token
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
embedding = OpenAIEmbeddings(model="text-embedding-ada-002",chunk_size=1000)
vector_store = FAISS.load_local(
    folder_path="./docs",       # 存放index.faiss和index.pkl的目录路径
    embeddings=embedding,      # 必须与创建时相同的嵌入模型
    index_name="index",          # 可选：若文件名不是默认的"index"，需指定前缀
    allow_dangerous_deserialization=True  # 显式声明信任
)
print(f"已加载 {len(vector_store.docstore._dict)} 个文档块")
print(vector_store)

已加载 66 个文档块
<langchain_community.vectorstores.faiss.FAISS object at 0x135953990>


In [None]:
# 使用rag的提示词模版 - 尝试可用性，真实代码不展示这段
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "(context goes here)", "question": "(question goes here)"}
).to_messages()

print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:


In [9]:
# 4.准备和Retrieval链
from langchain_openai import ChatOpenAI # ChatOpenAI模型

# 实例化一个大模型工具 - OpenAI的GPT-3.5
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})


In [28]:
from langchain.memory import ConversationBufferMemory

# 初始化记忆存储（保存对话历史）
memory = ConversationBufferMemory(
    memory_key="chat_history",  # 存储对话历史的键名
    return_messages=True        # 以消息列表格式存储（适合ChatModel）
)
def save_memory(question,content):
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(content)  # 最后一块内容

In [40]:
# 5. 问答展示
from langchain import hub

prompt_template = hub.pull("rlm/rag-prompt")

def ask_question(question,chat_history,usetream=False):
    memory.clear()
    # 1. 将历史对话加载到memory（如果是首次调用则跳过）
    if chat_history:
        for turn in chat_history:
            print('chat_history',chat_history)
            memory.chat_memory.add_user_message(turn["user"])  # 用户问题
            memory.chat_memory.add_ai_message(turn["ai"])      # AI回答
    # 2. 检索文档
    retrieved_docs = retriever.invoke(question)
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
    # 3. 构建包含上下文的prompt
    prompt = prompt_template.invoke({"question": question, "context": docs_content, "chat_history": memory.buffer})
    # print(f'prompt:{prompt}')
    # 4. 流式/普通模式处理
    if usetream:
        # 返回生成器，逐块 yield 内容
        def stream_generator():
            for chunk in llm.stream(prompt):
                yield chunk.content
            # 将本轮对话存入memory
            save_memory(question, content=chunk)
        return stream_generator()
    else:
        answer = llm.invoke(prompt)
        # 保存到记忆
        save_memory(question, content=answer.content)
        return answer.content   


In [41]:
# 第一轮对话
history = []  # 初始化空历史
q1 = "What is Chain of thought ?"
answer1=ask_question(q1,chat_history=history)
print(f'Answer: {answer1}\n\n')
history.append({"user": q1, "ai": answer1})  # 记录到历史

Answer: Chain of Thought (CoT) is a prompting technique that enhances model performance on complex tasks by instructing the model to "think step by step." This approach decomposes difficult tasks into smaller, more manageable steps, allowing for better reasoning and interpretation of the model's thought process. CoT has become a standard method for improving the handling of intricate problems in language models.




In [42]:
# 第二轮对话（携带上文）
q2 = "What is the difference between the above content and Tree of Thoughts?"
answer2 = ask_question(q2, chat_history=history, usetream=True)  # 流式输出
for chunk in answer2:
    print(chunk, end="", flush=True)
history.append({"user": q2, "ai": "".join(chunk)})  # 记录流式结果的拼接

chat_history [{'user': 'What is Chain of thought ?', 'ai': 'Chain of Thought (CoT) is a prompting technique that enhances model performance on complex tasks by instructing the model to "think step by step." This approach decomposes difficult tasks into smaller, more manageable steps, allowing for better reasoning and interpretation of the model\'s thought process. CoT has become a standard method for improving the handling of intricate problems in language models.'}]
The content provided focuses on writing code based on detailed instructions and reasoning through decisions step by step. In contrast, Tree of Thoughts extends this approach by exploring multiple reasoning possibilities at each step, creating a tree structure for problem decomposition and thought generation. The search process in Tree of Thoughts can be BFS or DFS with evaluation by a classifier or majority vote.