Skip to content

Commit

Permalink
增加了仅限GPT4的agent功能,陆续补充,中文版readme已写 (#1611)
Browse files Browse the repository at this point in the history
  • Loading branch information
zRzRzRzRzRzRzR committed Sep 27, 2023
1 parent c546b42 commit 5702554
Show file tree
Hide file tree
Showing 11 changed files with 277 additions and 87 deletions.
27 changes: 20 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/ch

关于如何使用自定义分词器和贡献自己的分词器,可以参考[Text Splitter 贡献说明](docs/splitter.md)

## Agent生态
### 基础的Agent
在本版本中,我们实现了一个简单的基于 ReAct 提示方式的 Agent 模型。经过我们测试,目前仅有以下两个模型支持:
+ OpenAI GPT4
+ ChatGLM2-130B

目前版本的 Agent 仍然需要对提示词进行大量调试,调试位置位于 `server/agent/custom_template.py` 中的提示词模板。

### 构建自己的Agent工具

详见 [docs/自定义Agent.md](docs/自定义Agent.md)

## Docker 部署

🐳 Docker 镜像地址: `registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.3`
Expand Down Expand Up @@ -392,23 +404,24 @@ CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
- [ ] 结构化数据接入
- [X] .csv
- [ ] .xlsx
- [ ] 分词及召回
- [ ] 接入不同类型 TextSplitter
- [ ] 优化依据中文标点符号设计的 ChineseTextSplitter
- [ ] 重新实现上下文拼接召回
- [X] 分词及召回
- [X] 接入不同类型 TextSplitter
- [X] 优化依据中文标点符号设计的 ChineseTextSplitter
- [X] 重新实现上下文拼接召回
- [ ] 本地网页接入
- [ ] SQL 接入
- [ ] 知识图谱/图数据库接入
- [X] 搜索引擎接入
- [X] Bing 搜索
- [X] DuckDuckGo 搜索
- [ ] Agent 实现
- [X] Agent 实现
- [X] 基础 ReAct 形式的 Agent 实现,包括调用计算器等
- [X] LLM 模型接入
- [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
- [ ] 支持 ChatGLM API 等 LLM API 的接入
- [X] 支持 ChatGLM API 等 LLM API 的接入
- [X] Embedding 模型接入
- [X] 支持调用 HuggingFace 中各开源 Embedding 模型
- [ ] 支持 OpenAI Embedding API 等 Embedding API 的接入
- [X] 支持 OpenAI Embedding API 等 Embedding API 的接入
- [X] 基于 FastAPI 的 API 方式调用
- [X] Web UI
- [X] 基于 Streamlit 的 Web UI
Expand Down
16 changes: 11 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
langchain==0.0.287
langchain>=0.0.302
fschat[model_worker]==0.2.29
openai
sentence_transformers
transformers>=4.31.0
torch~=2.0.0
fastapi~=0.99.1
transformers>=4.33.0
torch>=2.0.1
torchvision
torchaudio
fastapi>=0.103.1
nltk~=3.8.1
uvicorn~=0.23.1
starlette~=0.27.0
Expand Down Expand Up @@ -40,9 +42,13 @@ pandas~=2.0.3
streamlit>=1.26.0
streamlit-option-menu>=0.3.6
streamlit-antd-components>=0.1.11
streamlit-chatbox >=1.1.6, <=1.1.7
streamlit-chatbox>=1.1.9
streamlit-aggrid>=0.3.4.post3
httpx~=0.24.1
watchdog
tqdm
websockets
tiktoken
einops
scipy
transformers_stream_generator==0.0.4
13 changes: 8 additions & 5 deletions requirements_api.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
langchain==0.0.287
langchain>=0.0.302
fschat[model_worker]==0.2.29
openai
sentence_transformers
transformers>=4.31.0
torch~=2.0.0
fastapi~=0.99.1
transformers>=4.33.0
torch>=2.0.1
torchvision
torchaudio
fastapi>=0.103.1
nltk~=3.8.1
uvicorn~=0.23.1
starlette~=0.27.0
Expand All @@ -17,13 +19,14 @@ accelerate
spacy
PyMuPDF==1.22.5
rapidocr_onnxruntime>=1.3.2

requests
pathlib
pytest
scikit-learn
numexpr
vllm==0.1.7; sys_platform == "linux"


# online api libs
# zhipuai
# dashscope>=1.10.0 # qwen
Expand Down
2 changes: 1 addition & 1 deletion requirements_webui.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pandas~=2.0.3
streamlit>=1.26.0
streamlit-option-menu>=0.3.6
streamlit-antd-components>=0.1.11
streamlit-chatbox >=1.1.6, <=1.1.7
streamlit-chatbox>=1.1.9
streamlit-aggrid>=0.3.4.post3
httpx~=0.24.1
nltk
Expand Down
109 changes: 109 additions & 0 deletions server/agent/callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import asyncio
import json
from typing import Any, Dict, List, Optional, Union
from uuid import UUID

from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.schema import AgentFinish, AgentAction
from langchain.schema.output import LLMResult


def dumps(obj: Dict) -> str:
    """Serialize *obj* to a JSON string, keeping non-ASCII (e.g. Chinese) characters readable."""
    return json.dumps(obj, ensure_ascii=False)


class Status:
    """Integer status codes attached to each JSON message streamed to the client.

    Kept as plain class attributes (not an Enum) so the values serialize
    directly as ints inside the payloads built by ``dumps``.
    """

    start: int = 1         # an LLM call has started
    running: int = 2       # token streaming is in progress
    complete: int = 3      # the LLM call finished
    agent_action: int = 4  # a tool invocation has started
    agent_finish: int = 5  # a tool invocation finished
    error: int = 6         # the LLM or a tool raised an error
    make_tool: int = 7     # not set anywhere in this file — presumably used by callers


class CustomAsyncIteratorCallbackHandler(AsyncIteratorCallbackHandler):
    """Async callback handler that streams agent/LLM events as JSON strings.

    Every callback updates ``self.cur_tool`` — a dict describing the current
    tool call / LLM step — and pushes its JSON serialization onto
    ``self.queue`` for the consumer to iterate.

    Fix vs. original: PEP 604 unions (``UUID | None``) in the method
    signatures are evaluated at class-definition time and raise ``TypeError``
    on Python < 3.10; they are replaced with ``Optional``/``Union`` here.
    """

    def __init__(self):
        super().__init__()
        # Re-create the queue/event so each handler instance starts clean,
        # independent of whatever the parent initializer set up.
        self.queue = asyncio.Queue()
        self.done = asyncio.Event()
        self.cur_tool = {}  # state dict for the tool call currently in flight
        self.out = True     # False while suppressing tokens after an "Action" token

    async def on_tool_start(self, serialized: Dict[str, Any], input_str: str, *, run_id: UUID,
                            parent_run_id: Optional[UUID] = None, tags: Optional[List[str]] = None,
                            metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> None:
        """Start a new tool record and emit it with status ``agent_action``."""
        self.cur_tool = {
            "tool_name": serialized["name"],
            "input_str": input_str,
            "output_str": "",
            "status": Status.agent_action,
            "run_id": run_id.hex,
            "llm_token": "",
            "final_answer": "",
            "error": "",
        }
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None,
                          tags: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Emit the tool's output (status ``agent_finish``) and resume token streaming."""
        self.out = True  # tokens are forwarded again once the tool has returned
        self.cur_tool.update(
            status=Status.agent_finish,
            # Strip the "Answer:" marker some tools prepend to their result.
            output_str=output.replace("Answer:", ""),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_tool_error(self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID,
                            parent_run_id: Optional[UUID] = None, tags: Optional[List[str]] = None,
                            **kwargs: Any) -> None:
        """Emit the tool failure (status ``error``) and resume token streaming."""
        self.out = True
        self.cur_tool.update(
            status=Status.error,
            error=str(error),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Forward streamed LLM tokens, muting everything after an "Action" token.

        Once the model starts emitting the ReAct "Action" block, the raw
        tokens are suppressed (``self.out = False``); the tool name/input is
        rendered from the tool callbacks instead. The "\\n\\n" stored here is
        only emitted as part of the next queued update.
        """
        if token:
            if token == "Action":
                self.out = False
                self.cur_tool.update(
                    status=Status.running,
                    llm_token="\n\n",
                )

            if self.out:
                self.cur_tool.update(
                    status=Status.running,
                    llm_token=token,
                )
                self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
        """Emit a ``start`` marker with an empty token at the beginning of an LLM call."""
        self.cur_tool.update(
            status=Status.start,
            llm_token="",
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Emit a ``complete`` marker when one LLM call ends.

        NOTE(review): unlike the parent class, this does not set
        ``self.done`` — presumably because an agent run chains several LLM
        calls; confirm the consumer terminates the iteration elsewhere.
        """
        self.out = True
        self.cur_tool.update(
            status=Status.complete,
            llm_token="",
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> None:
        """Emit the LLM failure (status ``error``)."""
        self.cur_tool.update(
            status=Status.error,
            error=str(error),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_agent_finish(
        self, finish: AgentFinish, *, run_id: UUID, parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Clear the per-tool state once the agent run is finished."""
        self.cur_tool = {}
36 changes: 20 additions & 16 deletions server/agent/custom_template.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
template = """
尽可能地回答以下问题。你可以使用以下工具:{tools}
请按照以下格式进行:
Question: 需要你回答的输入问题
Thought: 你应该总是思考该做什么
Action: 需要使用的工具,应该是[{tool_names}]中的一个
Action Input: 传入工具的内容
请按照以下格式进行:
Question: 需要你回答的输入问题。
Thought: 你应该总是思考该做什么,并告诉我你要用什么工具。
Action: 需要使用的工具,应该是[{tool_names}]中的一个
Action Input: 传入工具的内容
Observation: 行动的结果
... (这个Thought/Action/Action Input/Observation可以重复N次)
Thought: 我现在知道最后的答案
Final Answer: 对原始输入问题的最终答案
现在开始!
Thought: 通过使用工具,我是否知道了答案,如果知道,就自然的回答问题,如果不知道,继续使用工具或者自己的知识 \n
Final Answer: 这个问题的答案是,输出完整的句子。
现在开始!
之前的对话:
之前的对话:
{history}
New question: {input}
Thought: {agent_scratchpad}"""
New question:
{input}
Thought:
{agent_scratchpad}"""


# ChatGPT 提示词模板
Expand Down Expand Up @@ -84,7 +85,7 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
return AgentFinish(
# Return values is generally always a dictionary with a single `output` key
# It is not recommended to try anything else at the moment :)
return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
return_values={"output": llm_output.replace("Final Answer:", "").strip()},
log=llm_output,
)
# Parse out the action and action input
Expand All @@ -95,10 +96,13 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
return_values={"output": f"调用agent失败: `{llm_output}`"},
log=llm_output,
)
raise OutputParserException(f"调用agent失败: `{llm_output}`")
action = match.group(1).strip()
action_input = match.group(2)
# Return the action and action input
return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
return AgentAction(
tool=action,
tool_input=action_input.strip(" ").strip('"'),
log=llm_output
)


Empty file added server/agent/knoledge.py
Empty file.
10 changes: 9 additions & 1 deletion server/agent/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

_PROMPT_TEMPLATE = '''
# 指令
接下来,作为一个专业的翻译专家,当我给出英文句子或段落时,你将提供通顺且具有可读性的对应语言的翻译。注意:
接下来,作为一个专业的翻译专家,当我给出句子或段落时,你将提供通顺且具有可读性的对应语言的翻译。注意:
1. 确保翻译结果流畅且易于理解
2. 无论提供的是陈述句或疑问句,只进行翻译
3. 不添加与原文无关的内容
Expand All @@ -21,6 +21,14 @@
${{翻译结果}}
```
答案: ${{答案}}
以下是一个例子
问题: 翻译13成英语
```text
13 English
```output
thirteen
答案: thirteen
'''

PROMPT = PromptTemplate(
Expand Down
2 changes: 1 addition & 1 deletion server/agent/weather.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def weather(query):
return "只能查看24小时内的天气,无法回答"
if time == "None":
time = "24" # 免费的版本只能24小时内的天气
key = "" # 和风天气API Key
key = "315625cdca234137944d7f8956106a3e" # 和风天气API Key
if key == "":
return "请先在代码中填入和风天气API Key"
city_info = get_city_info(location=location, adm=adm, key=key)
Expand Down

0 comments on commit 5702554

Please sign in to comment.