Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

增加了仅限GPT4的agent功能,陆续补充,中文版readme已写 #1611

Merged
merged 1 commit into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 20 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/ch

关于如何使用自定义分词器和贡献自己的分词器,可以参考[Text Splitter 贡献说明](docs/splitter.md)。

## Agent生态
### 基础的Agent
在本版本中,我们实现了一个简单的基于 OpenAI 的 ReAct Agent 模型。目前,经过我们测试,仅有以下两个模型支持:
+ OpenAI GPT4
+ ChatGLM2-130B

目前版本的 Agent 仍然需要对提示词进行大量调试,提示词模板位于 `server/agent/custom_template.py`。

### 构建自己的Agent工具

详见 [docs/自定义Agent.md](docs/自定义Agent.md)

## Docker 部署

🐳 Docker 镜像地址: `registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.3`
Expand Down Expand Up @@ -392,23 +404,24 @@ CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
- [ ] 结构化数据接入
- [X] .csv
- [ ] .xlsx
- [ ] 分词及召回
- [ ] 接入不同类型 TextSplitter
- [ ] 优化依据中文标点符号设计的 ChineseTextSplitter
- [ ] 重新实现上下文拼接召回
- [X] 分词及召回
- [X] 接入不同类型 TextSplitter
- [X] 优化依据中文标点符号设计的 ChineseTextSplitter
- [X] 重新实现上下文拼接召回
- [ ] 本地网页接入
- [ ] SQL 接入
- [ ] 知识图谱/图数据库接入
- [X] 搜索引擎接入
- [X] Bing 搜索
- [X] DuckDuckGo 搜索
- [ ] Agent 实现
- [X] Agent 实现
- [X] 基础 ReAct 形式的 Agent 实现,包括调用计算器等
- [X] LLM 模型接入
- [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
- [ ] 支持 ChatGLM API 等 LLM API 的接入
- [X] 支持 ChatGLM API 等 LLM API 的接入
- [X] Embedding 模型接入
- [X] 支持调用 HuggingFace 中各开源 Embedding 模型
- [ ] 支持 OpenAI Embedding API 等 Embedding API 的接入
- [X] 支持 OpenAI Embedding API 等 Embedding API 的接入
- [X] 基于 FastAPI 的 API 方式调用
- [X] Web UI
- [X] 基于 Streamlit 的 Web UI
Expand Down
16 changes: 11 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
langchain==0.0.287
langchain>=0.0.302
fschat[model_worker]==0.2.29
openai
sentence_transformers
transformers>=4.31.0
torch~=2.0.0
fastapi~=0.99.1
transformers>=4.33.0
torch>=2.0.1
torchvision
torchaudio
fastapi>=0.103.1
nltk~=3.8.1
uvicorn~=0.23.1
starlette~=0.27.0
Expand Down Expand Up @@ -40,9 +42,13 @@ pandas~=2.0.3
streamlit>=1.26.0
streamlit-option-menu>=0.3.6
streamlit-antd-components>=0.1.11
streamlit-chatbox >=1.1.6, <=1.1.7
streamlit-chatbox>=1.1.9
streamlit-aggrid>=0.3.4.post3
httpx~=0.24.1
watchdog
tqdm
websockets
tiktoken
einops
scipy
transformers_stream_generator==0.0.4
13 changes: 8 additions & 5 deletions requirements_api.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
langchain==0.0.287
langchain>=0.0.302
fschat[model_worker]==0.2.29
openai
sentence_transformers
transformers>=4.31.0
torch~=2.0.0
fastapi~=0.99.1
transformers>=4.33.0
torch>=2.0.1
torchvision
torchaudio
fastapi>=0.103.1
nltk~=3.8.1
uvicorn~=0.23.1
starlette~=0.27.0
Expand All @@ -17,13 +19,14 @@ accelerate
spacy
PyMuPDF==1.22.5
rapidocr_onnxruntime>=1.3.2

requests
pathlib
pytest
scikit-learn
numexpr
vllm==0.1.7; sys_platform == "linux"


# online api libs
# zhipuai
# dashscope>=1.10.0 # qwen
Expand Down
2 changes: 1 addition & 1 deletion requirements_webui.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pandas~=2.0.3
streamlit>=1.26.0
streamlit-option-menu>=0.3.6
streamlit-antd-components>=0.1.11
streamlit-chatbox >=1.1.6, <=1.1.7
streamlit-chatbox>=1.1.9
streamlit-aggrid>=0.3.4.post3
httpx~=0.24.1
nltk
Expand Down
109 changes: 109 additions & 0 deletions server/agent/callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import asyncio
import json
from typing import Any, Dict, List, Optional, Union
from uuid import UUID

from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.schema import AgentFinish, AgentAction
from langchain.schema.output import LLMResult


def dumps(obj: Dict) -> str:
    """Serialize *obj* to a JSON string without escaping non-ASCII characters.

    Used to stream status payloads that contain Chinese text, so
    ``ensure_ascii=False`` keeps them human-readable on the wire.
    """
    serialized = json.dumps(obj, ensure_ascii=False)
    return serialized


class Status:
    """Integer status codes attached to each streamed JSON payload.

    The callback handler below stamps one of these onto ``cur_tool["status"]``
    before queueing it, so the consumer can tell what kind of event it is.
    """
    start: int = 1         # an LLM call has started (on_llm_start)
    running: int = 2       # the LLM is emitting tokens (on_llm_new_token)
    complete: int = 3      # the LLM call finished (on_llm_end)
    agent_action: int = 4  # a tool invocation has begun (on_tool_start)
    agent_finish: int = 5  # a tool invocation returned (on_tool_end)
    error: int = 6         # LLM or tool raised (on_llm_error / on_tool_error)
    make_tool: int = 7     # not used in this file — presumably set by callers; verify


class CustomAsyncIteratorCallbackHandler(AsyncIteratorCallbackHandler):
    """Async LangChain callback handler that streams agent progress as JSON.

    Every callback mutates ``self.cur_tool`` (a dict snapshot of the current
    step) and pushes its JSON serialization onto ``self.queue``; a consumer
    coroutine drains the queue to stream tokens, tool calls, and errors to
    the client.

    Fix vs. the original: the ``X | None`` / ``Exception | KeyboardInterrupt``
    union syntax in signatures is evaluated eagerly at definition time and
    raises ``TypeError`` on Python < 3.10 (there is no
    ``from __future__ import annotations`` in this file). Rewritten with
    ``typing.Optional`` / ``typing.Union``, matching the style already used
    by ``on_agent_finish``.
    """

    def __init__(self) -> None:
        super().__init__()
        # Own queue/done pair for this handler instance's stream state.
        self.queue = asyncio.Queue()
        self.done = asyncio.Event()
        # Snapshot of the current tool/LLM step; updated in place and
        # re-serialized on every callback.
        self.cur_tool: Dict[str, Any] = {}
        # While False, LLM tokens are suppressed (from the "Action" token
        # onward) so ReAct scaffolding is not streamed to the client.
        self.out = True

    async def on_tool_start(self, serialized: Dict[str, Any], input_str: str, *, run_id: UUID,
                            parent_run_id: Optional[UUID] = None, tags: Optional[List[str]] = None,
                            metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> None:
        """Reset the step snapshot for a new tool call and queue it."""
        self.cur_tool = {
            "tool_name": serialized["name"],
            "input_str": input_str,
            "output_str": "",
            "status": Status.agent_action,
            "run_id": run_id.hex,
            "llm_token": "",
            "final_answer": "",
            "error": "",
        }
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None,
                          tags: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Queue the tool's result and re-enable token streaming."""
        self.out = True
        self.cur_tool.update(
            status=Status.agent_finish,
            # Strip the "Answer:" prefix some tools prepend to their output.
            output_str=output.replace("Answer:", ""),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_tool_error(self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID,
                            parent_run_id: Optional[UUID] = None, tags: Optional[List[str]] = None,
                            **kwargs: Any) -> None:
        """Queue the tool failure and re-enable token streaming."""
        self.out = True
        self.cur_tool.update(
            status=Status.error,
            error=str(error),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Stream one LLM token, muting output once "Action" is seen."""
        if token:
            if token == "Action":
                # The model is starting its ReAct "Action:" scaffolding —
                # stop forwarding tokens until the next tool/LLM boundary.
                self.out = False
                self.cur_tool.update(
                    status=Status.running,
                    llm_token="\n\n",
                )

            if self.out:
                self.cur_tool.update(
                    status=Status.running,
                    llm_token=token,
                )
                self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
        """Mark the start of an LLM call."""
        self.cur_tool.update(
            status=Status.start,
            llm_token="",
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Mark the end of an LLM call and re-enable token streaming."""
        self.out = True
        self.cur_tool.update(
            status=Status.complete,
            llm_token="",
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> None:
        """Queue an LLM failure."""
        self.cur_tool.update(
            status=Status.error,
            error=str(error),
        )
        self.queue.put_nowait(dumps(self.cur_tool))

    async def on_agent_finish(
        self, finish: AgentFinish, *, run_id: UUID, parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Drop the step snapshot once the agent run is over."""
        self.cur_tool = {}
36 changes: 20 additions & 16 deletions server/agent/custom_template.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
template = """
尽可能地回答以下问题。你可以使用以下工具:{tools}
请按照以下格式进行:
Question: 需要你回答的输入问题
Thought: 你应该总是思考该做什么
Action: 需要使用的工具,应该是[{tool_names}]中的一个
Action Input: 传入工具的内容
请按照以下格式进行:

Question: 需要你回答的输入问题。
Thought: 你应该总是思考该做什么,并告诉我你要用什么工具。
Action: 需要使用的工具,应该是[{tool_names}]中的一个
Action Input: 传入工具的内容
Observation: 行动的结果
... (这个Thought/Action/Action Input/Observation可以重复N次)
Thought: 我现在知道最后的答案
Final Answer: 对原始输入问题的最终答案

现在开始!
Thought: 通过使用工具,我是否知道了答案,如果知道,就自然的回答问题,如果不知道,继续使用工具或者自己的知识 \n
Final Answer: 这个问题的答案是,输出完整的句子。
现在开始!

之前的对话:
之前的对话:
{history}

New question: {input}
Thought: {agent_scratchpad}"""
New question:
{input}
Thought:
{agent_scratchpad}"""


# ChatGPT 提示词模板
Expand Down Expand Up @@ -84,7 +85,7 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
return AgentFinish(
# Return values is generally always a dictionary with a single `output` key
# It is not recommended to try anything else at the moment :)
return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
return_values={"output": llm_output.replace("Final Answer:", "").strip()},
log=llm_output,
)
# Parse out the action and action input
Expand All @@ -95,10 +96,13 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
return_values={"output": f"调用agent失败: `{llm_output}`"},
log=llm_output,
)
raise OutputParserException(f"调用agent失败: `{llm_output}`")
action = match.group(1).strip()
action_input = match.group(2)
# Return the action and action input
return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
return AgentAction(
tool=action,
tool_input=action_input.strip(" ").strip('"'),
log=llm_output
)


Empty file added server/agent/knoledge.py
Empty file.
10 changes: 9 additions & 1 deletion server/agent/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

_PROMPT_TEMPLATE = '''
# 指令
接下来,作为一个专业的翻译专家,当我给出英文句子或段落时,你将提供通顺且具有可读性的对应语言的翻译。注意:
接下来,作为一个专业的翻译专家,当我给出句子或段落时,你将提供通顺且具有可读性的对应语言的翻译。注意:
1. 确保翻译结果流畅且易于理解
2. 无论提供的是陈述句或疑问句,只进行翻译
3. 不添加与原文无关的内容
Expand All @@ -21,6 +21,14 @@
${{翻译结果}}
```
答案: ${{答案}}

以下是一个例子
问题: 翻译13成英语
```text
13 English
```output
thirteen
答案: thirteen
'''

PROMPT = PromptTemplate(
Expand Down
2 changes: 1 addition & 1 deletion server/agent/weather.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def weather(query):
return "只能查看24小时内的天气,无法回答"
if time == "None":
time = "24" # 免费的版本只能24小时内的天气
key = "" # 和风天气API Key
key = "315625cdca234137944d7f8956106a3e" # 和风天气API Key
if key == "":
return "请先在代码中填入和风天气API Key"
city_info = get_city_info(location=location, adm=adm, key=key)
Expand Down