
Commit

Feat/report (#173)
yaojin3616 committed Dec 1, 2023
2 parents ac8f487 + eda6961 commit 9271a14
Showing 45 changed files with 1,353 additions and 1,861 deletions.
21 changes: 14 additions & 7 deletions src/backend/bisheng/api/v1/callback.py
@@ -99,14 +99,21 @@ async def on_text(self, text: str, **kwargs: Any) -> Any:
else:
await self.websocket.send_json(log.dict())
await self.websocket.send_json(start.dict())
elif kwargs.get('type'):
start = ChatResponse(type='start', category=kwargs.get('type'))
end = ChatResponse(type='end', intermediate_steps=text, category=kwargs.get('type'))
await self.websocket.send_json(start.dict())
await self.websocket.send_json(end.dict())
elif 'category' in kwargs:
log = ChatResponse(message=text, type='stream')
await self.websocket.send_json(log.dict())
if 'autogen' == kwargs['category']:
log = ChatResponse(message=text, type='stream')
await self.websocket.send_json(log.dict())
if kwargs.get('type'):
# for compatibility
start = ChatResponse(type='start', category=kwargs.get('type'))
end = ChatResponse(type='end', intermediate_steps=text,
category=kwargs.get('type'))
await self.websocket.send_json(start.dict())
await self.websocket.send_json(end.dict())
else:
log = ChatResponse(message=text, intermediate_steps=kwargs['log'],
type=kwargs['type'], category=kwargs['category'])
await self.websocket.send_json(log.dict())

async def on_agent_action(self, action: AgentAction, **kwargs: Any):
log = f'Thought: {action.log}'
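
The reworked `on_text` dispatches different `ChatResponse` messages depending on the keyword arguments the callback receives. A minimal, self-contained sketch of the new `category` branch is shown below; plain dicts stand in for `ChatResponse` and the collected list stands in for the websocket sends, so this is an illustration of the dispatch logic rather than the bisheng implementation itself.

```python
# Illustrative sketch of the kwargs-driven dispatch in on_text.
# Plain dicts stand in for ChatResponse; `sent` collects what would be
# pushed over the websocket. Not the actual bisheng code.
from typing import Any, Dict, List


def dispatch_on_text(text: str, **kwargs: Any) -> List[Dict[str, Any]]:
    sent: List[Dict[str, Any]] = []
    if 'category' in kwargs:
        if kwargs['category'] == 'autogen':
            # autogen output is streamed as-is
            sent.append({'type': 'stream', 'message': text})
            if kwargs.get('type'):
                # compatibility: also emit a start/end pair for this type
                sent.append({'type': 'start', 'category': kwargs['type']})
                sent.append({'type': 'end', 'category': kwargs['type'],
                             'intermediate_steps': text})
        else:
            # other categories carry their own log and type
            sent.append({'type': kwargs['type'], 'category': kwargs['category'],
                         'message': text, 'intermediate_steps': kwargs['log']})
    return sent


if __name__ == '__main__':
    print(dispatch_on_text('agent reply', category='autogen', type='answer'))
    print(dispatch_on_text('step output', category='processing', type='end', log='tool call'))
```
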
1 change: 1 addition & 0 deletions src/backend/bisheng/api/v1/report.py
@@ -34,6 +34,7 @@ async def callback(data: dict, session: Session = Depends(get_session)):
.where(or_(Report.version_key == key,
Report.newversion_key == key))).first()
if not db_report:
logger.error(f'report_callback cannot find the flow_id flow_id={key}')
raise HTTPException(status_code=500, detail='cannot find the flow_id')
db_report.object_name = object_name
db_report.version_key = key
5 changes: 2 additions & 3 deletions src/backend/bisheng/api/v1/validate.py
@@ -1,5 +1,4 @@
from bisheng.api.v1.base import (Code, CodeValidationResponse,
PromptValidationResponse,
from bisheng.api.v1.base import (Code, CodeValidationResponse, PromptValidationResponse,
ValidatePromptRequest, validate_prompt)
from bisheng.template.field.base import TemplateField
from bisheng.utils.logger import logger
@@ -67,7 +66,7 @@ def add_new_variables_to_template(input_variables, prompt_request):
show=True,
advanced=False,
multiline=True,
input_types=['Document', 'BaseOutputParser'],
input_types=['Document', 'BaseOutputParser', 'VariableNode'],
value='', # Set the value to empty string
)
if variable in prompt_request.frontend_node.template:
2 changes: 1 addition & 1 deletion src/backend/bisheng/api/v2/filelib.py
@@ -148,7 +148,7 @@ async def upload_file(*,

file_name = file.filename
# cache locally
file_path = save_uploaded_file(file.file, 'bisheng').as_posix()
file_path = save_uploaded_file(file.file, 'bisheng', file_name).as_posix()
auto_p = True
if auto_p:
separator = ['\n\n', '\n', ' ', '']
22 changes: 14 additions & 8 deletions src/backend/bisheng/cache/utils.py
@@ -182,7 +182,8 @@ def save_uploaded_file(file, folder_name, file_name):
mino_client.upload_tmp(file_name, file_byte)
file_path = mino_client.get_share_link(file_name, tmp_bucket)
else:
file_path = folder_path / md5_name
file_type = md5_name.split('.')[-1]
file_path = folder_path / f'{md5_name}.{file_type}'
with open(file_path, 'wb') as new_file:
while chunk := file.read(8192):
new_file.write(chunk)
@@ -191,7 +192,7 @@ def save_uploaded_file(file, folder_name, file_name):


@create_cache_folder
def save_download_file(file_byte, folder_name):
def save_download_file(file_byte, folder_name, filename):
"""
Save an uploaded file to the specified folder with a hash of its content as the file name.
@@ -218,8 +219,8 @@ def save_download_file(file_byte, folder_name):
# Use the hex digest of the hash as the file name
hex_dig = sha256_hash.hexdigest()
md5_name = hex_dig

file_path = folder_path / md5_name
file_type = filename.split('.')[-1]
file_path = folder_path / f'{md5_name}.{file_type}'
with open(file_path, 'wb') as new_file:
new_file.write(file_byte)
return str(file_path)
@@ -235,10 +236,15 @@ def file_download(file_path: str):
'Check the url of your file; returned status code %s'
% r.status_code
)

file_name = unquote(urlparse(file_path).path.split('/')[-1])
file_path = save_download_file(r.content, 'bisheng')
return file_path, file_name
# check the Content-Disposition header to find the file name
content_disposition = r.headers.get('Content-Disposition')
filename = ''
if content_disposition:
filename = content_disposition.split('filename=')[-1].strip("\"'")
if not filename:
filename = unquote(urlparse(file_path).path.split('/')[-1])
file_path = save_download_file(r.content, 'bisheng', filename)
return file_path, filename
elif not os.path.isfile(file_path):
raise ValueError('File path %s is not a valid file or url' % file_path)
return file_path, ''
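
The download path now tries to recover the original file name from the `Content-Disposition` header and only falls back to the URL path when that header is missing, so the saved file keeps its extension. A rough, standard-library-only sketch of that logic follows; the helper names are illustrative and not part of bisheng.

```python
# Illustrative helpers mirroring the file_download / save_download_file changes:
# pick a file name from Content-Disposition (falling back to the URL path),
# then keep its extension next to the content hash.
from typing import Optional
from urllib.parse import unquote, urlparse


def pick_filename(url: str, content_disposition: Optional[str]) -> str:
    filename = ''
    if content_disposition:
        # e.g. 'attachment; filename="report.docx"'
        filename = content_disposition.split('filename=')[-1].strip('"\'')
    if not filename:
        filename = unquote(urlparse(url).path.split('/')[-1])
    return filename


def hashed_path(folder: str, sha256_hex: str, filename: str) -> str:
    # keep the original extension so downstream loaders can detect the type
    file_type = filename.split('.')[-1]
    return f'{folder}/{sha256_hex}.{file_type}'


if __name__ == '__main__':
    name = pick_filename('http://host/files/data.pdf',
                         'attachment; filename="report.docx"')
    print(name)                                          # report.docx
    print(hashed_path('/tmp/bisheng', 'ab12cd', name))   # /tmp/bisheng/ab12cd.docx
```
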
31 changes: 11 additions & 20 deletions src/backend/bisheng/chat/handlers.py
@@ -64,11 +64,15 @@ async def process_report(self, session: ChatManager,
return
start_resp = ChatResponse(type='start', user_id=user_id)
await session.send_json(client_id, chat_id, start_resp)

langchain_object = session.in_memory_cache.get(key)
template_muban = mino_client.get_share_link(template.object_name)
test_replace_string(template_muban, result, 'report.docx')
file = mino_client.get_share_link('report.docx')
report_name = langchain_object.report_name
report_name = report_name if report_name.endswith('.docx') else f'{report_name}.docx'
test_replace_string(template_muban, result, report_name)
file = mino_client.get_share_link(report_name)
response = ChatResponse(type='end',
files=[{'file_url': file, 'file_name': 'report.docx'}],
files=[{'file_url': file, 'file_name': report_name}],
user_id=user_id)
await session.send_json(client_id, chat_id, response)
close_resp = ChatResponse(type='close', category='system', user_id=user_id)
@@ -86,7 +90,7 @@ async def process_message(self, session: ChatManager,
artifacts = session.in_memory_cache.get(key + '_artifacts')
if artifacts:
for k, value in artifacts.items():
if k in chat_inputs:
if k in chat_inputs and value:
chat_inputs[k] = value
chat_inputs = ChatMessage(message=chat_inputs, category='question',
is_bot=not is_begin, type='bot', user_id=user_id,)
@@ -173,21 +177,6 @@ async def process_file(self, session: ChatManager,
type='end', user_id=user_id)
session.chat_history.add_message(client_id, chat_id, file)
start_resp = ChatResponse(type='start', category='system', user_id=user_id)
await session.send_json(client_id, chat_id, start_resp)

if not batch_question:
# no question
step_resp = ChatResponse(type='end',
intermediate_steps='File parsing complete',
category='system', user_id=user_id)
await session.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await session.send_json(client_id, chat_id, start_resp)
return

step_resp = ChatResponse(intermediate_steps='File parsing complete, analysis starting',
type='end', category='system', user_id=user_id)
await session.send_json(client_id, chat_id, step_resp)

key = get_cache_key(client_id, chat_id)
langchain_object = session.in_memory_cache.get(key)
@@ -258,8 +247,10 @@ async def intermediate_logs(self, session: ChatManager,
sender = message.get('sender')
receiver = message.get('receiver')
is_bot = False if receiver and receiver.get('is_bot') else True
category = message.get('category', 'processing')
msg = ChatResponse(message=content, sender=sender, receiver=receiver,
type='end', user_id=user_id, is_bot=is_bot)
type='end', user_id=user_id, is_bot=is_bot,
category=category)
steps.append(msg)
else:
# agent model will produce the steps log
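
In `process_report`, the generated document is now named after the flow's `report_name` (normalized to end in `.docx`) instead of the hard-coded `report.docx`, and that name is echoed back in the file payload. A tiny, hypothetical sketch of just the naming and payload step, with a dict standing in for `ChatResponse`:

```python
# Hypothetical sketch of the report-naming step; a dict stands in for
# ChatResponse and the MinIO share link is passed in as a plain string.
def build_report_payload(report_name: str, file_url: str) -> dict:
    name = report_name if report_name.endswith('.docx') else f'{report_name}.docx'
    return {'type': 'end',
            'files': [{'file_url': file_url, 'file_name': name}]}


if __name__ == '__main__':
    print(build_report_payload('quarterly_summary', 'http://minio/share/abc123'))
```
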
61 changes: 47 additions & 14 deletions src/backend/bisheng/chat/manager.py
@@ -16,6 +16,7 @@
from bisheng.processing.process import process_tweaks
from bisheng.utils.logger import logger
from bisheng.utils.util import get_cache_key
from bisheng_langchain.input_output.output import Report
from fastapi import WebSocket, status


@@ -171,38 +172,70 @@ async def handle_websocket(self, client_id: str, chat_id: str, websocket: WebSoc
if is_begin:
start_resp = ChatResponse(type='begin', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, start_resp)
start_resp.type = 'start'

# should input data
langchain_obj_key = get_cache_key(client_id, chat_id)
has_file = False
if 'inputs' in payload and 'data' in payload['inputs']:
node_data = payload['inputs']['data']
gragh_data = self.refresh_graph_data(gragh_data, node_data)
self.set_cache(langchain_obj_key, None) # rebuild object
has_file = any(['InputFile' in nd.get('id') for nd in node_data])
if has_file:
step_resp = ChatResponse(intermediate_steps='File upload complete and begin to parse', # noqa
type='end', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, start_resp)
await self.send_json(client_id, chat_id, step_resp, add=False)
await self.send_json(client_id, chat_id, start_resp)
logger.info('input_file start_log')
await asyncio.sleep(1)  # why does the frontend not receive it immediately

batch_question = []
if not self.in_memory_cache.get(langchain_obj_key):
logger.info(f"init_langchain key={langchain_obj_key} input={payload['inputs']}")
try:
gragh = self.init_langchain_object(client_id, chat_id, user_id, gragh_data)
if 'data' in payload['inputs']:
action = 'auto_file' # has input data, default is file process
for node in gragh.nodes:
if node.vertex_type == 'Report':
action = 'report'
break
if node.vertex_type == 'InputNode':
# preset question only use for auto_file
batch_question = node._built_object
gragh = self.init_langchain_object(client_id, chat_id,
user_id, gragh_data)
except Exception as e:
logger.exception(e)
step_resp = ChatResponse(intermediate_steps='input data is parsed fail',
step_resp = ChatResponse(intermediate_steps='Input data is parsed fail',
type='end', category='system', user_id=user_id)
if has_file:
step_resp.intermediate_steps = 'File is parsed fail'
await self.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await self.send_json(client_id, chat_id, start_resp)
# socket close?
return

if action == 'auto_file':
payload['inputs']['questions'] = batch_question
if has_file:
batch_question = [node._built_object
for node in gragh.nodes
if node.vertex_type == 'InputNode']

langchain_obj = self.in_memory_cache.get(langchain_obj_key)
if isinstance(langchain_obj, Report):
action = 'report'
elif 'data' in payload['inputs']:
action = 'auto_file' # has input data, default is file process
# default not set, for autogen set before

if has_file:
if not batch_question:
if action == 'auto_file':
# no question
step_resp = ChatResponse(type='end',
intermediate_steps='File parsing complete',
category='system', user_id=user_id)
await self.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await self.send_json(client_id, chat_id, start_resp)
continue
step_resp = ChatResponse(intermediate_steps='File parsing complete. Analysis starting', # noqa
type='end', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, step_resp, add=False)
if action == 'auto_file':
payload['inputs']['questions'] = [question for question in batch_question]

asyncio.create_task(Handler().dispatch_task(self, client_id, chat_id, action,
payload, user_id))
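
`handle_websocket` now derives the dispatch action from the cached langchain object and the payload: a `Report` object routes to the report handler, any other payload carrying input data defaults to file processing, and preset questions are only collected from `InputNode` vertices when a file was uploaded. A rough sketch of that decision with stub types standing in for the real bisheng classes:

```python
# Rough sketch of the action-selection logic; ReportStub stands in for
# bisheng_langchain.input_output.output.Report and the node objects are
# assumed to expose vertex_type and _built_object as in the diff above.
from typing import Any, Dict, List, Optional


class ReportStub:
    """Stand-in for the Report output object."""


def choose_action(langchain_obj: Any, payload: Dict[str, Any]) -> Optional[str]:
    if isinstance(langchain_obj, ReportStub):
        return 'report'
    if 'data' in payload.get('inputs', {}):
        return 'auto_file'  # input data present: default to file processing
    return None  # fall through to the normal chat handler


def preset_questions(nodes: List[Any], has_file: bool) -> List[Any]:
    # preset questions only apply when a file was uploaded with the request
    if not has_file:
        return []
    return [n._built_object for n in nodes
            if getattr(n, 'vertex_type', '') == 'InputNode']


if __name__ == '__main__':
    print(choose_action(ReportStub(), {'inputs': {}}))           # report
    print(choose_action(object(), {'inputs': {'data': [{}]}}))   # auto_file
```
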
23 changes: 12 additions & 11 deletions src/backend/bisheng/interface/initialize/loading.py
@@ -141,7 +141,7 @@ def instantiate_input_output(node_type, class_object, params, id_dict):
variable_node_id = id_dict.get('variables')
if variable:
params['variables'] = [{'node_id': variable_node_id[0],
'name': variable}]
'input': variable}]

params['chains'] = chain_list
return class_object(**params)
@@ -251,16 +251,17 @@ def instantiate_retriever(node_type, class_object, params):


def instantiate_chains(node_type, class_object: Type[Chain], params: Dict, id_dict: Dict):
if 'retriever' in params and hasattr(params['retriever'], 'as_retriever'):
if 'retriever' in params:
user_name = params.pop('user_name', '')
if settings.get_from_db('file_access'):
# need to verify file access
access_url = settings.get_from_db('file_access') + f'?username={user_name}'
vectorstore = VectorStoreFilterRetriever(vectorstore=params['retriever'],
access_url=access_url)
else:
vectorstore = params['retriever'].as_retriever()
params['retriever'] = vectorstore
if hasattr(params['retriever'], 'as_retriever'):
if settings.get_from_db('file_access'):
# need to verify file access
access_url = settings.get_from_db('file_access') + f'?username={user_name}'
vectorstore = VectorStoreFilterRetriever(vectorstore=params['retriever'],
access_url=access_url)
else:
vectorstore = params['retriever'].as_retriever()
params['retriever'] = vectorstore
# sequence chain
if node_type == 'SequentialChain':
# rework sequence to support a custom chain order
@@ -280,7 +281,7 @@ def instantiate_chains(node_type, class_object: Type[Chain], params: Dict, id_di
'prompt': params.pop('combine_docs_chain_kwargs', None)
}
# manually assemble the MultiPromptChain
if node_type == 'MultiPromptChain':
if node_type in {'MultiPromptChain', 'MultiRuleChain'}:
destination_chain_name = eval(params['destination_chain_name'])
llm_chains = params['LLMChains']
destination_chain = {}
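
`instantiate_chains` now checks for a retriever param first and only wraps it when the object actually exposes `as_retriever`: with a `file_access` URL configured it builds a permission-checking `VectorStoreFilterRetriever`, otherwise it falls back to the plain `as_retriever()` call. A simplified sketch of that branch, with a stub wrapper class in place of bisheng's retriever and settings lookup:

```python
# Simplified sketch of the retriever-wrapping branch; FilterRetrieverStub
# stands in for VectorStoreFilterRetriever and file_access_url for the
# value read from settings.get_from_db('file_access').
from typing import Any, Dict, Optional


class FilterRetrieverStub:
    def __init__(self, vectorstore: Any, access_url: str):
        self.vectorstore = vectorstore
        self.access_url = access_url


def wrap_retriever(params: Dict[str, Any],
                   file_access_url: Optional[str]) -> Dict[str, Any]:
    if 'retriever' in params:
        user_name = params.pop('user_name', '')
        if hasattr(params['retriever'], 'as_retriever'):
            if file_access_url:
                # route retrieval through a per-user file-access check
                access_url = f'{file_access_url}?username={user_name}'
                params['retriever'] = FilterRetrieverStub(params['retriever'],
                                                          access_url)
            else:
                params['retriever'] = params['retriever'].as_retriever()
    return params
```
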
2 changes: 2 additions & 0 deletions src/backend/bisheng/main.py
@@ -1,5 +1,6 @@
from pathlib import Path
from typing import Optional
from venv import logger

from bisheng.api import router, router_rpc
from bisheng.database.base import create_db_and_tables
@@ -15,6 +16,7 @@

def handle_http_exception(req: Request, exc: HTTPException) -> ORJSONResponse:
msg = {'status_code': exc.status_code, 'status_message': exc.detail}
logger.error(msg)
return ORJSONResponse(content=msg)


5 changes: 4 additions & 1 deletion src/backend/bisheng/template/frontend_node/chains.py
@@ -50,7 +50,7 @@ def add_extra_fields(self) -> None:
name='chain_order',
advanced=False,
value='[]'))
elif self.template.type_name == 'MultiPromptChain':
elif self.template.type_name in {'MultiPromptChain', 'MultiRuleChain'}:
self.template.add_field(
TemplateField(field_type='Chain',
required=True,
@@ -66,11 +66,14 @@ def add_extra_fields(self) -> None:
is_list=True,
name='destination_chain_name',
advanced=False,
info='{chain_id: name}',
value='{}'))

@staticmethod
def format_field(field: TemplateField, name: Optional[str] = None) -> None:
FrontendNode.format_field(field, name)
if name == 'RuleBasedRouter' and field.name == 'rule_function':
field.field_type = 'function'

if name == 'RetrievalQA' and field.name == 'memory':
field.show = False
10 changes: 6 additions & 4 deletions src/backend/bisheng/template/frontend_node/documentloaders.py
@@ -79,13 +79,15 @@ def add_extra_base_classes(self) -> None:
build_file_field(suffixes=['.pdf'], fileTypes=['pdf']),
'UniversalKVLoader':
build_file_field(
suffixes=['.jpg', '.png', '.jpeg', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf'],
suffixes=['.jpg', '.png', '.jpeg', '.bmp', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'bmp', 'pdf'],
),
'CustomKVLoader':
build_file_field(
suffixes=['.jpg', '.png', '.jpeg', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf'],
suffixes=['.jpg', '.png', '.jpeg', '.pdf', '.txt', '.docx',
'.doc', '.bmp', '.tif', '.tiff', '.xls', '.xlsx'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf', 'txt', 'docx',
'doc', 'bmp', 'tif', 'tiff', 'xls', 'xlsx'],
),
}

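
`CustomKVLoader` now accepts a much wider set of upload types. A small, hypothetical helper that applies the same suffix whitelist to an incoming file name (the list is taken from the diff above; the helper itself is not part of bisheng):

```python
# Hypothetical upload check mirroring the CustomKVLoader suffix whitelist.
CUSTOM_KV_SUFFIXES = {'.jpg', '.png', '.jpeg', '.pdf', '.txt', '.docx',
                      '.doc', '.bmp', '.tif', '.tiff', '.xls', '.xlsx'}


def is_supported_upload(filename: str) -> bool:
    dot = filename.rfind('.')
    return dot != -1 and filename[dot:].lower() in CUSTOM_KV_SUFFIXES


if __name__ == '__main__':
    print(is_supported_upload('contract.docx'))  # True
    print(is_supported_upload('archive.zip'))    # False
```
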
