
Commit

Feat/report (#173)
yaojin3616 committed Dec 1, 2023
2 parents ac8f487 + eda6961 commit 9271a14
Showing 45 changed files with 1,353 additions and 1,861 deletions.
21 changes: 14 additions & 7 deletions src/backend/bisheng/api/v1/callback.py
@@ -99,14 +99,21 @@ async def on_text(self, text: str, **kwargs: Any) -> Any:
else:
await self.websocket.send_json(log.dict())
await self.websocket.send_json(start.dict())
elif kwargs.get('type'):
start = ChatResponse(type='start', category=kwargs.get('type'))
end = ChatResponse(type='end', intermediate_steps=text, category=kwargs.get('type'))
await self.websocket.send_json(start.dict())
await self.websocket.send_json(end.dict())
elif 'category' in kwargs:
log = ChatResponse(message=text, type='stream')
await self.websocket.send_json(log.dict())
if 'autogen' == kwargs['category']:
log = ChatResponse(message=text, type='stream')
await self.websocket.send_json(log.dict())
if kwargs.get('type'):
# for compatibility
start = ChatResponse(type='start', category=kwargs.get('type'))
end = ChatResponse(type='end', intermediate_steps=text,
category=kwargs.get('type'))
await self.websocket.send_json(start.dict())
await self.websocket.send_json(end.dict())
else:
log = ChatResponse(message=text, intermediate_steps=kwargs['log'],
type=kwargs['type'], category=kwargs['category'])
await self.websocket.send_json(log.dict())

async def on_agent_action(self, action: AgentAction, **kwargs: Any):
log = f'Thought: {action.log}'
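
The reworked `on_text` dispatches different `ChatResponse` messages depending on the keyword arguments the callback receives. A minimal, self-contained sketch of the new `category` branch is shown below; plain dicts stand in for `ChatResponse` and the collected list stands in for the websocket sends, so this is an illustration of the dispatch logic rather than the bisheng implementation itself.

```python
# Illustrative sketch of the kwargs-driven dispatch in on_text.
# Plain dicts stand in for ChatResponse; `sent` collects what would be
# pushed over the websocket. Not the actual bisheng code.
from typing import Any, Dict, List


def dispatch_on_text(text: str, **kwargs: Any) -> List[Dict[str, Any]]:
    sent: List[Dict[str, Any]] = []
    if 'category' in kwargs:
        if kwargs['category'] == 'autogen':
            # autogen output is streamed as-is
            sent.append({'type': 'stream', 'message': text})
            if kwargs.get('type'):
                # compatibility: also emit a start/end pair for this type
                sent.append({'type': 'start', 'category': kwargs['type']})
                sent.append({'type': 'end', 'category': kwargs['type'],
                             'intermediate_steps': text})
        else:
            # other categories carry their own log and type
            sent.append({'type': kwargs['type'], 'category': kwargs['category'],
                         'message': text, 'intermediate_steps': kwargs['log']})
    return sent


if __name__ == '__main__':
    print(dispatch_on_text('agent reply', category='autogen', type='answer'))
    print(dispatch_on_text('step output', category='processing', type='end', log='tool call'))
```
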
1 change: 1 addition & 0 deletions src/backend/bisheng/api/v1/report.py
@@ -34,6 +34,7 @@ async def callback(data: dict, session: Session = Depends(get_session)):
.where(or_(Report.version_key == key,
Report.newversion_key == key))).first()
if not db_report:
logger.error(f'report_callback cannot find the flow_id flow_id={key}')
raise HTTPException(status_code=500, detail='cannot find the flow_id')
db_report.object_name = object_name
db_report.version_key = key
5 changes: 2 additions & 3 deletions src/backend/bisheng/api/v1/validate.py
@@ -1,5 +1,4 @@
from bisheng.api.v1.base import (Code, CodeValidationResponse,
PromptValidationResponse,
from bisheng.api.v1.base import (Code, CodeValidationResponse, PromptValidationResponse,
ValidatePromptRequest, validate_prompt)
from bisheng.template.field.base import TemplateField
from bisheng.utils.logger import logger
@@ -67,7 +66,7 @@ def add_new_variables_to_template(input_variables, prompt_request):
show=True,
advanced=False,
multiline=True,
input_types=['Document', 'BaseOutputParser'],
input_types=['Document', 'BaseOutputParser', 'VariableNode'],
value='', # Set the value to empty string
)
if variable in prompt_request.frontend_node.template:
2 changes: 1 addition & 1 deletion src/backend/bisheng/api/v2/filelib.py
@@ -148,7 +148,7 @@ async def upload_file(*,

file_name = file.filename
# cache locally
file_path = save_uploaded_file(file.file, 'bisheng').as_posix()
file_path = save_uploaded_file(file.file, 'bisheng', file_name).as_posix()
auto_p = True
if auto_p:
separator = ['\n\n', '\n', ' ', '']
22 changes: 14 additions & 8 deletions src/backend/bisheng/cache/utils.py
@@ -182,7 +182,8 @@ def save_uploaded_file(file, folder_name, file_name):
mino_client.upload_tmp(file_name, file_byte)
file_path = mino_client.get_share_link(file_name, tmp_bucket)
else:
file_path = folder_path / md5_name
file_type = md5_name.split('.')[-1]
file_path = folder_path / f'{md5_name}.{file_type}'
with open(file_path, 'wb') as new_file:
while chunk := file.read(8192):
new_file.write(chunk)
@@ -191,7 +192,7 @@ def save_uploaded_file(file, folder_name, file_name):


@create_cache_folder
def save_download_file(file_byte, folder_name):
def save_download_file(file_byte, folder_name, filename):
"""
Save an uploaded file to the specified folder with a hash of its content as the file name.
@@ -218,8 +219,8 @@ def save_download_file(file_byte, folder_name):
# Use the hex digest of the hash as the file name
hex_dig = sha256_hash.hexdigest()
md5_name = hex_dig

file_path = folder_path / md5_name
file_type = filename.split('.')[-1]
file_path = folder_path / f'{md5_name}.{file_type}'
with open(file_path, 'wb') as new_file:
new_file.write(file_byte)
return str(file_path)
@@ -235,10 +236,15 @@ def file_download(file_path: str):
'Check the url of your file; returned status code %s'
% r.status_code
)

file_name = unquote(urlparse(file_path).path.split('/')[-1])
file_path = save_download_file(r.content, 'bisheng')
return file_path, file_name
# check the Content-Disposition header to find the file name
content_disposition = r.headers.get('Content-Disposition')
filename = ''
if content_disposition:
filename = content_disposition.split('filename=')[-1].strip("\"'")
if not filename:
filename = unquote(urlparse(file_path).path.split('/')[-1])
file_path = save_download_file(r.content, 'bisheng', filename)
return file_path, filename
elif not os.path.isfile(file_path):
raise ValueError('File path %s is not a valid file or url' % file_path)
return file_path, ''
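
The download path now tries to recover the original file name from the `Content-Disposition` header and only falls back to the URL path when that header is missing, so the saved file keeps its extension. A rough, standard-library-only sketch of that logic follows; the helper names are illustrative and not part of bisheng.

```python
# Illustrative helpers mirroring the file_download / save_download_file changes:
# pick a file name from Content-Disposition (falling back to the URL path),
# then keep its extension next to the content hash.
from typing import Optional
from urllib.parse import unquote, urlparse


def pick_filename(url: str, content_disposition: Optional[str]) -> str:
    filename = ''
    if content_disposition:
        # e.g. 'attachment; filename="report.docx"'
        filename = content_disposition.split('filename=')[-1].strip('"\'')
    if not filename:
        filename = unquote(urlparse(url).path.split('/')[-1])
    return filename


def hashed_path(folder: str, sha256_hex: str, filename: str) -> str:
    # keep the original extension so downstream loaders can detect the type
    file_type = filename.split('.')[-1]
    return f'{folder}/{sha256_hex}.{file_type}'


if __name__ == '__main__':
    name = pick_filename('http://host/files/data.pdf',
                         'attachment; filename="report.docx"')
    print(name)                                          # report.docx
    print(hashed_path('/tmp/bisheng', 'ab12cd', name))   # /tmp/bisheng/ab12cd.docx
```
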
31 changes: 11 additions & 20 deletions src/backend/bisheng/chat/handlers.py
@@ -64,11 +64,15 @@ async def process_report(self, session: ChatManager,
return
start_resp = ChatResponse(type='start', user_id=user_id)
await session.send_json(client_id, chat_id, start_resp)

langchain_object = session.in_memory_cache.get(key)
template_muban = mino_client.get_share_link(template.object_name)
test_replace_string(template_muban, result, 'report.docx')
file = mino_client.get_share_link('report.docx')
report_name = langchain_object.report_name
report_name = report_name if report_name.endswith('.docx') else f'{report_name}.docx'
test_replace_string(template_muban, result, report_name)
file = mino_client.get_share_link(report_name)
response = ChatResponse(type='end',
files=[{'file_url': file, 'file_name': 'report.docx'}],
files=[{'file_url': file, 'file_name': report_name}],
user_id=user_id)
await session.send_json(client_id, chat_id, response)
close_resp = ChatResponse(type='close', category='system', user_id=user_id)
@@ -86,7 +90,7 @@ async def process_message(self, session: ChatManager,
artifacts = session.in_memory_cache.get(key + '_artifacts')
if artifacts:
for k, value in artifacts.items():
if k in chat_inputs:
if k in chat_inputs and value:
chat_inputs[k] = value
chat_inputs = ChatMessage(message=chat_inputs, category='question',
is_bot=not is_begin, type='bot', user_id=user_id,)
@@ -173,21 +177,6 @@ async def process_file(self, session: ChatManager,
type='end', user_id=user_id)
session.chat_history.add_message(client_id, chat_id, file)
start_resp = ChatResponse(type='start', category='system', user_id=user_id)
await session.send_json(client_id, chat_id, start_resp)

if not batch_question:
# no question
step_resp = ChatResponse(type='end',
intermediate_steps='File parsing complete',
category='system', user_id=user_id)
await session.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await session.send_json(client_id, chat_id, start_resp)
return

step_resp = ChatResponse(intermediate_steps='File parsing complete, analysis starting',
type='end', category='system', user_id=user_id)
await session.send_json(client_id, chat_id, step_resp)

key = get_cache_key(client_id, chat_id)
langchain_object = session.in_memory_cache.get(key)
@@ -258,8 +247,10 @@ async def intermediate_logs(self, session: ChatManager,
sender = message.get('sender')
receiver = message.get('receiver')
is_bot = False if receiver and receiver.get('is_bot') else True
category = message.get('category', 'processing')
msg = ChatResponse(message=content, sender=sender, receiver=receiver,
type='end', user_id=user_id, is_bot=is_bot)
type='end', user_id=user_id, is_bot=is_bot,
category=category)
steps.append(msg)
else:
# agent model will produce the steps log
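
In `process_report`, the generated document is now named after the flow's `report_name` (normalized to end in `.docx`) instead of the hard-coded `report.docx`, and that name is echoed back in the file payload. A tiny, hypothetical sketch of just the naming and payload step, with a dict standing in for `ChatResponse`:

```python
# Hypothetical sketch of the report-naming step; a dict stands in for
# ChatResponse and the MinIO share link is passed in as a plain string.
def build_report_payload(report_name: str, file_url: str) -> dict:
    name = report_name if report_name.endswith('.docx') else f'{report_name}.docx'
    return {'type': 'end',
            'files': [{'file_url': file_url, 'file_name': name}]}


if __name__ == '__main__':
    print(build_report_payload('quarterly_summary', 'http://minio/share/abc123'))
```
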
61 changes: 47 additions & 14 deletions src/backend/bisheng/chat/manager.py
@@ -16,6 +16,7 @@
from bisheng.processing.process import process_tweaks
from bisheng.utils.logger import logger
from bisheng.utils.util import get_cache_key
from bisheng_langchain.input_output.output import Report
from fastapi import WebSocket, status


@@ -171,38 +172,70 @@ async def handle_websocket(self, client_id: str, chat_id: str, websocket: WebSoc
if is_begin:
start_resp = ChatResponse(type='begin', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, start_resp)
start_resp.type = 'start'

# should input data
langchain_obj_key = get_cache_key(client_id, chat_id)
has_file = False
if 'inputs' in payload and 'data' in payload['inputs']:
node_data = payload['inputs']['data']
gragh_data = self.refresh_graph_data(gragh_data, node_data)
self.set_cache(langchain_obj_key, None) # rebuild object
has_file = any(['InputFile' in nd.get('id') for nd in node_data])
if has_file:
step_resp = ChatResponse(intermediate_steps='File upload complete and begin to parse', # noqa
type='end', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, start_resp)
await self.send_json(client_id, chat_id, step_resp, add=False)
await self.send_json(client_id, chat_id, start_resp)
logger.info('input_file start_log')
await asyncio.sleep(1)  # why does the frontend not receive it immediately

batch_question = []
if not self.in_memory_cache.get(langchain_obj_key):
logger.info(f"init_langchain key={langchain_obj_key} input={payload['inputs']}")
try:
gragh = self.init_langchain_object(client_id, chat_id, user_id, gragh_data)
if 'data' in payload['inputs']:
action = 'auto_file' # has input data, default is file process
for node in gragh.nodes:
if node.vertex_type == 'Report':
action = 'report'
break
if node.vertex_type == 'InputNode':
# preset question only use for auto_file
batch_question = node._built_object
gragh = self.init_langchain_object(client_id, chat_id,
user_id, gragh_data)
except Exception as e:
logger.exception(e)
step_resp = ChatResponse(intermediate_steps='input data is parsed fail',
step_resp = ChatResponse(intermediate_steps='Input data is parsed fail',
type='end', category='system', user_id=user_id)
if has_file:
step_resp.intermediate_steps = 'File is parsed fail'
await self.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await self.send_json(client_id, chat_id, start_resp)
# socket close?
return

if action == 'auto_file':
payload['inputs']['questions'] = batch_question
if has_file:
batch_question = [node._built_object
for node in gragh.nodes
if node.vertex_type == 'InputNode']

langchain_obj = self.in_memory_cache.get(langchain_obj_key)
if isinstance(langchain_obj, Report):
action = 'report'
elif 'data' in payload['inputs']:
action = 'auto_file' # has input data, default is file process
# default not set, for autogen set before

if has_file:
if not batch_question:
if action == 'auto_file':
# no question
step_resp = ChatResponse(type='end',
intermediate_steps='File parsing complete',
category='system', user_id=user_id)
await self.send_json(client_id, chat_id, step_resp)
start_resp.type = 'close'
await self.send_json(client_id, chat_id, start_resp)
continue
step_resp = ChatResponse(intermediate_steps='File parsing complete. Analysis starting', # noqa
type='end', category='system', user_id=user_id)
await self.send_json(client_id, chat_id, step_resp, add=False)
if action == 'auto_file':
payload['inputs']['questions'] = [question for question in batch_question]

asyncio.create_task(Handler().dispatch_task(self, client_id, chat_id, action,
payload, user_id))
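
`handle_websocket` now derives the dispatch action from the cached langchain object and the payload: a `Report` object routes to the report handler, any other payload carrying input data defaults to file processing, and preset questions are only collected from `InputNode` vertices when a file was uploaded. A rough sketch of that decision with stub types standing in for the real bisheng classes:

```python
# Rough sketch of the action-selection logic; ReportStub stands in for
# bisheng_langchain.input_output.output.Report and the node objects are
# assumed to expose vertex_type and _built_object as in the diff above.
from typing import Any, Dict, List, Optional


class ReportStub:
    """Stand-in for the Report output object."""


def choose_action(langchain_obj: Any, payload: Dict[str, Any]) -> Optional[str]:
    if isinstance(langchain_obj, ReportStub):
        return 'report'
    if 'data' in payload.get('inputs', {}):
        return 'auto_file'  # input data present: default to file processing
    return None  # fall through to the normal chat handler


def preset_questions(nodes: List[Any], has_file: bool) -> List[Any]:
    # preset questions only apply when a file was uploaded with the request
    if not has_file:
        return []
    return [n._built_object for n in nodes
            if getattr(n, 'vertex_type', '') == 'InputNode']


if __name__ == '__main__':
    print(choose_action(ReportStub(), {'inputs': {}}))           # report
    print(choose_action(object(), {'inputs': {'data': [{}]}}))   # auto_file
```
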
23 changes: 12 additions & 11 deletions src/backend/bisheng/interface/initialize/loading.py
@@ -141,7 +141,7 @@ def instantiate_input_output(node_type, class_object, params, id_dict):
variable_node_id = id_dict.get('variables')
if variable:
params['variables'] = [{'node_id': variable_node_id[0],
'name': variable}]
'input': variable}]

params['chains'] = chain_list
return class_object(**params)
@@ -251,16 +251,17 @@ def instantiate_retriever(node_type, class_object, params):


def instantiate_chains(node_type, class_object: Type[Chain], params: Dict, id_dict: Dict):
if 'retriever' in params and hasattr(params['retriever'], 'as_retriever'):
if 'retriever' in params:
user_name = params.pop('user_name', '')
if settings.get_from_db('file_access'):
# need to verify file access
access_url = settings.get_from_db('file_access') + f'?username={user_name}'
vectorstore = VectorStoreFilterRetriever(vectorstore=params['retriever'],
access_url=access_url)
else:
vectorstore = params['retriever'].as_retriever()
params['retriever'] = vectorstore
if hasattr(params['retriever'], 'as_retriever'):
if settings.get_from_db('file_access'):
# need to verify file access
access_url = settings.get_from_db('file_access') + f'?username={user_name}'
vectorstore = VectorStoreFilterRetriever(vectorstore=params['retriever'],
access_url=access_url)
else:
vectorstore = params['retriever'].as_retriever()
params['retriever'] = vectorstore
# sequence chain
if node_type == 'SequentialChain':
# rework sequence to support a custom chain order
@@ -280,7 +281,7 @@ def instantiate_chains(node_type, class_object: Type[Chain], params: Dict, id_di
'prompt': params.pop('combine_docs_chain_kwargs', None)
}
# manually assemble the MultiPromptChain
if node_type == 'MultiPromptChain':
if node_type in {'MultiPromptChain', 'MultiRuleChain'}:
destination_chain_name = eval(params['destination_chain_name'])
llm_chains = params['LLMChains']
destination_chain = {}
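
`instantiate_chains` now checks for a retriever param first and only wraps it when the object actually exposes `as_retriever`: with a `file_access` URL configured it builds a permission-checking `VectorStoreFilterRetriever`, otherwise it falls back to the plain `as_retriever()` call. A simplified sketch of that branch, with a stub wrapper class in place of bisheng's retriever and settings lookup:

```python
# Simplified sketch of the retriever-wrapping branch; FilterRetrieverStub
# stands in for VectorStoreFilterRetriever and file_access_url for the
# value read from settings.get_from_db('file_access').
from typing import Any, Dict, Optional


class FilterRetrieverStub:
    def __init__(self, vectorstore: Any, access_url: str):
        self.vectorstore = vectorstore
        self.access_url = access_url


def wrap_retriever(params: Dict[str, Any],
                   file_access_url: Optional[str]) -> Dict[str, Any]:
    if 'retriever' in params:
        user_name = params.pop('user_name', '')
        if hasattr(params['retriever'], 'as_retriever'):
            if file_access_url:
                # route retrieval through a per-user file-access check
                access_url = f'{file_access_url}?username={user_name}'
                params['retriever'] = FilterRetrieverStub(params['retriever'],
                                                          access_url)
            else:
                params['retriever'] = params['retriever'].as_retriever()
    return params
```
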
2 changes: 2 additions & 0 deletions src/backend/bisheng/main.py
@@ -1,5 +1,6 @@
from pathlib import Path
from typing import Optional
from venv import logger

from bisheng.api import router, router_rpc
from bisheng.database.base import create_db_and_tables
@@ -15,6 +16,7 @@

def handle_http_exception(req: Request, exc: HTTPException) -> ORJSONResponse:
msg = {'status_code': exc.status_code, 'status_message': exc.detail}
logger.error(msg)
return ORJSONResponse(content=msg)


5 changes: 4 additions & 1 deletion src/backend/bisheng/template/frontend_node/chains.py
@@ -50,7 +50,7 @@ def add_extra_fields(self) -> None:
name='chain_order',
advanced=False,
value='[]'))
elif self.template.type_name == 'MultiPromptChain':
elif self.template.type_name in {'MultiPromptChain', 'MultiRuleChain'}:
self.template.add_field(
TemplateField(field_type='Chain',
required=True,
@@ -66,11 +66,14 @@ def add_extra_fields(self) -> None:
is_list=True,
name='destination_chain_name',
advanced=False,
info='{chain_id: name}',
value='{}'))

@staticmethod
def format_field(field: TemplateField, name: Optional[str] = None) -> None:
FrontendNode.format_field(field, name)
if name == 'RuleBasedRouter' and field.name == 'rule_function':
field.field_type = 'function'

if name == 'RetrievalQA' and field.name == 'memory':
field.show = False
10 changes: 6 additions & 4 deletions src/backend/bisheng/template/frontend_node/documentloaders.py
@@ -79,13 +79,15 @@ def add_extra_base_classes(self) -> None:
build_file_field(suffixes=['.pdf'], fileTypes=['pdf']),
'UniversalKVLoader':
build_file_field(
suffixes=['.jpg', '.png', '.jpeg', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf'],
suffixes=['.jpg', '.png', '.jpeg', '.bmp', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'bmp', 'pdf'],
),
'CustomKVLoader':
build_file_field(
suffixes=['.jpg', '.png', '.jpeg', '.pdf'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf'],
suffixes=['.jpg', '.png', '.jpeg', '.pdf', '.txt', '.docx',
'.doc', '.bmp', '.tif', '.tiff', '.xls', '.xlsx'],
fileTypes=['jpg', 'png', 'jpeg', 'pdf', 'txt', 'docx',
'doc', 'bmp', 'tif', 'tiff', 'xls', 'xlsx'],
),
}

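
`CustomKVLoader` now accepts a much wider set of upload types. A small, hypothetical helper that applies the same suffix whitelist to an incoming file name (the list is taken from the diff above; the helper itself is not part of bisheng):

```python
# Hypothetical upload check mirroring the CustomKVLoader suffix whitelist.
CUSTOM_KV_SUFFIXES = {'.jpg', '.png', '.jpeg', '.pdf', '.txt', '.docx',
                      '.doc', '.bmp', '.tif', '.tiff', '.xls', '.xlsx'}


def is_supported_upload(filename: str) -> bool:
    dot = filename.rfind('.')
    return dot != -1 and filename[dot:].lower() in CUSTOM_KV_SUFFIXES


if __name__ == '__main__':
    print(is_supported_upload('contract.docx'))  # True
    print(is_supported_upload('archive.zip'))    # False
```
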
