draw project struct with mermaid

binary-husky · Jan 20, 2024 · dd2a97e · dd2a97e
1 parent e579006
commit dd2a97e
Show file tree

Hide file tree

Showing 6 changed files with 155 additions and 32 deletions.
diff --git a/crazy_functional.py b/crazy_functional.py
@@ -32,8 +32,6 @@ def get_crazy_functions():
     from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
     from crazy_functions.Latex全文润色 import Latex中文润色
     from crazy_functions.Latex全文润色 import Latex英文纠错
-    from crazy_functions.Latex全文翻译 import Latex中译英
-    from crazy_functions.Latex全文翻译 import Latex英译中
     from crazy_functions.批量Markdown翻译 import Markdown中译英
     from crazy_functions.虚空终端 import 虚空终端
 
@@ -237,13 +235,7 @@ def get_crazy_functions():
             "Info": "对英文Latex项目全文进行润色处理 | 输入参数为路径或上传压缩包",
             "Function": HotReload(Latex英文润色),
         },
-        "英文Latex项目全文纠错（输入路径或上传压缩包）": {
-            "Group": "学术",
-            "Color": "stop",
-            "AsButton": False,  # 加入下拉菜单中
-            "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
-            "Function": HotReload(Latex英文纠错),
-        },
+
         "中文Latex项目全文润色（输入路径或上传压缩包）": {
             "Group": "学术",
             "Color": "stop",
@@ -252,6 +244,14 @@ def get_crazy_functions():
             "Function": HotReload(Latex中文润色),
         },
         # 已经被新插件取代
+        # "英文Latex项目全文纠错（输入路径或上传压缩包）": {
+        #     "Group": "学术",
+        #     "Color": "stop",
+        #     "AsButton": False,  # 加入下拉菜单中
+        #     "Info": "对英文Latex项目全文进行纠错处理 | 输入参数为路径或上传压缩包",
+        #     "Function": HotReload(Latex英文纠错),
+        # },
+        # 已经被新插件取代
         # "Latex项目全文中译英（输入路径或上传压缩包）": {
         #     "Group": "学术",
         #     "Color": "stop",
@@ -523,6 +523,7 @@ def get_crazy_functions():
 
     try:
         from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
+        from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
 
         function_plugins.update(
             {
@@ -533,13 +534,7 @@ def get_crazy_functions():
                     "AdvancedArgs": True,
                     "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令（使用英文）。",
                     "Function": HotReload(Latex英文纠错加PDF对比),
-                }
-            }
-        )
-        from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
-
-        function_plugins.update(
-            {
+                },
                 "Arxiv论文精细翻译（输入arxivID）[需Latex]": {
                     "Group": "学术",
                     "Color": "stop",
@@ -550,11 +545,7 @@ def get_crazy_functions():
                     + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
                     "Info": "Arixv论文精细翻译 | 输入参数arxiv论文的ID，比如1812.10695",
                     "Function": HotReload(Latex翻译中文并重新编译PDF),
-                }
-            }
-        )
-        function_plugins.update(
-            {
+                },
                 "本地Latex论文精细翻译（上传Latex项目）[需Latex]": {
                     "Group": "学术",
                     "Color": "stop",

diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py
@@ -139,7 +139,7 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
     # 基本信息：功能、贡献者
     chatbot.append([
         "函数插件功能？",
-        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。（注意，此插件不调用Latex，如果有Latex环境，请使用“Latex英文纠错+高亮”插件）"])
+        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky。（注意，此插件不调用Latex，如果有Latex环境，请使用「Latex英文纠错+高亮修正位置(需Latex)插件」"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
     # 尝试导入依赖，如果缺少依赖，则给出安装建议

diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
@@ -284,8 +284,7 @@ def _req_gpt(index, inputs, history, sys_prompt):
         # 在前端打印些好玩的东西
         for thread_index, _ in enumerate(worker_done):
             print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
-                replace('\n', '').replace('`', '.').replace(
-                    ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
+                replace('\n', '').replace('`', '.').replace(' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
             observe_win.append(print_something_really_funny)
         # 在前端打印些好玩的东西
         stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n' 

diff --git a/crazy_functions/diagram_fns/file_tree.py b/crazy_functions/diagram_fns/file_tree.py
@@ -0,0 +1,122 @@
+import os
+from textwrap import indent
+
+class FileNode:
+    # One node of an in-memory file tree (a directory or a file). Besides the
+    # tree links, each node accumulates in `parenting_ship` the diagram edge
+    # lines describing itself and its descendants.
+    def __init__(self, name):
+        # name: display name of this node (one path component).
+        self.name = name
+        # Child FileNode objects, in insertion order.
+        self.children = []
+        # Set to True by add_file() for nodes that represent files.
+        self.is_leaf = False
+        # Depth value assigned by add_file(); a freshly created node keeps 0.
+        self.level = 0
+        # Edge lines (strings) filled in by print_files_recursively().
+        self.parenting_ship = []
+        # Sanitized comment text; "" means no comment edge is emitted.
+        self.comment = ""
+        # Maximum number of comment characters kept by sanitize_comment().
+        self.comment_maxlen_show = 50
+
+    @staticmethod
+    def add_linebreaks_at_spaces(string, interval=10):
+        # Cut `string` into consecutive chunks of `interval` characters and
+        # join them with '\n'.
+        # NOTE(review): despite the name, the split positions are fixed-width
+        # and do not depend on where spaces occur.
+        return '\n'.join(string[i:i+interval] for i in range(0, len(string), interval))
+
+    def sanitize_comment(self, comment):
+        # Turn a free-text comment into the backtick-wrapped label stored on a
+        # node: truncate, strip special characters, insert line breaks.
+        # '...' is appended only when the raw comment exceeds the length limit.
+        if len(comment) > self.comment_maxlen_show: suf = '...'
+        else: suf = ''
+        comment = comment[:self.comment_maxlen_show]
+        # Remove double quotes, backticks, newlines and '$' from the text.
+        # NOTE(review): the second replace('`', '') is redundant with the first.
+        comment = comment.replace('\"', '').replace('`', '').replace('\n', '').replace('`', '').replace('$', '')
+        comment = self.add_linebreaks_at_spaces(comment, 10)
+        # The suffix is added after chunking, so it stays on the last line.
+        return '`' + comment + suf + '`'
+
+    def add_file(self, file_path, file_comment):
+        # Insert one file into the tree rooted at `self`, creating any missing
+        # directory nodes along the way.
+        #   file_path:    path string; the part before the last separator is
+        #                 treated as the directory chain.
+        #   file_comment: raw comment text; stored after sanitize_comment().
+        directory_names, file_name = os.path.split(file_path)
+        current_node = self
+        level = 1
+        if directory_names == "":
+            # File directly under the root: attach a leaf with level 1.
+            new_node = FileNode(file_name)
+            current_node.children.append(new_node)
+            new_node.is_leaf = True
+            new_node.comment = self.sanitize_comment(file_comment)
+            new_node.level = level
+            current_node = new_node
+        else:
+            # NOTE(review): the directory part is split on os.sep only, so a
+            # path written with a different separator than the platform's
+            # would presumably end up as a single directory name - confirm.
+            dnamesplit = directory_names.split(os.sep)
+            for i, directory_name in enumerate(dnamesplit):
+                found_child = False
+                level += 1
+                # Reuse an existing child with the same name if there is one.
+                for child in current_node.children:
+                    if child.name == directory_name:
+                        current_node = child
+                        found_child = True
+                        break
+                if not found_child:
+                    # `level` was already incremented for this component, so
+                    # level - 1 is the 1-based depth of the new directory.
+                    new_node = FileNode(directory_name)
+                    current_node.children.append(new_node)
+                    new_node.level = level - 1
+                    current_node = new_node
+            # The file itself is always appended as a new leaf; no lookup for
+            # an existing child with the same name is performed here.
+            term = FileNode(file_name)
+            term.level = level
+            term.comment = self.sanitize_comment(file_comment)
+            term.is_leaf = True
+            current_node.children.append(term)
+
+    def print_files_recursively(self, level=0, code="R0"):
+        # Walk the subtree depth-first. For every node this prints one debug
+        # line to stdout and fills `self.parenting_ship` with the edge lines
+        # of the whole subtree.
+        #   level: indentation depth used only for the printed debug line.
+        #   code:  diagram node id of `self`; a child's id is this id with the
+        #          child's index appended.
+        # NOTE(review): ids are built by concatenating decimal indices, so with
+        # ten or more children two different nodes can receive the same id
+        # (e.g. child 1 -> child 0 and child 10 both yield "...10").
+        print('    '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
+        for j, child in enumerate(self.children):
+            child.print_files_recursively(level=level+1, code=code+str(j))
+            # Pull the child's already-built edge lines up into this node.
+            self.parenting_ship.extend(child.parenting_ship)
+            # p1 / p3 are the node declarations for parent and child; leaves
+            # and directories use different bracket shapes and icons.
+            p1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
+            p2 = """ --> """
+            p3 = f"""{code+str(j)}[\"🗎{child.name}\"]""" if child.is_leaf else f"""{code+str(j)}[[\"📁{child.name}\"]]"""
+            edge_code = p1 + p2 + p3
+            # Skip an edge line that is already recorded.
+            if edge_code in self.parenting_ship:
+                continue
+            self.parenting_ship.append(edge_code)
+        if self.comment != "":
+            # Extra edge from this node to a separate comment node whose id is
+            # "C" + code and which is tagged with the `Comment` class.
+            pc1 = f"""{code}[\"🗎{self.name}\"]""" if self.is_leaf else f"""{code}[[\"📁{self.name}\"]]"""
+            pc2 = f""" -.-x """
+            pc3 = f"""C{code}[\"{self.comment}\"]:::Comment"""
+            edge_code = pc1 + pc2 + pc3
+            self.parenting_ship.append(edge_code)
+
+
+# Markdown snippet wrapping a Mermaid flowchart. It is filled in with
+# str.format(): {graph_name} becomes the subgraph title and {relationship}
+# the pre-indented edge lines. The "%%" line inside the template carries the
+# <gpt_academic_hide_mermaid_code> marker which, according to its inline note,
+# is a special marker used to hide the code block when the diagram is rendered.
+MERMAID_TEMPLATE = r"""
+```mermaid
+flowchart LR
+    %% <gpt_academic_hide_mermaid_code> 一个特殊标记，用于在生成mermaid图表时隐藏代码块
+    classDef Comment stroke-dasharray: 5 5
+    subgraph {graph_name}
+{relationship}
+    end
+```
+"""
+
+def build_file_tree_mermaid_diagram(file_manifest, file_comments, graph_name):
+    # Build the Mermaid markdown block for a list of files.
+    #   file_manifest: iterable of file path strings.
+    #   file_comments: iterable of comment strings, paired positionally with
+    #                  file_manifest (zip stops at the shorter of the two).
+    #   graph_name:    title substituted into the template's subgraph line.
+    # Returns MERMAID_TEMPLATE with the edge lines filled in.
+    # Create the root node
+    file_tree_struct = FileNode("root")
+    # Build the tree structure
+    for file_path, file_comment in zip(file_manifest, file_comments):
+        file_tree_struct.add_file(file_path, file_comment)
+    # Collects the edge lines on the root; as a side effect this also prints
+    # one debug line per node to stdout.
+    file_tree_struct.print_files_recursively()
+    cc = "\n".join(file_tree_struct.parenting_ship)
+    # Indent every edge line by 8 spaces so it nests under the subgraph.
+    ccc = indent(cc, prefix=" "*8)
+    return MERMAID_TEMPLATE.format(graph_name=graph_name, relationship=ccc)
+
+if __name__ == "__main__":
+    # Manual demo: build a diagram for a small sample manifest and print it.
+    # The comments below are paired with the paths by position only.
+    # File manifest
+    file_manifest = [
+        "cradle_void_terminal.ipynb",
+        "tests/test_utils.py",
+        "tests/test_plugins.py",
+        "tests/test_llms.py",
+        "config.py",
+        "build/ChatGLM-6b-onnx-u8s8/chatglm-6b-int8-onnx-merged/model_weights_0.bin",
+        "crazy_functions/latex_fns/latex_actions.py",
+        "crazy_functions/latex_fns/latex_toolbox.py"
+    ]
+    # Sample comments; each is longer than 50 characters, so sanitize_comment()
+    # truncates them and appends '...'.
+    file_comments = [
+        "根据位置和名称，可能是一个模块的初始化文件根据位置和名称，可能是一个模块的初始化文件根据位置和名称，可能是一个模块的初始化文件",
+        "包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器包含一些用于文本处理和模型微调的函数和装饰器",
+        "用于构建HTML报告的类和方法用于构建HTML报告的类和方法用于构建HTML报告的类和方法",
+        "包含了用于文本切分的函数，以及处理PDF文件的示例代码包含了用于文本切分的函数，以及处理PDF文件的示例代码包含了用于文本切分的函数，以及处理PDF文件的示例代码",
+        "用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数用于解析和翻译PDF文件的功能和相关辅助函数",
+        "是一个包的初始化文件，用于初始化包的属性和导入模块是一个包的初始化文件，用于初始化包的属性和导入模块是一个包的初始化文件，用于初始化包的属性和导入模块",
+        "用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
+        "包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
+    ]
+    print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py
@@ -83,7 +83,8 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
             history=this_iteration_history_feed,   # 迭代之前的分析
             sys_prompt="你是一个程序架构分析师，正在分析一个项目的源代码。" + sys_prompt_additional)
 
-        summary = "请用一句话概括这些文件的整体功能"
+        diagram_code = make_diagram(this_iteration_files, result, this_iteration_history_feed)
+        summary = "请用一句话概括这些文件的整体功能。\n\n" + diagram_code
         summary_result = yield from request_gpt_model_in_new_thread_with_ui_alive(
             inputs=summary, 
             inputs_show_user=summary, 
@@ -104,6 +105,9 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
     chatbot.append(("完成了吗？", res))
     yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
 
+def make_diagram(this_iteration_files, result, this_iteration_history_feed):
+    # Build the Mermaid file-tree diagram for the current batch.
+    # Even-indexed entries of this_iteration_history_feed are passed as the
+    # file paths and odd-indexed entries as the matching comments.
+    # NOTE(review): presumably the feed alternates (file/prompt, analysis)
+    # entries - verify against the caller.
+    # `this_iteration_files` and `result` are accepted but not used here.
+    # The import is done inside the function rather than at module level.
+    from crazy_functions.diagram_fns.file_tree import build_file_tree_mermaid_diagram
+    return build_file_tree_mermaid_diagram(this_iteration_history_feed[0::2], this_iteration_history_feed[1::2], "项目示意图")
 
 @CatchException
 def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):

diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py
@@ -301,6 +301,17 @@ def close_up_code_segment_during_stream(gpt_reply):
     else:
         return gpt_reply
 
+def simple_markdown_convertion(txt):
+    # Convert markdown text to HTML and wrap it in a "markdown-body" <div>.
+    # Text that already starts and ends with that wrapper is returned as-is,
+    # so applying the function twice does not wrap twice.
+    pre = '<div class="markdown-body">'
+    suf = "</div>"
+    if txt.startswith(pre) and txt.endswith(suf):
+        return txt  # already converted; no need to convert again
+    # `markdown` and `code_highlight_configs` come from elsewhere in this module.
+    txt = markdown.markdown(
+        txt,
+        extensions=["pymdownx.superfences", "tables", "pymdownx.highlight"],
+        extension_configs=code_highlight_configs,
+    )
+    return pre + txt + suf
 
 def format_io(self, y):
     """
@@ -319,13 +330,9 @@ def format_io(self, y):
         gpt_reply = close_up_code_segment_during_stream(gpt_reply)
     # 处理提问与输出
     y[-1] = (
-        None
-        if i_ask is None
-        else markdown.markdown(
-            i_ask,
-            extensions=["pymdownx.superfences", "tables", "pymdownx.highlight"],
-            extension_configs=code_highlight_configs,
-        ),
+        # 输入部分
+        None if i_ask is None else simple_markdown_convertion(i_ask),
+        # 输出部分
         None if gpt_reply is None else markdown_convertion(gpt_reply),
     )
     return y