Merge pull request #1415 from binary-husky/frontier
Merge Frontier Branch
binary-husky committed Dec 25, 2023
2 parents ca85573 + 5c7499c commit 6d4a748
Showing 10 changed files with 282 additions and 80 deletions.
16 changes: 12 additions & 4 deletions config.py
@@ -92,8 +92,9 @@
"api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k',
"gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4",
"chatglm3", "moss", "claude-2"]
# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
# P.S. Other available models also include ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"
# "qwen-turbo", "qwen-plus", "qwen-max"]


# Define which models the "Ask multiple GPT models" plugin in the UI should use; choose from AVAIL_LLM_MODELS and separate models with `&`, e.g. "gpt-3.5-turbo&chatglm3&azure-gpt-4"
@@ -103,7 +104,11 @@
# Select the local model variant (only takes effect when AVAIL_LLM_MODELS contains the corresponding local model)
# If you choose a Qwen-series model, specify the exact model in QWEN_MODEL_SELECTION below
# It can also be a concrete model path
QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"


# Connect to Tongyi Qianwen online LLMs https://dashscope.console.aliyun.com/
DASHSCOPE_API_KEY = "" # Alibaba DashScope (Lingji) API_KEY


# Baidu Qianfan (LLM_MODEL="qianfan")
@@ -284,6 +289,9 @@
│ ├── ZHIPUAI_API_KEY
│ └── ZHIPUAI_MODEL
├── "qwen-turbo" 等通义千问大模型
│ └── DASHSCOPE_API_KEY
└── "newbing" Newbing接口不再稳定,不推荐使用
├── NEWBING_STYLE
└── NEWBING_COOKIES
@@ -300,7 +308,7 @@
├── "jittorllms_pangualpha"
├── "jittorllms_llama"
├── "deepseekcoder"
├── "qwen"
├── "qwen-local"
├── See the Wiki for RWKV support
└── "llama2"
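For context, a minimal configuration sketch combining the options touched by this commit; the values below are illustrative placeholders, not defaults shipped by the project:

AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "qwen-turbo", "qwen-max", "qwen-local"]

# the online Tongyi Qianwen models authenticate through DashScope
DASHSCOPE_API_KEY = "sk-xxxxxxxxxxxxxxxx"  # placeholder key

# the local variant loads its checkpoint from the renamed option
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"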
38 changes: 35 additions & 3 deletions request_llms/bridge_all.py
@@ -431,16 +431,48 @@ def decode(self, *args, **kwargs):
})
except:
print(trimmed_format_exc())
if "qwen" in AVAIL_LLM_MODELS:
if "qwen-local" in AVAIL_LLM_MODELS:
try:
from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
from .bridge_qwen_local import predict as qwen_local_ui
model_info.update({
"qwen-local": {
"fn_with_ui": qwen_local_ui,
"fn_without_ui": qwen_local_noui,
"endpoint": None,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})
except:
print(trimmed_format_exc())
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
try:
from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
from .bridge_qwen import predict as qwen_ui
model_info.update({
"qwen": {
"qwen-turbo": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 4096,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
"fn_with_ui": qwen_ui,
"fn_without_ui": qwen_noui,
"endpoint": None,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
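The hunk above registers the three online Qwen models in model_info with per-model token limits. As a hedged sketch (not code from this commit), callers elsewhere in the project typically dispatch through this registry roughly as follows:

from request_llms.bridge_all import model_info

def call_model_sketch(inputs, llm_kwargs, history, sys_prompt):
    # look up the handlers registered for the active model, e.g. "qwen-turbo"
    entry = model_info[llm_kwargs["llm_model"]]
    fn_without_ui = entry["fn_without_ui"]   # the bridge's predict_no_ui_long_connection
    # entry["max_token"], entry["tokenizer"] and entry["token_cnt"] are available
    # for trimming the input before the request is sent
    return fn_without_ui(inputs, llm_kwargs, history=history, sys_prompt=sys_prompt)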
19 changes: 12 additions & 7 deletions request_llms/bridge_chatgpt.py
@@ -102,20 +102,25 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
result = ''
json_data = None
while True:
try: chunk = next(stream_response).decode()
try: chunk = next(stream_response)
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # Failed; retry once? If it fails again, there is nothing more we can do.
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
chunk = next(stream_response) # Failed; retry once? If it fails again, there is nothing more we can do.
chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
if len(chunk_decoded)==0: continue
if not chunk_decoded.startswith('data:'):
error_msg = get_full_error(chunk, stream_response).decode()
if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else:
raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
if ('data: [DONE]' in chunk): break # api2d finished normally
json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
if ('data: [DONE]' in chunk_decoded): break # api2d finished normally
# Read some information in advance (used to detect anomalies)
if has_choices and not choice_valid:
# Some low-quality third-party endpoints produce errors like this
continue
json_data = chunkjson['choices'][0]
delta = json_data["delta"]
if len(delta) == 0: break
if "role" in delta: continue
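The rewritten loop above delegates chunk parsing to a decode_chunk helper that is not part of this diff. A hedged sketch of a helper with the same return shape, assuming the usual OpenAI-style SSE payload ("data: {...}"), might look like the following; the repository's actual field checks may differ:

import json

def decode_chunk_sketch(chunk: bytes):
    chunk_decoded = chunk.decode()
    chunkjson = None
    has_choices = choice_valid = has_content = has_role = False
    try:
        chunkjson = json.loads(chunk_decoded[len('data:'):])
        has_choices = 'choices' in chunkjson
        if has_choices:
            choice_valid = len(chunkjson['choices']) > 0
        if has_choices and choice_valid:
            delta = chunkjson['choices'][0].get('delta', {})
            has_content = 'content' in delta
            has_role = 'role' in delta
    except Exception:
        pass
    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role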
121 changes: 62 additions & 59 deletions request_llms/bridge_qwen.py
@@ -1,59 +1,62 @@
model_name = "Qwen"
cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`"

from toolbox import ProxyNetworkActivate, get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns



# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetQwenLMHandle(LocalLLMHandle):

def load_model_info(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
self.model_name = model_name
self.cmd_to_install = cmd_to_install

def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
# from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
with ProxyNetworkActivate('Download_LLM'):
model_id = get_conf('QWEN_MODEL_SELECTION')
self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # different generation lengths, top_p and other hyperparameters can be specified
self._model = model

return self._model, self._tokenizer

def llm_stream_generator(self, **kwargs):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
def adaptor(kwargs):
query = kwargs['query']
max_length = kwargs['max_length']
top_p = kwargs['top_p']
temperature = kwargs['temperature']
history = kwargs['history']
return query, max_length, top_p, temperature, history

query, max_length, top_p, temperature, history = adaptor(kwargs)

for response in self._model.chat_stream(self._tokenizer, query, history=history):
yield response

def try_to_import_special_deps(self, **kwargs):
# import something that will raise error if the user does not install requirement_*.txt
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the main process
import importlib
importlib.import_module('modelscope')


# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
import time
import os
from toolbox import update_ui, get_conf, update_ui_lastest_msg
from toolbox import check_packages, report_exception

model_name = 'Qwen'

def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
"""
⭐ Multi-threaded method
See request_llms/bridge_all.py for a description of this function
"""
watch_dog_patience = 5
response = ""

from .com_qwenapi import QwenRequestInstance
sri = QwenRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
if len(observe_window) >= 1:
observe_window[0] = response
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
return response

def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
"""
⭐ Single-threaded method
See request_llms/bridge_all.py for a description of this function
"""
chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history)

# Try to import the dependency; if it is missing, suggest how to install it
try:
check_packages(["dashscope"])
except:
yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
chatbot=chatbot, history=history, delay=0)
return

# Check DASHSCOPE_API_KEY
if get_conf("DASHSCOPE_API_KEY") == "":
yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
chatbot=chatbot, history=history, delay=0)
return

if additional_fn is not None:
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

# Start receiving the reply
from .com_qwenapi import QwenRequestInstance
sri = QwenRequestInstance()
for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
chatbot[-1] = (inputs, response)
yield from update_ui(chatbot=chatbot, history=history)

# Summarize the output
if response == f"[Local Message] 等待{model_name}响应中 ...":
response = f"[Local Message] {model_name}响应异常 ..."
history.extend([inputs, response])
yield from update_ui(chatbot=chatbot, history=history)
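The new bridge delegates the actual streaming request to QwenRequestInstance in request_llms/com_qwenapi.py, which is not included in this diff. A minimal, hypothetical sketch of what its generate method could look like on top of the DashScope SDK follows; the class layout and field access here are assumptions for illustration, not the repository's implementation:

import dashscope
from toolbox import get_conf

class QwenRequestInstanceSketch:
    def __init__(self):
        # the key introduced in config.py by this commit
        dashscope.api_key = get_conf("DASHSCOPE_API_KEY")

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        # fold the system prompt and the history pairs into a message list
        messages = [{"role": "system", "content": system_prompt}]
        for i in range(0, len(history) - 1, 2):
            messages.append({"role": "user", "content": history[i]})
            messages.append({"role": "assistant", "content": history[i + 1]})
        messages.append({"role": "user", "content": inputs})

        response_text = ""
        # with stream=True each event carries the accumulated answer so far
        for resp in dashscope.Generation.call(
                model=llm_kwargs["llm_model"],   # "qwen-turbo" / "qwen-plus" / "qwen-max"
                messages=messages,
                result_format="message",
                stream=True):
            if resp.status_code == 200:
                response_text = resp.output.choices[0]["message"]["content"]
                yield response_text
            else:
                yield response_text + f"\n[Local Message] DashScope error: {resp.code} {resp.message}"
                return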
59 changes: 59 additions & 0 deletions request_llms/bridge_qwen_local.py
@@ -0,0 +1,59 @@
model_name = "Qwen_Local"
cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"

from toolbox import ProxyNetworkActivate, get_conf
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns



# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetQwenLMHandle(LocalLLMHandle):

def load_model_info(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
self.model_name = model_name
self.cmd_to_install = cmd_to_install

def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
# from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
with ProxyNetworkActivate('Download_LLM'):
model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
# use fp16
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # different generation lengths, top_p and other hyperparameters can be specified
self._model = model

return self._model, self._tokenizer

def llm_stream_generator(self, **kwargs):
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the child process
def adaptor(kwargs):
query = kwargs['query']
max_length = kwargs['max_length']
top_p = kwargs['top_p']
temperature = kwargs['temperature']
history = kwargs['history']
return query, max_length, top_p, temperature, history

query, max_length, top_p, temperature, history = adaptor(kwargs)

for response in self._model.chat_stream(self._tokenizer, query, history=history):
yield response

def try_to_import_special_deps(self, **kwargs):
# import something that will raise error if the user does not install requirement_*.txt
# 🏃‍♂️🏃‍♂️🏃‍♂️ Executed in the main process
import importlib
importlib.import_module('modelscope')


# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
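Because bridge_qwen_local.py reuses get_local_llm_predict_fns, the new "qwen-local" model exposes the same two entry points as every other bridge. A hedged usage sketch (the exact llm_kwargs keys are assumptions inferred from the adaptor above):

from request_llms.bridge_qwen_local import predict_no_ui_long_connection

llm_kwargs = {"llm_model": "qwen-local", "max_length": 4096,
              "top_p": 0.8, "temperature": 1.0}
answer = predict_no_ui_long_connection(
    inputs="Introduce Qwen in one sentence.",
    llm_kwargs=llm_kwargs,
    history=[],
    sys_prompt="You are a helpful assistant.",
)
print(answer)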