chore: update model list (#550)

* chore: update model list
* fix: golang ut (unit tests)
* chore: update go version
baidubce · May 24, 2024 · 2ab4358 · 2ab4358
1 parent 2a5d423
commit 2ab4358
Show file tree

Hide file tree

Showing 6 changed files with 155 additions and 27 deletions.
diff --git a/go/Makefile b/go/Makefile
@@ -1,2 +1,4 @@
 test:
-	cd qianfan && go test -race -timeout=120s -v -coverprofile=coverage.out ./...
+	cd qianfan && go test -race -timeout=120s -v -coverprofile=coverage.out ./...
+format:
+	cd qianfan && go fmt ./...
diff --git a/go/qianfan/chat_completion.go b/go/qianfan/chat_completion.go
@@ -88,8 +88,17 @@ var ChatModelEndpoint = map[string]string{
 	"ERNIE-3.5-8K":                 "/chat/completions",
 	"ERNIE-Bot":                    "/chat/completions",
 	"ERNIE-4.0-8K":                 "/chat/completions_pro",
+	"ERNIE-4.0-8K-Preview":         "/chat/ernie-4.0-8k-preview",
+	"ERNIE-4.0-8K-Preview-0518":    "/chat/completions_adv_pro",
+	"ERNIE-4.0-preemptible":        "/chat/completions_pro_preemptible",
+	"ERNIE-4.0-8K-0329":            "/chat/ernie-4.0-8k-0329",
+	"ERNIE-4.0-8K-0104":            "/chat/ernie-4.0-8k-0104",
 	"ERNIE-Bot-4":                  "/chat/completions_pro",
 	"ERNIE-Bot-8k":                 "/chat/ernie_bot_8k",
+	"ERNIE-3.5-128K":               "/chat/ernie-3.5-128k",
+	"ERNIE-3.5-8K-preview":         "/chat/ernie-3.5-8k-preview",
+	"ERNIE-3.5-preemptible":        "/chat/completions_preemptible",
+	"ERNIE-3.5-8K-0329":            "/chat/ernie-3.5-8k-0329",
 	"ERNIE-3.5-4K-0205":            "/chat/ernie-3.5-4k-0205",
 	"ERNIE-3.5-8K-0205":            "/chat/ernie-3.5-8k-0205",
 	"ERNIE-3.5-8K-1222":            "/chat/ernie-3.5-8k-1222",
@@ -98,20 +107,28 @@ var ChatModelEndpoint = map[string]string{
 	"ERNIE-Speed-8K":               "/chat/ernie_speed",
 	"ERNIE-Speed-128K":             "/chat/ernie-speed-128k",
 	"ERNIE Speed-AppBuilder":       "/chat/ai_apaas",
+	"ERNIE-Tiny-8K":                "/chat/ernie-tiny-8k",
+	"ERNIE-Function-8K":            "/chat/ernie-func-8k",
+	"ERNIE-Character-8K":           "/chat/ernie-char-8k",
 	"ERNIE-Bot-turbo-AI":           "/chat/ai_apaas",
 	"EB-turbo-AppBuilder":          "/chat/ai_apaas",
 	"BLOOMZ-7B":                    "/chat/bloomz_7b1",
 	"Llama-2-7b-chat":              "/chat/llama_2_7b",
 	"Llama-2-13b-chat":             "/chat/llama_2_13b",
 	"Llama-2-70b-chat":             "/chat/llama_2_70b",
-	"Qianfan-BLOOMZ-7B-compressed": "/chat/qianfan_bloomz_7b_compressed",
 	"Qianfan-Chinese-Llama-2-7B":   "/chat/qianfan_chinese_llama_2_7b",
+	"Qianfan-Chinese-Llama-2-13B":  "/chat/qianfan_chinese_llama_2_13b",
+	"Qianfan-Chinese-Llama-2-70B":  "/chat/qianfan_chinese_llama_2_70b",
+	"Meta-Llama-3-8B":              "/chat/llama_3_8b",
+	"Meta-Llama-3-70B":             "/chat/llama_3_70b",
+	"Qianfan-BLOOMZ-7B-compressed": "/chat/qianfan_bloomz_7b_compressed",
 	"ChatGLM2-6B-32K":              "/chat/chatglm2_6b_32k",
 	"AquilaChat-7B":                "/chat/aquilachat_7b",
 	"XuanYuan-70B-Chat-4bit":       "/chat/xuanyuan_70b_chat",
-	"Qianfan-Chinese-Llama-2-13B":  "/chat/qianfan_chinese_llama_2_13b",
 	"ChatLaw":                      "/chat/chatlaw",
 	"Yi-34B-Chat":                  "/chat/yi_34b_chat",
+	"Mixtral-8x7B-Instruct":        "/chat/mixtral_8x7b_instruct",
+	"Gemma-7B-it":                  "/chat/gemma_7b_it",
 }
 
 // 创建一个 User 的消息

diff --git a/go/qianfan/chat_completion_test.go b/go/qianfan/chat_completion_test.go
@@ -41,6 +41,9 @@ var testEndpointList = []string{
 
 func TestChatCompletion(t *testing.T) {
 	for model, endpoint := range ChatModelEndpoint {
+		if model == "ERNIE-Function-8K" {
+			continue
+		}
 		chat := NewChatCompletion(WithModel(model))
 		resp, err := chat.Do(
 			context.Background(),
@@ -89,7 +92,7 @@ func TestChatCompletion(t *testing.T) {
 }
 
 func TestChatCompletionStream(t *testing.T) {
-	for model, endpoint := range ChatModelEndpoint {
+	for model, endpoint := range map[string]string{"ERNIE-Function-8K": "/chat/ernie-func-8k"} {
 		chat := NewChatCompletion(WithModel(model))
 		resp, err := chat.Stream(
 			context.Background(),
@@ -103,7 +106,7 @@ func TestChatCompletionStream(t *testing.T) {
 		turn_count := 0
 		for {
 			r, err := resp.Recv()
-			assert.NoError(t, err)
+			assert.NoErrorf(t, err, "model:%s, endpoint: %s", model, endpoint)
 			if resp.IsEnd {
 				break
 			}
@@ -112,7 +115,7 @@ func TestChatCompletionStream(t *testing.T) {
 			assert.NotEqual(t, r.Id, nil)
 			assert.Equal(t, r.Object, "chat.completion")
 			assert.Contains(t, r.RawResponse.Request.URL.Path, endpoint)
-			assert.Contains(t, r.Result, "你好")
+			assert.True(t, strings.Contains(r.Result, "你好") || strings.Contains(r.Result, "上海"))
 			req, err := getRequestBody[ChatCompletionRequest](r.RawResponse)
 			assert.NoError(t, err)
 			assert.Equal(t, req.Messages[0].Content, "你好")

diff --git a/go/qianfan/version.go b/go/qianfan/version.go
@@ -26,5 +26,5 @@
 package qianfan
 
 // SDK 版本
-const Version = "v0.0.5"
+const Version = "v0.0.6"
 const versionIndicator = "qianfan_go_sdk_" + Version
diff --git a/python/qianfan/resources/llm/chat_completion.py b/python/qianfan/resources/llm/chat_completion.py
@@ -137,6 +137,52 @@ def _supported_models(cls) -> Dict[str, QfLLMInfo]:
                 input_price_per_1k_tokens=0.12,
                 output_price_per_1k_tokens=0.12,
             ),
+            "ERNIE-4.0-8K-0329": QfLLMInfo(
+                endpoint="/chat/ernie-4.0-8k-0329",
+                required_keys={"messages"},
+                optional_keys={
+                    "stream",
+                    "temperature",
+                    "top_p",
+                    "penalty_score",
+                    "functions",
+                    "system",
+                    "user_id",
+                    "stop",
+                    "disable_search",
+                    "enable_citation",
+                    "response_format",
+                    "max_output_tokens",
+                    "enable_trace",
+                },
+                max_input_chars=20000,
+                max_input_tokens=5120,
+                input_price_per_1k_tokens=0.12,
+                output_price_per_1k_tokens=0.12,
+            ),
+            "ERNIE-4.0-8K-0104": QfLLMInfo(
+                endpoint="/chat/ernie-4.0-8k-0104",
+                required_keys={"messages"},
+                optional_keys={
+                    "stream",
+                    "temperature",
+                    "top_p",
+                    "penalty_score",
+                    "functions",
+                    "system",
+                    "user_id",
+                    "stop",
+                    "disable_search",
+                    "enable_citation",
+                    "response_format",
+                    "max_output_tokens",
+                    "enable_trace",
+                },
+                max_input_chars=20000,
+                max_input_tokens=5120,
+                input_price_per_1k_tokens=0.12,
+                output_price_per_1k_tokens=0.12,
+            ),
             "ERNIE-4.0-preemptible": QfLLMInfo(
                 endpoint="/chat/completions_pro_preemptible",
                 required_keys={"messages"},
@@ -160,6 +206,29 @@ def _supported_models(cls) -> Dict[str, QfLLMInfo]:
                 input_price_per_1k_tokens=0.048,
                 output_price_per_1k_tokens=0.048,
             ),
+            "ERNIE-4.0-8K-Preview-0518": QfLLMInfo(
+                endpoint="/chat/completions_adv_pro",
+                required_keys={"messages"},
+                optional_keys={
+                    "stream",
+                    "temperature",
+                    "top_p",
+                    "penalty_score",
+                    "functions",
+                    "system",
+                    "user_id",
+                    "stop",
+                    "disable_search",
+                    "enable_citation",
+                    "response_format",
+                    "max_output_tokens",
+                    "enable_trace",
+                },
+                max_input_chars=20000,
+                max_input_tokens=5120,
+                input_price_per_1k_tokens=0.12,
+                output_price_per_1k_tokens=0.12,
+            ),
             "ERNIE-4.0-8K-preview": QfLLMInfo(
                 endpoint="/chat/ernie-4.0-8k-preview",
                 required_keys={"messages"},
@@ -358,6 +427,31 @@ def _supported_models(cls) -> Dict[str, QfLLMInfo]:
                 input_price_per_1k_tokens=0.012,
                 output_price_per_1k_tokens=0.012,
             ),
+            "ERNIE-3.5-8K-0329": QfLLMInfo(
+                endpoint="/chat/ernie-3.5-8k-0329",
+                required_keys={"messages"},
+                optional_keys={
+                    "stream",
+                    "temperature",
+                    "top_p",
+                    "penalty_score",
+                    "functions",
+                    "system",
+                    "user_id",
+                    "user_setting",
+                    "stop",
+                    "disable_search",
+                    "enable_citation",
+                    "max_output_tokens",
+                    "response_format",
+                    "tool_choice",
+                    "enable_trace",
+                },
+                max_input_chars=8000,
+                max_input_tokens=2048,
+                input_price_per_1k_tokens=0.012,
+                output_price_per_1k_tokens=0.012,
+            ),
             "ERNIE-Speed-8K": QfLLMInfo(
                 endpoint="/chat/ernie_speed",
                 required_keys={"messages"},
@@ -700,6 +794,25 @@ def _supported_models(cls) -> Dict[str, QfLLMInfo]:
                 input_price_per_1k_tokens=0.006,
                 output_price_per_1k_tokens=0.006,
             ),
+            "Qianfan-Chinese-Llama-2-70B": QfLLMInfo(
+                endpoint="/chat/qianfan_chinese_llama_2_70b",
+                required_keys={"messages"},
+                optional_keys={
+                    "stream",
+                    "user_id",
+                    "temperature",
+                    "top_k",
+                    "top_p",
+                    "penalty_score",
+                    "stop",
+                    "tools",
+                    "tool_choice",
+                },
+                max_input_chars=4800,
+                max_input_tokens=None,
+                input_price_per_1k_tokens=0.006,
+                output_price_per_1k_tokens=0.006,
+            ),
             "ChatLaw": QfLLMInfo(
                 endpoint="/chat/chatlaw",
                 required_keys={"messages", "extra_parameters"},

diff --git a/python/qianfan/tests/utils/mock_server.py b/python/qianfan/tests/utils/mock_server.py
@@ -311,32 +311,25 @@ def chat(model_name):
         global _multi_func_call_round
         _multi_func_call_round += 1
         if _multi_func_call_round == 1:
-            return json_response(
-                {
-                    "id": "as-bcmt5ct4id",
-                    "object": "chat.completion",
-                    "created": 1680167072,
-                    "result": (
-                        'Action: get_current_weather\nAction Input: {"location": "上海市"}'
-                    ),
-                    "is_truncated": False,
-                    "need_clear_history": False,
-                    "usage": {
-                        "prompt_tokens": 10,
-                        "completion_tokens": 72,
-                        "total_tokens": 82,
-                    },
-                },
-                request_id,
-            )
+            ans = 'Action: get_current_weather\nAction Input: {"location": "上海市"}'
         else:
             _multi_func_call_round = 0
+            ans = "上海气温25度"
+
+        if r.get("stream"):
+            return flask.Response(
+                chat_completion_stream_response(
+                    model_name, [{"role": "assistant", "content": ans}]
+                ),
+                mimetype="text/event-stream",
+            )
+        else:
             return json_response(
                 {
                     "id": "as-bcmt5ct4ie",
                     "object": "chat.completion",
                     "created": 1680167075,
-                    "result": "上海气温25度",
+                    "result": ans,
                     "is_truncated": False,
                     "need_clear_history": False,
                     "usage": {
@@ -4130,7 +4123,7 @@ def _start_mock_server():
     run mock server
     """
     try:
-        requests.get("http://127.0.0.1:8866")
+        requests.get("http://127.0.0.1:8865")
     except Exception:
         # mock server is not running, start it
         app.run(host="0.0.0.0", port=8866, debug=True, use_reloader=False)