From 14f18201a29f1f65e076fb143f6e9337684f296d Mon Sep 17 00:00:00 2001 From: william Date: Sun, 25 Feb 2024 01:31:49 +0800 Subject: [PATCH] Update Maven version --- CHANGELOG.md | 7 ++--- README.Zh_CN.md | 60 ++++++++++++++++++++-------------------- README.md | 60 ++++++++++++++++++++-------------------- docs/parameters.md | 61 +++++++++++++++++++++++------------------ llama-java-core/pom.xml | 2 +- octet-chat-app/pom.xml | 2 +- pom.xml | 4 +-- 7 files changed, 101 insertions(+), 95 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e4da29..3be0750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,9 @@ ☕️ __LLaMA-Java-Core__ +- Support dynamic temperature sampling. - Update llama-java libs. 🤖 __Octet-Chat-App__ -- Add WebUI support. -- Rename project name. -- Optimize open api. -- Fix API response result parsing issue. +- Optimize auto agent. +- Add openapi docs. diff --git a/README.Zh_CN.md b/README.Zh_CN.md index a9d2580..b96991b 100644 --- a/README.Zh_CN.md +++ b/README.Zh_CN.md @@ -30,6 +30,8 @@ - [X] 🚀 新增自定义AI角色、优化OpenAPI - [X] 🚀 新增AI智能体,可调用插件的能力 +- [X] 🚀 支持动态温度采样 +- [X] 🚀 Octet-chat-app 增加了 WebUI @@ -85,6 +87,24 @@ __如何使用__ java -jar octet-chat-app.jar --character YOUR_CHARACTER ``` +> [!TIP] +> +> 使用 `help` 查看更多参数,示例如下: + +```bash +java -jar octet-chat-app.jar --help + +usage: Octet.Chat + --app App launch type: cli | api (default: cli). + -c,--completions Use completions mode. + -ch,--character Load the specified AI character, default: + llama2-chat. + -h,--help Show this help message and exit. + -q,--questions Load the specified user question list, example: + /PATH/questions.txt. 
+``` + + ### 🚀 AI Agent > [!NOTE] @@ -95,11 +115,6 @@ __如何使用__ 下载 `Qwen-chat` 模型,编辑 [`octet.json`](octet-chat-app/characters/octet.json) 设置模型文件路径,将 `agent_mode` 修改为 `true` 即可开启智能体模式。 -运行命令行交互,开始聊天: - -```bash -java -jar octet-chat-app.jar --character "Assistant Octet" -``` * 目前实现了两个插件,作为示例你可以继续丰富扩展它们。 @@ -113,31 +128,13 @@ java -jar octet-chat-app.jar --character "Assistant Octet" ![Octet Agent](docs/agent.png) -> [!TIP] -> -> 使用 `help` 查看更多参数,示例如下: - -```bash -java -jar octet-chat-app.jar --help - -usage: Octet.Chat - --app App launch type: cli | api (default: cli). - -c,--completions Use completions mode. - -ch,--character Load the specified AI character, default: - llama2-chat. - -h,--help Show this help message and exit. - -q,--questions Load the specified user question list, example: - /PATH/questions.txt. -``` - - -### 🖥 API服务 +### 🖥 Web UI __如何使用__ -和命令行交互一样,首先编辑 `characters.template.json` 设置一个自定义的AI角色。 +和命令行交互一样,首先设置一个自定义的AI角色。 -启动服务: +启动服务,打开浏览器开始聊天,默认地址:`http://YOUR_IP_ADDR:8152/` ```bash # Default URL: http://YOUR_IP_ADDR:8152/ @@ -146,15 +143,18 @@ cd /octet-chat-app bash app_server.sh start YOUR_CHARACTER ``` -现在你可以将API服务集成到你的应用中,例如:`WebUI`、`App`、`Wechat`等。 - ![webui.png](docs/webui.png) + +> [!TIP] +> +> 你也可以将API服务集成到你的应用中,例如:`VsCode`、`App`、`Wechat`等。 +
如何调用API

-> `POST` **/v1/chat/completions**
+> API docs: http://127.0.0.1:8152/swagger-ui.html

```shell
curl --location 'http://127.0.0.1:8152/v1/chat/completions' \
@@ -211,7 +211,7 @@ __角色配置__

> [!IMPORTANT]
>
> - 本项目不提供任何模型,请自行获取模型文件并遵守相关协议。
-> - 请勿将本项目用于非法用途,包括但不限于商业用途、盈利用途、以及违反中国法律法规的用途。
+> - 请勿将本项目用于非法用途,包括但不限于商业用途、盈利用途、以及违反法律法规的用途。
> - 因使用本项目所产生的任何法律责任,由使用者自行承担,本项目不承担任何法律责任。

## 问题反馈

diff --git a/README.md b/README.md
index a7f53ce..c53d4a3 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,8 @@ You can use it to deploy your own private services, supports the `Llama2` and `G

- [X] 🚀 Added custom AI character and optimized OpenAPI
- [X] 🚀 Added AI Agent and implemented Function calling
+- [X] 🚀 Supported dynamic temperature sampling
+- [X] 🚀 Added WebUI to octet-chat-app
@@ -84,6 +86,24 @@ Edit `characters.template.json` to set a custom AI character. Run command line i
java -jar octet-chat-app.jar --character YOUR_CHARACTER
```

+> [!TIP]
+>
+> Use `help` to view more parameters, for example:
+
+```bash
+java -jar octet-chat-app.jar --help
+
+usage: Octet.Chat
+    --app                App launch type: cli | api (default: cli).
+ -c,--completions        Use completions mode.
+ -ch,--character         Load the specified AI character, default:
+                         llama2-chat.
+ -h,--help               Show this help message and exit.
+ -q,--questions          Load the specified user question list, example:
+                         /PATH/questions.txt.
+```
+
+
### 🚀 AI Agent

> [!NOTE]
@@ -94,11 +114,6 @@ __How to use__

Download the `Qwen-chat` model, edit [`octet.json`](octet-chat-app/characters/octet.json) to set the model file path, and change `agent_mode` to `true` to start the agent mode.

-Run command line interaction to start chatting:
-
-```bash
-java -jar octet-chat-app.jar --character "Assistant Octet"
-```

* Two plugins are currently implemented, and as examples you can continue to enrich them.

@@ -112,31 +127,13 @@ java -jar octet-chat-app.jar --character "Assistant Octet"

![Octet Agent](docs/agent.png)

-> [!TIP]
->
-> Use `help` to view more parameters, for example:
-
-```bash
-java -jar octet-chat-app.jar --help
-
-usage: Octet.Chat
-    --app                App launch type: cli | api (default: cli).
- -c,--completions        Use completions mode.
- -ch,--character         Load the specified AI character, default:
-                         llama2-chat.
- -h,--help               Show this help message and exit.
- -q,--questions          Load the specified user question list, example:
-                         /PATH/questions.txt.
-```
-
-
-### 🖥 API Services
+### 🖥 Web UI

__How to use__

-Just like CLI interaction, first edit `characters.template.json` to set a custom AI character.
+Just like CLI interaction, first set a custom AI character.
-Launch the app:
+Launch the app, then open your browser to start chatting. Default URL: `http://YOUR_IP_ADDR:8152/`

```bash
# Default URL: http://YOUR_IP_ADDR:8152/

cd /octet-chat-app
bash app_server.sh start YOUR_CHARACTER
```

-Now it can be integrated into your services, such as `WebUI`, `App`, `Wechat`, etc.
-
![webui.png](docs/webui.png)

+
+> [!TIP]
+>
+> It can be integrated into your services, such as `VsCode`, `App`, `Wechat`, etc.
+
How to call API

-> `POST` **/v1/chat/completions**
+> API docs: http://127.0.0.1:8152/swagger-ui.html

```shell
curl --location 'http://127.0.0.1:8152/v1/chat/completions' \
@@ -210,7 +210,7 @@ __Characters config__

> [!IMPORTANT]
>
> - This project does not provide any models. Please obtain the model files yourself and comply with relevant agreements.
-> - Please do not use this project for illegal purposes, including but not limited to commercial use, profit-making use, or use that violates Chinese laws and regulations.
+> - Please do not use this project for illegal purposes, including but not limited to commercial use, profit-making use, or use that violates laws and regulations.
> - Any legal liability arising from the use of this project shall be borne by the user, and this project shall not bear any legal liability.

## Feedback

diff --git a/docs/parameters.md b/docs/parameters.md
index 5febfaf..b2c7c21 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -4,7 +4,8 @@ The following is a list of all the parameters involved in this project.

> [!NOTE]
> Other reference
-> documents:
+>
+> documents:
> Transformers docs.

### Model parameter
@@ -44,6 +45,7 @@ The following is a list of all the parameters involved in this project.
| yarn_beta_slow | 1.0 | YaRN high correction dim. |
| yarn_orig_ctx | 0 | YaRN original context size. |
| offload_kqv | true | whether to offload the KQV ops (including the KV cache) to GPU. |
+| do_pooling | true | whether to pool (sum) embedding results by sequence id (ignored if no pooling layer). |

**JSON template**

@@ -81,37 +83,40 @@ The following is a list of all the parameters involved in this project. 
"yarn_beta_fast": 32.0, "yarn_beta_slow": 1.0, "yarn_orig_ctx": 0, - "offload_kqv": true + "offload_kqv": true, + "do_pooling": true } ``` ### Generate parameter -| Parameter | Default | Description | -|--------------------|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| temperature | 0.8 | Adjust the randomness of the generated text. | -| repeat_penalty | 1.1 | Control the repetition of token sequences in the generated text. | -| penalize_nl | true | Disable penalization for newline tokens when applying the repeat penalty. | -| frequency_penalty | 0.0 | Repeat alpha frequency penalty. | -| presence_penalty | 0.0 | Repeat alpha presence penalty. | -| top_k | 40 | **TOP-K Sampling** Limit the next token selection to the K most probable tokens. | -| top_p | 0.9 | **TOP-P Sampling** Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. | -| tsf | 1.0 | **Tail Free Sampling (TFS)** Enable tail free sampling with parameter z. | -| typical | 1.0 | **Typical Sampling** Enable typical sampling sampling with parameter p. | -| min_p | 0.05 | **Min P Sampling** Sets a minimum base probability threshold for token selection. | -| mirostat_mode | DISABLED | **Mirostat Sampling** Enable Mirostat sampling, controlling perplexity during text generation. `DISABLED = disabled`, `V1 = Mirostat`, `V2 = Mirostat 2.0` | -| mirostat_eta | 0.1 | **Mirostat Sampling** Set the Mirostat learning rate, parameter eta. | -| mirostat_tau | 5.0 | **Mirostat Sampling** Set the Mirostat target entropy, parameter tau. | -| grammar_rules | / | Specify a grammar (defined inline or in a file) to constrain model output to a specific format. | -| max_new_token_size | 512 | Maximum new token generation size. | -| last_tokens_size | 64 | Maximum number of tokens to keep in the last_n_tokens deque. 
| -| verbose_prompt | false | Print the prompt before generating text. | -| user | User | Specify user nickname. | -| assistant | Assistant | Specify bot nickname. | -| add_bos | true | Add BOS token. | -| special_tokens | true | Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. | -| logit_bias | Assistant | Adjust the probability distribution of words. | -| stopping_word | Assistant | Control the stop word list for generating stops, with values that can be text or token IDs. | +| Parameter | Default | Description | +|--------------------|-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| temperature | 0.8 | Adjust the randomness of the generated text. | +| repeat_penalty | 1.1 | Control the repetition of token sequences in the generated text. | +| penalize_nl | true | Disable penalization for newline tokens when applying the repeat penalty. | +| frequency_penalty | 0.0 | Repeat alpha frequency penalty. | +| presence_penalty | 0.0 | Repeat alpha presence penalty. | +| top_k | 40 | **TOP-K Sampling** Limit the next token selection to the K most probable tokens. | +| top_p | 0.9 | **TOP-P Sampling** Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. | +| tsf | 1.0 | **Tail Free Sampling (TFS)** Enable tail free sampling with parameter z. | +| typical | 1.0 | **Typical Sampling** Enable typical sampling sampling with parameter p. | +| min_p | 0.05 | **Min P Sampling** Sets a minimum base probability threshold for token selection. | +| mirostat_mode | DISABLED | **Mirostat Sampling** Enable Mirostat sampling, controlling perplexity during text generation. `DISABLED = disabled`, `V1 = Mirostat`, `V2 = Mirostat 2.0` | +| mirostat_eta | 0.1 | **Mirostat Sampling** Set the Mirostat learning rate, parameter eta. 
| +| mirostat_tau | 5.0 | **Mirostat Sampling** Set the Mirostat target entropy, parameter tau. | +| dynatemp_range | 0.0 | **Dynamic Temperature Sampling** Dynamic temperature range. The final temperature will be in the range of (temperature - dynatemp_range) and (temperature + dynatemp_range). | +| dynatemp_exponent | 1.0 | **Dynamic Temperature Sampling** Dynamic temperature exponent. | +| grammar_rules | / | Specify a grammar (defined inline or in a file) to constrain model output to a specific format. | +| max_new_token_size | 512 | Maximum new token generation size. | +| last_tokens_size | 64 | Maximum number of tokens to keep in the last_n_tokens deque. | +| verbose_prompt | false | Print the prompt before generating text. | +| user | User | Specify user nickname. | +| assistant | Assistant | Specify bot nickname. | +| add_bos | true | Add BOS token. | +| special_tokens | true | Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. | +| logit_bias | Assistant | Adjust the probability distribution of words. | +| stopping_word | Assistant | Control the stop word list for generating stops, with values that can be text or token IDs. | **JSON template** @@ -130,6 +135,8 @@ The following is a list of all the parameters involved in this project. 
"mirostat_mode": "DISABLED", "mirostat_eta": 0.1, "mirostat_tau": 5.0, + "dynatemp_range": 0.0, + "dynatemp_exponent": 1.0, "grammar_rules": null, "max_new_token_size": 512, "last_tokens_size": 64, diff --git a/llama-java-core/pom.xml b/llama-java-core/pom.xml index 0b7c15d..1638855 100644 --- a/llama-java-core/pom.xml +++ b/llama-java-core/pom.xml @@ -6,7 +6,7 @@ chat.octet octet-chat - 1.3.8 + 1.3.9 llama-java-core diff --git a/octet-chat-app/pom.xml b/octet-chat-app/pom.xml index 2bce0dc..3764b5d 100644 --- a/octet-chat-app/pom.xml +++ b/octet-chat-app/pom.xml @@ -6,7 +6,7 @@ chat.octet octet-chat - 1.3.8 + 1.3.9 diff --git a/pom.xml b/pom.xml index de96125..2ca5865 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ chat.octet octet-chat pom - 1.3.8 + 1.3.9 8 @@ -19,7 +19,7 @@ 1.18.26 32.1.2-jre 2.7.16 - 1.3.8 + 1.3.9 octet-chat
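
A note on the `dynatemp_range` / `dynatemp_exponent` generate parameters documented in this patch: dynamic temperature sampling is commonly implemented (for example in llama.cpp) by mapping the entropy of the candidate distribution into the range `[temperature - dynatemp_range, temperature + dynatemp_range]`. The sketch below illustrates that idea only — `DynaTempSketch` and `dynamicTemperature` are hypothetical names, not part of the llama-java-core API:

```java
// Illustrative sketch only: DynaTempSketch and dynamicTemperature are
// hypothetical names, not part of the llama-java-core API. The entropy-based
// mapping mirrors the table's description: the final temperature lands in
// [temperature - dynatemp_range, temperature + dynatemp_range].
public final class DynaTempSketch {

    static double dynamicTemperature(double[] probs, double temperature,
                                     double dynatempRange, double dynatempExponent) {
        if (dynatempRange <= 0.0) {
            return temperature; // the default (0.0) disables dynamic temperature
        }
        double minTemp = Math.max(0.0, temperature - dynatempRange);
        double maxTemp = temperature + dynatempRange;

        // Shannon entropy of the candidate distribution.
        double entropy = 0.0;
        for (double p : probs) {
            if (p > 0.0) {
                entropy -= p * Math.log(p);
            }
        }
        // Normalize by the maximum possible entropy (a uniform distribution),
        // then shape the mapping with the exponent.
        double maxEntropy = Math.log(probs.length);
        double normalized = maxEntropy > 0.0 ? entropy / maxEntropy : 1.0;
        return minTemp + (maxTemp - minTemp) * Math.pow(normalized, dynatempExponent);
    }

    public static void main(String[] args) {
        double[] confident = {0.97, 0.01, 0.01, 0.01};
        double[] uncertain = {0.25, 0.25, 0.25, 0.25};
        // A peaked (low-entropy) distribution is sampled with a lower
        // temperature; a flat one approaches temperature + dynatemp_range.
        System.out.println(dynamicTemperature(confident, 0.8, 0.5, 1.0));
        System.out.println(dynamicTemperature(uncertain, 0.8, 0.5, 1.0));
    }
}
```

Under these assumptions, steps where the model is already confident are sampled near the lower end of the range (close to greedy), while high-entropy steps get extra randomness; `dynatemp_exponent` controls how sharply the entropy is mapped between the two extremes.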