2 changes: 1 addition & 1 deletion docs/docs/features/load-unload.md
@@ -70,8 +70,8 @@ In case you got error while loading models. Please check for the correct model p
 | `ctx_len` | Integer | The context length for the model operations. |
 | `embedding` | Boolean | Whether to use embedding in the model. |
 | `n_parallel` | Integer | The number of parallel operations.|
-|`cpu_threads`|Integer|The number of threads for CPU inference.|
 | `cont_batching` | Boolean | Whether to use continuous batching. |
+|`cpu_threads`|Integer|The number of threads for CPU inference.|
 | `user_prompt` | String | The prompt to use for the user. |
 | `ai_prompt` | String | The prompt to use for the AI assistant. |
 | `system_prompt` | String | The prompt for system rules. |
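For context, a minimal sketch of a load-model call using the parameters documented in the table above. The endpoint path, port, model path, and all values are assumptions for illustration only; they are not part of this PR.

```python
# Sketch of a load-model request built from the documented parameters.
# Endpoint path, port, and model path are assumptions, not confirmed here.
import requests

payload = {
    "llama_model_path": "/models/model.gguf",  # hypothetical model path
    "ctx_len": 2048,        # context length for model operations
    "embedding": False,     # whether to use embedding in the model
    "n_parallel": 2,        # number of parallel operations
    "cont_batching": True,  # whether to use continuous batching
    "cpu_threads": 4,       # number of threads for CPU inference
    "user_prompt": "USER: ",
    "ai_prompt": "ASSISTANT: ",
    "system_prompt": "SYSTEM: ",
}

resp = requests.post(
    "http://localhost:3928/inferences/llamacpp/loadmodel",  # assumed endpoint
    json=payload,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```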
8 changes: 0 additions & 8 deletions docs/openapi/NitroAPI.yaml
@@ -441,10 +441,6 @@ components:
           default: true
           nullable: true
           description: Determines if output generation is in a streaming manner.
-        cache_prompt:
-          type: boolean
-          default: true
-          description: Optimize performance in repeated or similar requests.
         temp:
           type: number
           default: 0.7
@@ -585,10 +581,6 @@ components:
           min: 0
           max: 1
           description: Set probability threshold for more relevant outputs
-        cache_prompt:
-          type: boolean
-          default: true
-          description: Optimize performance in repeated or similar requests.
     ChatCompletionResponse:
       type: object
       description: Description of the response structure
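With `cache_prompt` removed from both request schemas, a chat-completion request simply stops sending that key. A minimal sketch follows; the endpoint path, the `top_p` field name (inferred from the min/max/description context above), and all values are assumptions for illustration only.

```python
# Sketch of a chat-completion request after the cache_prompt removal.
# Endpoint path and field values are assumptions; the point is that
# no cache_prompt key is sent anymore.
import requests

payload = {
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,  # streaming off so the full body comes back at once
    "temp": 0.7,      # default sampling temperature per the spec
    "top_p": 0.95,    # probability threshold for more relevant outputs
    # "cache_prompt": True,  # no longer part of the schema after this PR
}

resp = requests.post(
    "http://localhost:3928/v1/chat/completions",  # assumed endpoint
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```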