This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit c9a2656

Update chat completion API docs for llama-cpp supported params
1 parent b558009 commit c9a2656

3 files changed: 79 additions, 11 deletions

docs/docs/capabilities/models/model-yaml.mdx

Lines changed: 2 additions & 1 deletion

@@ -206,7 +206,8 @@ model_path: string
 |------------------------|--------------------------------------------------------------------------------------|--------------|
 | `cache_type` | Data type of the KV cache in llama.cpp models. Supported types are `f16`, `q8_0`, and `q4_0`, default is `f16`. | No |
 | `cache_enabled` | Enables caching of conversation history for reuse in subsequent requests. Default is `false`. | No |
-
+| `mmproj` | Path to the mmproj GGUF model (adds support for llava models). | No |
+| `llama_model_path` | Path to the LLM GGUF model. | No |
 
 These parameters will override the `model.yml` parameters when starting model through the API.
 
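As a rough illustration of how these new fields could be overridden when starting a model through the API, here is a minimal Python sketch. The endpoint path, port, model id, and file paths are assumptions for illustration, not values taken from this commit.

```python
# Hypothetical sketch: starting a llava-style model through the local API while
# overriding the model.yml fields added in this commit (mmproj, llama_model_path).
# Endpoint, port, model id, and paths are illustrative assumptions.
import requests

payload = {
    "model": "llava-v1.6",                         # model id registered with the server
    "llama_model_path": "/models/llava/llm.gguf",  # path to the LLM GGUF weights
    "mmproj": "/models/llava/mmproj.gguf",         # path to the multimodal projector GGUF
}

resp = requests.post("http://127.0.0.1:39281/v1/models/start", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())
```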

docs/docs/guides/function-calling.md

Lines changed: 2 additions & 6 deletions

@@ -438,14 +438,10 @@ User can also specify the function with enum field to the tool definition to mak
 
 (*) Note that the accuracy of function calling heavily depends on the quality of the model. For small models like 8B or 12B, we should only use function calling with simple cases.
 
+The function calling feature from cortex.cpp is primarily an application of prompt engineering. When tools are specified, we inject a system prompt into the conversation to facilitate this functionality.
 
-
-<!--
-Note: The function calling feature from cortex.cpp is primarily an application of prompt engineering. When tools are specified, we inject a system prompt into the conversation to facilitate this functionality.
-
 Compatibility: This feature works best with models like llama3.1 and its derivatives, such as mistral-nemo or qwen.
 
 Customization: Users have the option to manually update the system prompt to fine-tune it for specific problems or use cases. The detail implementation is in this [PR](https://github.com/janhq/cortex.cpp/pull/1472/files).
 
-The full steps to mimic the function calling feature in Python using openai lib can be found [here](https://github.com/janhq/models/issues/16#issuecomment-2381129322)
--->
+The full steps to mimic the function calling feature in Python using openai lib can be found [here](https://github.com/janhq/models/issues/16#issuecomment-2381129322).
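For orientation, a minimal sketch of such a request against a local OpenAI-compatible server is shown below; the base URL, port, and model name are illustrative assumptions, and the linked issue remains the authoritative walkthrough.

```python
# Illustrative sketch only: sending a tool definition to a local OpenAI-compatible
# endpoint with the openai Python library. Base URL, port, and model name are
# assumptions, not values taken from this commit.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:39281/v1", api_key="not-needed")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="llama3.1:8b-gguf",
    messages=[{"role": "user", "content": "What's the weather in Hanoi?"}],
    tools=tools,
)

# When the model decides to call the tool, the arguments come back as JSON text.
print(response.choices[0].message.tool_calls)
```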

docs/static/openapi/cortex.json

Lines changed: 75 additions & 4 deletions

@@ -227,7 +227,9 @@
           }
         }
       },
-      "tags": ["Chat"]
+      "tags": [
+        "Chat"
+      ]
     }
   },
   "/v1/models/pull": {
@@ -664,7 +666,9 @@
           }
         }
       },
-      "tags": ["Models"]
+      "tags": [
+        "Models"
+      ]
     }
   },
   "/v1/threads": {
@@ -2235,6 +2239,66 @@
         "user": {
           "type": "string",
           "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582)."
+        },
+        "dynatemp_range": {
+          "type": "number",
+          "description": "Dynamic temperature range. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "dynatemp_exponent": {
+          "type": "number",
+          "description": "Dynamic temperature exponent. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "top_k": {
+          "type": "integer",
+          "description": "The number of most likely tokens to consider at each step. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "min_p": {
+          "type": "number",
+          "description": "Minimum probability threshold for token sampling. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "tfs_z": {
+          "type": "number",
+          "description": "The z-score used for Typical token sampling. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "typ_p": {
+          "type": "number",
+          "description": "The cumulative probability threshold used for Typical token sampling. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "repeat_last_n": {
+          "type": "integer",
+          "description": "Number of previous tokens to penalize for repeating. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "repeat_penalty": {
+          "type": "number",
+          "description": "Penalty for repeating tokens. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "mirostat": {
+          "type": "boolean",
+          "description": "Enables or disables Mirostat sampling (true or false). This parameter is only supported by the `llama-cpp` engine."
+        },
+        "mirostat_tau": {
+          "type": "number",
+          "description": "Target entropy value for Mirostat sampling. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "mirostat_eta": {
+          "type": "number",
+          "description": "Learning rate for Mirostat sampling. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "penalize_nl": {
+          "type": "boolean",
+          "description": "Penalizes newline tokens (true or false). This parameter is only supported by the `llama-cpp` engine."
+        },
+        "ignore_eos": {
+          "type": "boolean",
+          "description": "Ignores the end-of-sequence token (true or false). This parameter is only supported by the `llama-cpp` engine."
+        },
+        "n_probs": {
+          "type": "integer",
+          "description": "Number of probabilities to return. This parameter is only supported by the `llama-cpp` engine."
+        },
+        "min_keep": {
+          "type": "integer",
+          "description": "Minimum number of tokens to keep. This parameter is only supported by the `llama-cpp` engine."
+        }
       },
       "required": [
@@ -3189,7 +3253,10 @@
           "description": "The display name of the model."
         }
       },
-      "required": ["model", "modelPath"]
+      "required": [
+        "model",
+        "modelPath"
+      ]
     },
     "ImportModelResponse": {
       "type": "object",
@@ -3208,7 +3275,11 @@
           "example": "OK"
         }
       },
-      "required": ["message", "modelHandle", "result"]
+      "required": [
+        "message",
+        "modelHandle",
+        "result"
+      ]
     },
     "CommonResponseDto": {
       "type": "object",

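The new fields extend the chat completion request schema. Below is a minimal sketch of a request that exercises a few of them against a local OpenAI-compatible server; the base URL, port, and model name are illustrative assumptions, and `extra_body` is simply how the openai Python client forwards non-standard fields.

```python
# Minimal sketch, not taken from the commit: a chat completion request that sets a
# few of the llama-cpp-only sampling parameters documented above. Base URL, port,
# and model name are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:39281/v1", api_key="not-needed")

completion = client.chat.completions.create(
    model="llama3.1:8b-gguf",
    messages=[{"role": "user", "content": "Give me one fun fact about llamas."}],
    temperature=0.8,
    extra_body={
        "top_k": 40,          # consider only the 40 most likely tokens per step
        "min_p": 0.05,        # drop tokens below the minimum probability threshold
        "repeat_last_n": 64,  # penalize repeats within the last 64 tokens
        "repeat_penalty": 1.1,
        "mirostat": False,    # leave Mirostat sampling disabled
    },
)

print(completion.choices[0].message.content)
```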