diff --git a/docs/docs/capabilities/embeddings.md b/docs/docs/capabilities/embeddings.md index 2c2fb4d54..44f153556 100644 --- a/docs/docs/capabilities/embeddings.md +++ b/docs/docs/capabilities/embeddings.md @@ -1,7 +1,103 @@ --- title: Embeddings --- - :::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: \ No newline at end of file +🚧 Cortex is currently under development, and this page is a stub for future development. +::: + +cortex.cpp now supports the embeddings endpoint with full OpenAI compatibility. + +For embeddings API usage please refer to [API references](/api-reference#tag/chat/POST/v1/embeddings). This tutorial shows you how to use embeddings in cortex with the OpenAI Python SDK. + +## Embeddings with OpenAI compatibility + +### 1. Start server and run model + +``` +cortex run llama3.1:8b-gguf-q4-km +``` + +### 2. Create a script `embeddings.py` with this content + +``` +from datetime import datetime +from openai import OpenAI +from pydantic import BaseModel +ENDPOINT = "http://localhost:39281/v1" +MODEL = "llama3.1:8b-gguf-q4-km" +client = OpenAI( + base_url=ENDPOINT, + api_key="not-needed" +) +``` + +### 3. Create embeddings + +``` +response = client.embeddings.create(input = "embedding", model=MODEL, encoding_format="base64") +print(response) +``` + +The response will be like this + +``` +CreateEmbeddingResponse( + data=[ + Embedding( + embedding='hjuAPOD8TryuPU8...', + index=0, + object='embedding' + ) + ], + model='meta-llama3.1-8b-instruct', + object='list', + usage=Usage( + prompt_tokens=2, + total_tokens=2 + ) +) +``` + + +The output embeddings are encoded as a base64 string. By default, the model will output the embeddings in float mode. 
+ +``` +response = client.embeddings.create(input = "embedding", model=MODEL) +print(response) +``` + +Result will be + +``` +CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.1, 0.3, 0.4 ....], + index=0, + object='embedding' + ) + ], + model='meta-llama3.1-8b-instruct', + object='list', + usage=Usage( + prompt_tokens=2, + total_tokens=2 + ) +) +``` + +Cortex also supports all input types as [OpenAI](https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-input). + +```sh +# input as string +response = client.embeddings.create(input = "embedding", model=MODEL) + +# input as array of string +response = client.embeddings.create(input = ["embedding"], model=MODEL) + +# input as array of tokens +response = client.embeddings.create(input = [12,44,123], model=MODEL) + +# input as array of arrays contain tokens +response = client.embeddings.create(input = [[912,312,54],[12,433,1241]], model=MODEL) +``` diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 763337b5c..c1ef736db 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -22,7 +22,9 @@ "description": "The assistant has been successfully created." 
} }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "get": { "operationId": "AssistantsController_findAll", @@ -81,7 +83,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/v1/assistants/{id}": { @@ -112,7 +116,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "delete": { "operationId": "AssistantsController_remove", @@ -141,7 +147,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/healthz": { @@ -158,7 +166,9 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] } }, "/processManager/destroy": { @@ -175,7 +185,120 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] + } + }, + "/v1/embeddings": { + "post": { + "summary": "Create embeddings", + "description": "Creates an embedding vector representing the input text.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string", + "description":"The string that will be turned into an embedding." + }, + { + "type": "array", + "description" : "The array of strings that will be turned into an embedding.", + "items": { + "type": "string" + } + }, + { + "type": "array", + "description": "The array of integers that will be turned into an embedding.", + "items": { + "type": "integer" + + } + }, + { + "type": "array", + "description" : "The array of arrays containing integers that will be turned into an embedding.", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + ], + "description": "Input text to embed, encoded as a string or array of tokens. Cannot be empty." 
+ }, + "model": { + "type": "string", + "description": "ID of the model to use.", + "example": "text-embedding-ada-002" + }, + "encoding_format": { + "type": "string", + "description": "The format to return the embeddings in.", + "enum": [ + "float", + "base64" + ], + "default": "float" + } + }, + "required": [ + "input", + "model" + ] + } + } + } + }, + "responses": { + "200": { + "description": "A list of embedding vectors", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the embedding in the list of embeddings." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "The embedding vector, which is a list of floats." + }, + "object": { + "type": "string", + "description": "The object type, which is always 'embedding'.", + "example": "embedding" + } + } + } + } + } + } + } + } + } + }, + "tags": [ + "Embeddings" + ] } }, "/v1/chat/completions": { @@ -215,7 +338,9 @@ } } }, - "tags": ["Chat"] + "tags": [ + "Chat" + ] } }, "/v1/models/pull": { @@ -314,10 +439,14 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] }, "delete": { - "tags": ["Pulling Models"], + "tags": [ + "Pulling Models" + ], "summary": "Stop model download", "description": "Stops the download of a model with the corresponding taskId provided in the request body", "operationId": "ModelsController_stopModelDownload", @@ -333,7 +462,9 @@ "description": "The unique identifier of the download task to be stopped" } }, - "required": ["taskId"] + "required": [ + "taskId" + ] } } } @@ -428,7 +559,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/start": { @@ -461,7 +594,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/stop": { @@ -494,7 +629,9 @@ } } }, - "tags": ["Running 
Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/{id}": { @@ -525,7 +662,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] }, "delete": { "operationId": "ModelsController_remove", @@ -554,7 +693,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/{model}": { @@ -594,7 +735,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/import": { @@ -635,14 +778,18 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] } }, "/v1/threads": { "post": { "operationId": "ThreadsController_create", "summary": "Create thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Creates a new thread.", "parameters": [], "requestBody": { @@ -671,7 +818,9 @@ "get": { "operationId": "ThreadsController_findAll", "summary": "List threads", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Lists all the available threads along with its configurations.", "parameters": [], "responses": { @@ -695,7 +844,9 @@ "get": { "operationId": "ThreadsController_retrieveMessage", "summary": "Retrieve message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Retrieves a message.", "parameters": [ { @@ -731,7 +882,9 @@ "post": { "operationId": "ThreadsController_updateMessage", "summary": "Modify message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Modifies a message.", "responses": { "201": { @@ -778,7 +931,9 @@ "operationId": "ThreadsController_deleteMessage", "summary": "Delete message", "description": "Deletes a message.", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "parameters": [ { "name": "thread_id", @@ -815,7 +970,9 @@ "get": { "operationId": "ThreadsController_getMessagesOfThread", "summary": "List messages", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Returns a list of messages for a given thread.", "parameters": [ { @@ -883,7 +1040,9 @@ 
"post": { "operationId": "ThreadsController_createMessageInThread", "summary": "Create message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Create a message.", "responses": { "201": { @@ -924,7 +1083,9 @@ "operationId": "ThreadsController_cleanThread", "summary": "Clean thread", "description": "Deletes all messages in a thread.", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "parameters": [ { "name": "thread_id", @@ -946,7 +1107,9 @@ "get": { "operationId": "ThreadsController_retrieveThread", "summary": "Retrieve thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Retrieves a thread.", "parameters": [ { @@ -974,7 +1137,9 @@ "post": { "operationId": "ThreadsController_modifyThread", "summary": "Modify thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Modifies a thread.", "parameters": [ { @@ -1015,7 +1180,9 @@ "delete": { "operationId": "ThreadsController_remove", "summary": "Delete thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Deletes a specific thread defined by a thread `id` .", "parameters": [ { @@ -1052,7 +1219,9 @@ "description": "" } }, - "tags": ["System"] + "tags": [ + "System" + ] }, "get": { "operationId": "SystemController_get", @@ -1064,7 +1233,9 @@ "description": "Ok" } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/download": { @@ -1085,7 +1256,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/model": { @@ -1106,7 +1279,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/resources": { @@ -1127,7 +1302,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/engines/{name}": { @@ -1142,7 +1319,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ 
-1189,7 +1370,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "post": { "summary": "Install an engine", @@ -1201,7 +1384,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1235,7 +1422,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { "summary": "Uninstall an engine", @@ -1247,7 +1436,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1324,7 +1517,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/default": { @@ -1338,7 +1533,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1370,7 +1569,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "post": { "summary": "Set default engine variant", @@ -1382,7 +1583,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1424,7 +1629,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/load": { @@ -1438,7 +1645,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1462,7 +1673,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { 
"summary": "Unload engine", @@ -1474,7 +1687,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1498,7 +1715,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/update": { @@ -1512,7 +1731,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1536,7 +1759,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/configs": { @@ -1556,7 +1781,10 @@ "items": { "type": "string" }, - "example": ["http://localhost:39281", "https://cortex.so"] + "example": [ + "http://localhost:39281", + "https://cortex.so" + ] }, "cors": { "type": "boolean", @@ -1575,10 +1803,14 @@ } } }, - "tags": ["Configurations"] + "tags": [ + "Configurations" + ] }, "patch": { - "tags": ["Configurations"], + "tags": [ + "Configurations" + ], "summary": "Update configuration settings", "requestBody": { "required": true, @@ -1598,7 +1830,10 @@ "type": "string" }, "description": "List of allowed origins.", - "example": ["http://localhost:39281", "https://cortex.so"] + "example": [ + "http://localhost:39281", + "https://cortex.so" + ] } } } @@ -1656,6 +1891,10 @@ "name": "Chat", "description": "This endpoint initiates interaction with a Large Language Models (LLM)." }, + { + "name": "Embeddings", + "description": "This endpoint create embeddings for a given input text or tokens." + }, { "name": "Assistants", "description": "These endpoints manage the lifecycle of an Assistant within a conversation thread." 
@@ -1698,6 +1937,7 @@ "name": "CORTEX", "tags": [ "Chat", + "Embeddings", "Engines", "Events", "Pulling Models", @@ -1880,7 +2120,11 @@ "description": "Indicates whether the assistant was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "Message": { "type": "object", @@ -1897,14 +2141,21 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "name": { "type": "string", "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["role"] + "required": [ + "role" + ] }, "SystemMessage": { "allOf": [ @@ -1933,7 +2184,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -1984,7 +2238,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." 
} }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -2096,7 +2353,10 @@ "type": "string" } }, - "required": ["content", "tool_call_id"] + "required": [ + "content", + "tool_call_id" + ] } ] }, @@ -2113,26 +2373,36 @@ "properties": { "type": { "type": "string", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "string" } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "ImageContentPart": { "type": "object", "properties": { "type": { "type": "string", - "enum": ["image_url"] + "enum": [ + "image_url" + ] }, "image_url": { "$ref": "#/components/schemas/ImageUrl" } }, - "required": ["type", "image_url"] + "required": [ + "type", + "image_url" + ] }, "AudioContentPart": { "type": "object", @@ -2145,7 +2415,10 @@ "$ref": "#/components/schemas/InputAudio" } }, - "required": ["type", "input_audio"] + "required": [ + "type", + "input_audio" + ] }, "RefusalContentPart": { "type": "object", @@ -2157,7 +2430,10 @@ "type": "string" } }, - "required": ["type", "refusal"] + "required": [ + "type", + "refusal" + ] }, "ImageUrl": { "type": "object", @@ -2172,7 +2448,9 @@ "description": "Specifies the detail level of the image. Defaults to `auto`." } }, - "required": ["url"] + "required": [ + "url" + ] }, "InputAudio": { "type": "object", @@ -2183,11 +2461,17 @@ }, "format": { "type": "string", - "enum": ["wav", "mp3"], + "enum": [ + "wav", + "mp3" + ], "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." } }, - "required": ["data", "format"] + "required": [ + "data", + "format" + ] }, "Audio": { "type": "object", @@ -2198,7 +2482,9 @@ "description": "Unique identifier for a previous audio response from the model." 
} }, - "required": ["id"] + "required": [ + "id" + ] }, "ToolCall": { "type": "object", @@ -2213,7 +2499,11 @@ "$ref": "#/components/schemas/FunctionCall" } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] }, "FunctionCall": { "type": "object", @@ -2225,7 +2515,10 @@ "type": "string" } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] }, "CreateChatCompletionDto": { "type": "object", @@ -2279,7 +2572,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -2309,10 +2604,15 @@ "type": "array", "items": { "type": "string", - "enum": ["text", "audio"] + "enum": [ + "text", + "audio" + ] }, "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", - "example": ["text"] + "example": [ + "text" + ] }, "audio": { "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", @@ -2325,10 +2625,19 @@ "format": { "type": "string", "description": "Specifies the output audio format. 
Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", - "enum": ["mp3", "wav", "flac", "opus", "pcm16"] + "enum": [ + "mp3", + "wav", + "flac", + "opus", + "pcm16" + ] } }, - "required": ["voice", "format"] + "required": [ + "voice", + "format" + ] }, "store": { "type": "boolean", @@ -2375,10 +2684,16 @@ "type": { "type": "string", "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", - "enum": ["text", "json_object", "json_schema"] + "enum": [ + "text", + "json_object", + "json_schema" + ] } }, - "required": ["type"] + "required": [ + "type" + ] }, "seed": { "type": "number", @@ -2408,26 +2723,37 @@ "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "$ref": "#/components/schemas/Function" } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] }, "tool_choice": { "anyOf": [ { "type": "string", - "enum": ["none", "auto", "required"] + "enum": [ + "none", + "auto", + "required" + ] }, { "type": "object", "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "type": "object", @@ -2436,10 +2762,15 @@ "type": "string" } }, - "required": ["name"] + "required": [ + "name" + ] } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] } ] }, @@ -2514,7 +2845,10 @@ "description": "Minimum number of tokens to keep. This parameter only supported by `llama-cpp` engine." } }, - "required": ["messages", "model"] + "required": [ + "messages", + "model" + ] }, "Function": { "type": "object", @@ -2534,7 +2868,9 @@ "default": false } }, - "required": ["name"] + "required": [ + "name" + ] }, "MessageDto": { "type": "object", @@ -2548,7 +2884,10 @@ "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." 
} }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] }, "ChoiceDto": { "type": "object", @@ -2570,7 +2909,11 @@ ] } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] }, "UsageDto": { "type": "object", @@ -2588,7 +2931,11 @@ "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation." } }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] }, "ChatCompletionResponseDto": { "type": "object", @@ -2615,11 +2962,17 @@ "type": "object", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." }, "tool_calls": { @@ -2648,10 +3001,17 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] } }, "role": { @@ -2672,7 +3032,10 @@ "description": "The name of the function to call." } }, - "required": ["arguments", "name"] + "required": [ + "arguments", + "name" + ] }, "audio": { "type": "object", @@ -2695,17 +3058,27 @@ "description": "Transcript of the audio generated by the model." 
} }, - "required": ["id", "expires_at", "data", "transcript"] + "required": [ + "id", + "expires_at", + "data", + "transcript" + ] } }, - "required": ["role"] + "required": [ + "role" + ] }, "logprobs": { "type": "object", "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2719,11 +3092,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -2741,15 +3120,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." 
} }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2763,17 +3151,27 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] } }, "created": { @@ -2785,7 +3183,10 @@ "description": "The model used for the chat completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -2825,7 +3226,10 @@ "description": "Tokens generated by the model for reasoning." } }, - "required": ["audio_tokens", "reasoning_tokens"] + "required": [ + "audio_tokens", + "reasoning_tokens" + ] }, "prompt_tokens_details": { "type": "object", @@ -2840,7 +3244,10 @@ "description": "Cached tokens present in the prompt." 
} }, - "required": ["audio_tokens", "cached_tokens"] + "required": [ + "audio_tokens", + "cached_tokens" + ] } }, "required": [ @@ -2880,7 +3287,10 @@ "description": "A chat completion delta generated by streamed model responses.", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the chunk message." }, "function_call": { @@ -2918,10 +3328,18 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["index", "id", "type", "function"] + "required": [ + "index", + "id", + "type", + "function" + ] } }, "role": { @@ -2929,7 +3347,10 @@ "description": "The role of the author of this message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." } } @@ -2939,7 +3360,10 @@ "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2953,11 +3377,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. 
Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -2975,15 +3405,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2997,17 +3436,26 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } }, "finish_reason": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The reason the model stopped generating tokens. 
This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." }, "index": { @@ -3015,7 +3463,10 @@ "description": "The index of the choice in the list of choices." } }, - "required": ["delta", "index"] + "required": [ + "delta", + "index" + ] } }, "created": { @@ -3027,7 +3478,10 @@ "description": "The model used to generate the completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -3055,7 +3509,11 @@ "description": "Total number of tokens used in the request (prompt + completion)." } }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] } }, "required": [ @@ -3076,7 +3534,9 @@ "description": "The name of the embedding model to be used." }, "input": { - "example": ["Hello World"], + "example": [ + "Hello World" + ], "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", "type": "array", "items": { @@ -3094,7 +3554,10 @@ "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." 
} }, - "required": ["model", "input"] + "required": [ + "model", + "input" + ] }, "EmbeddingsResponseDto": { "type": "object", @@ -3123,11 +3586,18 @@ ] } }, - "required": ["object", "model", "embedding", "usage"] + "required": [ + "object", + "model", + "embedding", + "usage" + ] }, "PullModelRequest": { "type": "object", - "required": ["model"], + "required": [ + "model" + ], "properties": { "model": { "type": "string", @@ -3185,7 +3655,9 @@ }, "files": { "description": "The URL sources from which the model downloaded or accessed.", - "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], + "example": [ + "https://huggingface.co/cortexso/mistral/tree/gguf" + ], "oneOf": [ { "type": "array", @@ -3205,7 +3677,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -3275,7 +3749,10 @@ "default": "" } }, - "required": ["model", "files"] + "required": [ + "model", + "files" + ] }, "StartModelSuccessDto": { "type": "object", @@ -3289,7 +3766,10 @@ "description": "The unique identifier of the model." } }, - "required": ["message", "modelId"] + "required": [ + "message", + "modelId" + ] }, "ModelStartDto": { "type": "object", @@ -3336,7 +3816,9 @@ "example": "/tmp/model.gguf" } }, - "required": ["model"] + "required": [ + "model" + ] }, "ModelStopDto": { "type": "object", @@ -3347,7 +3829,9 @@ "description": "A downloaded model name." 
} }, - "required": ["model"] + "required": [ + "model" + ] }, "ImportModelRequest": { "type": "object", @@ -3367,10 +3851,16 @@ "option": { "type": "string", "description": "Import options such as symlink or copy.", - "enum": ["symlink", "copy"] + "enum": [ + "symlink", + "copy" + ] } }, - "required": ["model", "modelPath"] + "required": [ + "model", + "modelPath" + ] }, "ImportModelResponse": { "type": "object", @@ -3389,7 +3879,11 @@ "example": "OK" } }, - "required": ["message", "modelHandle", "result"] + "required": [ + "message", + "modelHandle", + "result" + ] }, "CommonResponseDto": { "type": "object", @@ -3399,7 +3893,9 @@ "description": "The response success or error message." } }, - "required": ["message"] + "required": [ + "message" + ] }, "EngineUninstallationResponseDto": { "type": "object", @@ -3455,7 +3951,11 @@ "example": "OK" } }, - "required": ["data", "object", "result"] + "required": [ + "data", + "object", + "result" + ] }, "Engine": { "type": "object", @@ -3485,7 +3985,12 @@ "example": "0.1.34" } }, - "required": ["description", "name", "productName", "status"] + "required": [ + "description", + "name", + "productName", + "status" + ] }, "ModelDto": { "type": "object", @@ -3501,7 +4006,9 @@ "description": "A predefined text or framework that guides the AI model's response generation." 
}, "stop": { - "example": ["End"], + "example": [ + "End" + ], "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", "type": "array", "items": { @@ -3613,7 +4120,9 @@ "example": "llamacpp" } }, - "required": ["id"] + "required": [ + "id" + ] }, "ListModelsResponseDto": { "type": "object", @@ -3621,7 +4130,9 @@ "object": { "type": "string", "example": "list", - "enum": ["list"] + "enum": [ + "list" + ] }, "data": { "description": "List of models", @@ -3631,7 +4142,10 @@ } } }, - "required": ["object", "data"] + "required": [ + "object", + "data" + ] }, "UpdateModelDto": { "type": "object", @@ -3650,7 +4164,9 @@ "items": { "type": "string" }, - "example": [""] + "example": [ + "" + ] }, "stream": { "type": "boolean", @@ -3809,7 +4325,11 @@ "description": "Indicates whether the model was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "CreateThreadAssistantDto": { "type": "object", @@ -3899,7 +4419,10 @@ "tool_resources": { "type": "object", "example": { - "resources": ["database1", "database2"] + "resources": [ + "database1", + "database2" + ] }, "description": "Tool resources for the assistant." } @@ -3927,7 +4450,9 @@ } } }, - "required": ["assistants"] + "required": [ + "assistants" + ] }, "ContentDto": { "type": "object", @@ -3946,7 +4471,10 @@ "description": "Text content of the message along with any annotations." } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "GetMessageResponseDto": { "type": "object", @@ -4120,7 +4648,13 @@ "description": "Indicates whether there are more messages to retrieve." } }, - "required": ["object", "data", "first_id", "last_id", "has_more"] + "required": [ + "object", + "data", + "first_id", + "last_id", + "has_more" + ] }, "CreateMessageDto": { "type": "object", @@ -4136,7 +4670,10 @@ "description": "The text contents of the message." 
} }, - "required": ["role", "content"] + "required": [ + "role", + "content" + ] }, "UpdateMessageDto": { "type": "object", @@ -4162,7 +4699,11 @@ "description": "Indicates whether the message was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "GetThreadResponseDto": { "type": "object", @@ -4183,7 +4724,9 @@ "description": "Unix timestamp representing the creation time of the thread." }, "assistants": { - "example": ["assistant-001"], + "example": [ + "assistant-001" + ], "description": "List of assistants involved in the thread.", "type": "array", "items": { @@ -4237,8 +4780,12 @@ "description": "Indicates whether the thread was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] } } } -} +} \ No newline at end of file