From a1a1f6d850304a7a44c92e8327b0e66ed8d0cb3e Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 12:29:58 -0700 Subject: [PATCH] Add Mistral inference details (#3997) (cherry picked from commit f7c35e7b15bce19824df74e7628f4160f6de65aa) --- output/openapi/elasticsearch-openapi.json | 119 +++++ .../elasticsearch-serverless-openapi.json | 119 +++++ output/schema/schema-serverless.json | 502 ++++++++++++++++++ output/schema/schema.json | 225 +++++++- output/typescript/types.ts | 23 + specification/_doc_ids/table.csv | 3 + .../_json_spec/inference.put.mistral.json | 35 ++ .../put_mistral/PutMistralRequest.ts | 114 ++++ .../put_mistral/PutMistralResponse.ts | 24 + .../request/PutMistralRequestExample1.yaml | 12 + 10 files changed, 1172 insertions(+), 4 deletions(-) create mode 100644 specification/_json_spec/inference.put.mistral.json create mode 100644 specification/inference/put_mistral/PutMistralRequest.ts create mode 100644 specification/inference/put_mistral/PutMistralResponse.ts create mode 100644 specification/inference/put_mistral/examples/request/PutMistralRequestExample1.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 494e511731..ade7468458 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18979,6 +18979,83 @@ "x-state": "Added in 8.18.0" } }, + "/_inference/{task_type}/{mistral_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Mistral inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-mistral", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_mistral:MistralTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "mistral_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_mistral:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_mistral:MistralServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutMistralRequestExample1": { + "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": \"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -78841,6 +78918,48 @@ "search" ] }, + "inference.put_mistral:MistralTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_mistral:ServiceType": { + "type": "string", + "enum": [ + "mistral" + ] + }, + "inference.put_mistral:MistralServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://console.mistral.ai/api-keys/" + }, + "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "max_input_tokens": { + "description": "The maximum number of tokens per input before chunking occurs.", + "type": "number" + }, + "model": { + "externalDocs": { + "url": "https://docs.mistral.ai/getting-started/models/" + }, + "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 46e1dfc138..67fa7ea202 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10935,6 +10935,83 @@ "x-state": "Added in 8.18.0" } }, + "/_inference/{task_type}/{mistral_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Mistral inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-mistral", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_mistral:MistralTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "mistral_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_mistral:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_mistral:MistralServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutMistralRequestExample1": { + "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": \"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -50161,6 +50238,48 @@ "search" ] }, + "inference.put_mistral:MistralTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_mistral:ServiceType": { + "type": "string", + "enum": [ + "mistral" + ] + }, + "inference.put_mistral:MistralServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://console.mistral.ai/api-keys/" + }, + "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "max_input_tokens": { + "description": "The maximum number of tokens per input before chunking occurs.", + "type": "number" + }, + "model": { + "externalDocs": { + "url": "https://docs.mistral.ai/getting-started/models/" + }, + "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 9f5e69717a..88c91fd4ef 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -5112,6 +5112,99 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD +======= + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-jinaai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html", + "name": "inference.put_jinaai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_jinaai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_jinaai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{jinaai_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.15.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-mistral", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html", + "name": "inference.put_mistral", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_mistral" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_mistral" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{mistral_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { +>>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -28755,6 +28848,251 @@ } }, { +<<<<<<< HEAD +======= + "description": "The type of service supported for the specified task type. In this case, `jinaai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `jinaai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAIServiceSettings", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAITaskSettings", + "namespace": "inference.put_jinaai" + } + } + } + ] + }, + "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutJinaAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" + }, + "PutJinaAiRequestExample2": { + "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", + "summary": "A rerank task", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_jinaai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "jinaai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `mistral`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_mistral" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `mistral` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "MistralServiceSettings", + "namespace": "inference.put_mistral" + } + } + } + ] + }, + "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutMistralRequestExample1": { + "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": \"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_mistral" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "MistralTaskType", + "namespace": "inference.put_mistral" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "mistral_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { +>>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "description": "The type of service supported for the specified task type. In this case, `openai`.", "name": "service", "required": true, @@ -101873,6 +102211,105 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD +======= + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L86-L89" + }, + { + "kind": "enum", + "members": [ + { + "name": "jinaai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L91-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "SimilarityType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L95-L99" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "TextEmbeddingTask", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L101-L106" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "MistralTaskType", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L79-L81" + }, + { + "kind": "enum", + "members": [ + { + "name": "mistral" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" + }, + { + "kind": "enum", + "members": [ + { +>>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "name": "chat_completion" }, { @@ -123379,9 +123816,74 @@ { "kind": "interface", "name": { +<<<<<<< HEAD ======= >>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) >>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +======= + "name": "MistralServiceSettings", + "namespace": "inference.put_mistral" + }, + "properties": [ + { + "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "mistral-api-keys", + "extDocUrl": "https://console.mistral.ai/api-keys/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of tokens per input before chunking occurs.", + "name": "max_input_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", + "extDocId": "mistral-api-models", + "extDocUrl": "https://docs.mistral.ai/getting-started/models/", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from the Mistral API.\nBy default, the `mistral` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L87-L114" + }, + { + "kind": "interface", + "name": { +>>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "name": "OpenAIServiceSettings", "namespace": "inference.put_openai" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index ec75d8b9ed..67dcd1d7d3 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9943,20 +9943,37 @@ }, { "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, "stack": { + "since": "8.15.0", "stability": "stable", "visibility": "public" } }, - "description": "Configure a Mistral inference endpoint", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html", + "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-mistral", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-mistral.html", "name": "inference.put_mistral", - "request": null, + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_mistral" + }, "requestBodyRequired": false, "requestMediaType": [ "application/json" ], - "response": null, + "response": { + "name": "Response", + "namespace": "inference.put_mistral" + }, "responseMediaType": [ "application/json" ], @@ -154497,6 +154514,206 @@ }, "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L101-L106" }, + { + "kind": "interface", + "name": { + "name": "MistralServiceSettings", + "namespace": "inference.put_mistral" + }, + "properties": [ + { + "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "mistral-api-keys", + "extDocUrl": "https://console.mistral.ai/api-keys/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of tokens per input before chunking occurs.", + "name": "max_input_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", + "extDocId": "mistral-api-models", + "extDocUrl": "https://docs.mistral.ai/getting-started/models/", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from the Mistral API.\nBy default, the `mistral` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L87-L114" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "MistralTaskType", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L79-L81" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `mistral`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_mistral" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `mistral` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "MistralServiceSettings", + "namespace": "inference.put_mistral" + } + } + } + ] + }, + "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutMistralRequestExample1": { + "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": \"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_mistral" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "MistralTaskType", + "namespace": "inference.put_mistral" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "mistral_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "mistral" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_mistral" + }, + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index a6e7ef730f..bd1d8e0370 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13639,6 +13639,29 @@ export type InferencePutJinaaiSimilarityType = 'cosine' | 'dot_product' | 'l2_no export type InferencePutJinaaiTextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search' +export interface InferencePutMistralMistralServiceSettings { + api_key: string + max_input_tokens?: integer + model: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferencePutMistralMistralTaskType = 'text_embedding' + +export interface InferencePutMistralRequest extends RequestBase { + task_type: InferencePutMistralMistralTaskType + mistral_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutMistralServiceType + service_settings: InferencePutMistralMistralServiceSettings + } +} + +export type InferencePutMistralResponse = InferenceInferenceEndpointInfo + +export type InferencePutMistralServiceType = 'mistral' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 08e1ad46a0..ec815dc137 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -350,6 +350,7 @@ inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/refe inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-jinaai.html inference-api-put-googlevertexai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-google-vertex-ai.html inference-api-put-googleaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-google-ai-studio.html +inference-api-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-mistral.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html inference-api-put-voyageai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-voyageai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html @@ -401,6 +402,8 @@ migrate,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/migrate migrate-index-allocation-filters,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/migrate-index-allocation-filters.html migration-api-deprecation,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/migration-api-deprecation.html migration-api-feature-upgrade,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/feature-migration-api.html +mistral-api-keys,https://console.mistral.ai/api-keys/ +mistral-api-models,https://docs.mistral.ai/getting-started/models/ ml-apis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ml-apis.html ml-classification,https://www.elastic.co/guide/en/machine-learning/{branch}/ml-dfa-classification.html ml-close-job,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ml-close-job.html diff --git a/specification/_json_spec/inference.put.mistral.json b/specification/_json_spec/inference.put.mistral.json new file mode 100644 index 0000000000..97633b233c --- /dev/null +++ b/specification/_json_spec/inference.put.mistral.json @@ -0,0 +1,35 @@ +{ + "inference.put_mistral": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html", + "description": "Configure a Mistral inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{mistral_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "mistral_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_mistral/PutMistralRequest.ts b/specification/inference/put_mistral/PutMistralRequest.ts new file mode 100644 index 0000000000..4aaa32acb9 --- /dev/null +++ b/specification/inference/put_mistral/PutMistralRequest.ts @@ -0,0 +1,114 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create a Mistral inference endpoint. + * + * Creates an inference endpoint to perform an inference task with the `mistral` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_mistral + * @availability stack since=8.15.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-mistral + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{mistral_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The task type. + * The only valid task type for the model to perform is `text_embedding`. + */ + task_type: MistralTaskType + /** + * The unique identifier of the inference endpoint. + */ + mistral_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `mistral`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `mistral` service. + */ + service_settings: MistralServiceSettings + } +} + +export enum MistralTaskType { + text_embedding +} + +export enum ServiceType { + mistral +} + +export class MistralServiceSettings { + /** + * A valid API key of your Mistral account. + * You can find your Mistral API keys or you can create a new one on the API Keys page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id mistral-api-keys + */ + api_key: string + /** + * The maximum number of tokens per input before chunking occurs. + */ + max_input_tokens?: integer + /** + * The name of the model to use for the inference task. + * Refer to the Mistral models documentation for the list of available text embedding models. + * @ext_doc_id mistral-api-models + */ + model: string + /** + * This setting helps to minimize the number of rate limit errors returned from the Mistral API. + * By default, the `mistral` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} diff --git a/specification/inference/put_mistral/PutMistralResponse.ts b/specification/inference/put_mistral/PutMistralResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_mistral/PutMistralResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_mistral/examples/request/PutMistralRequestExample1.yaml b/specification/inference/put_mistral/examples/request/PutMistralRequestExample1.yaml new file mode 100644 index 0000000000..52f173151b --- /dev/null +++ b/specification/inference/put_mistral/examples/request/PutMistralRequestExample1.yaml @@ -0,0 +1,12 @@ +# summary: +description: Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task. +# method_request: "PUT _inference/text_embedding/mistral-embeddings-test" +# type: "request" +value: |- + { + "service": "mistral", + "service_settings": { + "api_key": "Mistral-API-Key", + "model": "mistral-embed" + } + }