diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index f2045a8ba9..996e44e4b7 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17928,6 +17928,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI Studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -77929,6 +78015,80 @@ "max_tokens" ] }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "type": "string" + }, + "target": { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": 
"#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 084973d6fe..4b2951c1d9 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9750,6 +9750,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + 
"service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -49121,6 +49207,80 @@ "max_tokens" ] }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "type": "string" + }, + "target": { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only 
certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 2873629756..2d403c65e0 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4681,6 +4681,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "name": "inference.put_azureaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": 
"Response", + "namespace": "inference.put_azureaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureaistudio_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27682,6 +27727,136 @@ }, "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + } + } + } + ] + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureaistudio_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -102217,6 +102392,35 @@ }, "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, { "kind": "enum", "members": [ @@ -123312,6 +123516,151 @@ ], "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + 
"extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + 
} + }, + { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 7f583fec96..2abb7adbfe 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9393,6 +9393,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "name": "inference.put_azureaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureaistudio_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -151436,6 +151481,310 @@ }, "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": 
"azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { 
+ "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + } + } + } + ] + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureaistudio_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5081060693..acdb73f1cc 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13315,6 +13315,39 @@ export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo export type InferencePutAnthropicServiceType = 'anthropic' +export interface InferencePutAzureaistudioAzureAiStudioServiceSettings { + api_key: string + endpoint_type: string + target: string + provider: string + rate_limit?: InferenceRateLimitSetting +} + +export interface InferencePutAzureaistudioAzureAiStudioTaskSettings { + do_sample?: float + max_new_tokens?: integer + temperature?: float + top_p?: float + user?: string +} + +export type InferencePutAzureaistudioAzureAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureaistudioRequest extends RequestBase { + task_type: InferencePutAzureaistudioAzureAiStudioTaskType + azureaistudio_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureaistudioServiceType + service_settings: InferencePutAzureaistudioAzureAiStudioServiceSettings + task_settings?: InferencePutAzureaistudioAzureAiStudioTaskSettings + } +} + +export type InferencePutAzureaistudioResponse = 
InferenceInferenceEndpointInfo + +export type InferencePutAzureaistudioServiceType = 'azureaistudio' + export interface InferencePutCohereCohereServiceSettings { api_key: string embedding_type?: InferencePutCohereEmbeddingType diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 08d221524a..9d508f19d1 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -27,6 +27,8 @@ autoscaling-get-autoscaling-capacity,https://www.elastic.co/docs/api/doc/elastic autoscaling-get-autoscaling-policy,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-autoscaling-get-autoscaling-policy autoscaling-put-autoscaling-policy,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-autoscaling-put-autoscaling-policy avoid-index-pattern-collisions,https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html#avoid-index-pattern-collisions +azureaistudio-api-keys,https://ai.azure.com/ +azureaistudio-endpoint-types,https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio behavioral-analytics-collection-event,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search-application-post-behavioral-analytics-event behavioral-analytics-event-reference,https://www.elastic.co/guide/en/elasticsearch/reference/current/behavioral-analytics-event-reference.html byte-units,https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#byte-units @@ -332,6 +334,7 @@ inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elast inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html +inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html inference-api-put-elser,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html diff --git a/specification/_json_spec/inference.put_azureaistudio.json b/specification/_json_spec/inference.put_azureaistudio.json new file mode 100644 index 0000000000..00de83eca7 --- /dev/null +++ b/specification/_json_spec/inference.put_azureaistudio.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureaistudio": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "description": "Configure an Azure AI Studio inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{azureaistudio_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureaistudio_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service 
settings" + } + } +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts new file mode 100644 index 0000000000..5e3602f381 --- /dev/null +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts @@ -0,0 +1,164 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create an Azure AI studio inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `azureaistudio` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_azureaistudio + * @availability stack since=8.14.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-azureaistudio + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{azureaistudio_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: AzureAiStudioTaskType + /** + * The unique identifier of the inference endpoint. + */ + azureaistudio_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `azureaistudio`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `openai` service. + */ + service_settings: AzureAiStudioServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. 
+ */ + task_settings?: AzureAiStudioTaskSettings + } +} + +export enum AzureAiStudioTaskType { + completion, + text_embedding +} + +export enum ServiceType { + azureaistudio +} + +export class AzureAiStudioServiceSettings { + /** + * A valid API key of your Azure AI Studio model deployment. + * This key can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureaistudio-api-keys + */ + api_key: string + /** + * The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`. + * The `token` endpoint type is for "pay as you go" endpoints that are billed per token. + * The `realtime` endpoint type is for "real-time" endpoints that are billed per hour of usage. + * @ext_doc_id azureaistudio-endpoint-types + */ + endpoint_type: string + /** + * The target URL of your Azure AI Studio model deployment. + * This can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + */ + target: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `cohere` - available for `text_embedding` and `completion` task types + * * `databricks` - available for `completion` task type only + * * `meta` - available for `completion` task type only + * * `microsoft_phi` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + * * `openai` - available for `text_embedding` and `completion` task types + */ + provider: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure AI Studio. + * By default, the `azureaistudio` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} + +export class AzureAiStudioTaskSettings { + /** + * For a `completion` task, instruct the inference process to perform sampling. + * It has no effect unless `temperature` or `top_p` is specified. + */ + do_sample?: float + /** + * For a `completion` task, provide a hint for the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, control the apparent creativity of generated completions with a sampling temperature. + * It must be a number in the range of 0.0 to 2.0. + * It should not be used if `top_p` is specified. + */ + temperature?: float + /** + * For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability. + * It is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0. + * It should not be used if `temperature` is specified. + */ + top_p?: float + /** + * For a `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. 
+ */ + user?: string +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml new file mode 100644 index 0000000000..0db68a9a3a --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment. +# method_request: "PUT _inference/text_embedding/azure_ai_studio_embeddings" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-Uri", + "provider": "openai", + "endpoint_type": "token" + } + } diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml new file mode 100644 index 0000000000..74d00dce8e --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task. +# method_request: "PUT _inference/completion/azure_ai_studio_completion" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-URI", + "provider": "databricks", + "endpoint_type": "realtime" + } + }
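The request examples in this change only exercise `service_settings`. As a sanity check of the generated TypeScript surface added to `output/typescript/types.ts`, here is a minimal sketch that builds a `completion` request object with the optional `task_settings` as well. The import path, the constant name, and the concrete setting values are illustrative, not part of this change; it mirrors `PutAzureAiStudioRequestExample2`.

```ts
import { InferencePutAzureaistudioRequest } from './output/typescript/types'

// Mirrors PutAzureAiStudioRequestExample2, plus optional task_settings.
// temperature and top_p are mutually exclusive per the spec descriptions,
// so only temperature is set here; 64 matches the documented server
// default for max_new_tokens.
const completionRequest: InferencePutAzureaistudioRequest = {
  task_type: 'completion',
  azureaistudio_inference_id: 'azure_ai_studio_completion',
  body: {
    service: 'azureaistudio',
    service_settings: {
      api_key: 'Azure-AI-Studio-API-key',
      target: 'Target-URI',
      provider: 'databricks',
      endpoint_type: 'realtime'
    },
    task_settings: {
      temperature: 0.7, // range 0.0 to 2.0
      max_new_tokens: 64
    }
  }
}
```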
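A `text_embedding` endpoint uses the `user` task setting instead of the sampling knobs. Again a sketch with placeholder values (the `user-123` identifier is hypothetical), reusing the import above and mirroring `PutAzureAiStudioRequestExample1`:

```ts
// The user field is the only task setting defined for text_embedding;
// per the spec it can be used for abuse detection.
const embeddingRequest: InferencePutAzureaistudioRequest = {
  task_type: 'text_embedding',
  azureaistudio_inference_id: 'azure_ai_studio_embeddings',
  body: {
    service: 'azureaistudio',
    service_settings: {
      api_key: 'Azure-AI-Studio-API-key',
      target: 'Target-Uri',
      provider: 'openai',
      endpoint_type: 'token'
    },
    task_settings: {
      user: 'user-123'
    }
  }
}
```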
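The `provider` description encodes a provider/task-type compatibility table that clients may want to validate before issuing the PUT, since `provider` itself is typed as a plain string. A small sketch of that table in TypeScript; the helper name is hypothetical, and the data is transcribed directly from the `AzureAiStudioServiceSettings` description:

```ts
import { InferencePutAzureaistudioAzureAiStudioTaskType } from './output/typescript/types'

// Provider/task-type compatibility, transcribed from the provider
// description in AzureAiStudioServiceSettings.
const providerTaskTypes: Record<string, InferencePutAzureaistudioAzureAiStudioTaskType[]> = {
  cohere: ['text_embedding', 'completion'],
  databricks: ['completion'],
  meta: ['completion'],
  microsoft_phi: ['completion'],
  mistral: ['completion'],
  openai: ['text_embedding', 'completion']
}

function supportsTaskType(
  provider: string,
  taskType: InferencePutAzureaistudioAzureAiStudioTaskType
): boolean {
  return providerTaskTypes[provider]?.includes(taskType) ?? false
}

// supportsTaskType('databricks', 'text_embedding') === false
// supportsTaskType('openai', 'text_embedding') === true
```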