From 3fae18e6bffadd46fad90ae72c32db19c587c819 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:49:47 -0700 Subject: [PATCH] Add Azure OpenAI inference details (#4019) (cherry picked from commit d5b1a529a13e4e8eabc56716ed86be6819bd0718) --- output/openapi/elasticsearch-openapi.json | 155 +++++++- .../elasticsearch-serverless-openapi.json | 155 +++++++- output/schema/schema-serverless.json | 336 +++++++++++++++++- output/schema/schema.json | 320 ++++++++++++++++- output/typescript/types.ts | 30 ++ specification/_doc_ids/table.csv | 7 + .../_json_spec/inference.put_azureopenai.json | 35 ++ .../put_azureopenai/PutAzureOpenAiRequest.ts | 152 ++++++++ .../put_azureopenai/PutAzureOpenAiResponse.ts | 24 ++ .../PutAzureOpenAiRequestExample1.yaml | 14 + .../PutAzureOpenAiRequestExample2.yaml | 14 + .../inference/put_openai/PutOpenAiRequest.ts | 2 +- 12 files changed, 1239 insertions(+), 5 deletions(-) create mode 100644 specification/_json_spec/inference.put_azureopenai.json create mode 100644 specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts create mode 100644 specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts create mode 100644 specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml create mode 100644 specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 716dd69538..379b3c8348 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17844,6 +17844,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + 
"schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{huggingface_inference_id}": { "put": { "tags": [ @@ -77225,6 +77311,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + 
"type": "string" + }, + "deployment_id": { + "externalDocs": { + "url": "https://oai.azure.com/" + }, + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "type": "string" + }, + "entra_id": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "resource_name": { + "externalDocs": { + "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll" + }, + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "type": "string" + } + }, + "required": [ + "api_version", + "deployment_id", + "resource_name" + ] + }, + "inference.put_azureopenai:AzureOpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_hugging_face:HuggingFaceTaskType": { "type": "string", "enum": [ @@ -77429,7 +77582,7 @@ "type": "object", "properties": { "user": { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "type": "string" } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index ccf3f005ee..8e63f206a6 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9670,6 +9670,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` 
in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{huggingface_inference_id}": { "put": { "tags": [ @@ -48421,6 +48507,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to 
create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+            "type": "string"
+          },
+          "api_version": {
+            "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.",
+            "type": "string"
+          },
+          "deployment_id": {
+            "externalDocs": {
+              "url": "https://oai.azure.com/"
+            },
+            "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.",
+            "type": "string"
+          },
+          "entra_id": {
+            "externalDocs": {
+              "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication"
+            },
+            "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.",
+            "type": "string"
+          },
+          "rate_limit": {
+            "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+          },
+          "resource_name": {
+            "externalDocs": {
+              "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll"
+            },
+            "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.",
+            "type": "string"
+          }
+        },
+        "required": [
+          "api_version",
+          "deployment_id",
+          "resource_name"
+        ]
+      },
+      "inference.put_azureopenai:AzureOpenAITaskSettings": {
+        "type": "object",
+        "properties": {
+          "user": {
+            "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.",
+            "type": "string"
+          }
+        }
+      },
       "inference.put_hugging_face:HuggingFaceTaskType": {
         "type": "string",
         "enum": [
@@ -48625,7 +48778,7 @@
         "type": "object",
         "properties": {
           "user": {
-            "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.",
+            "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.",
             "type": "string"
           }
         }
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index 339e838046..1304b877b8 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -4598,6 +4598,51 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "8.14.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "docId": "inference-api-put-azureopenai",
+      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html",
+      "name": "inference.put_azureopenai",
+      "privileges": {
+        "cluster": [
+          "manage_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.put_azureopenai"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.put_azureopenai"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "PUT"
+          ],
+          "path": "/_inference/{task_type}/{azureopenai_inference_id}"
+        }
+      ]
+    },
@@ -27362,6 +27489,186 @@
       "name": {
         "name": "Response",
         "namespace": "inference.put_amazonbedrock"
       },
       "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24"
     },
+    {
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The chunking configuration object.",
+            "extDocId": "inference-chunking",
+            "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config",
+            "name": "chunking_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "InferenceChunkingSettings",
+                "namespace": "inference._types"
+              }
+            }
+          },
+          {
+            "description": "The type of service supported for the specified task type. In this case, `azureopenai`.",
+            "name": "service",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ServiceType",
+                "namespace": "inference.put_azureopenai"
+              }
+            }
+          },
+          {
+            "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.",
+            "name": "service_settings",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "AzureOpenAIServiceSettings",
+                "namespace": "inference.put_azureopenai"
+              }
+            }
+          },
+          {
+            "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+            "name": "task_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "AzureOpenAITaskSettings",
+                "namespace": "inference.put_azureopenai"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "examples": {
+        "PutAzureOpenAiRequestExample1": {
+          "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.",
+          "summary": "A text embedding task",
+          "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}"
+        },
+        "PutAzureOpenAiRequestExample2": {
+          "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.",
+          "summary": "A completion task",
+          "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}"
+        }
+      },
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "kind": "request",
+      "name": {
+        "name": "Request",
+        "namespace": "inference.put_azureopenai"
+      },
+      "path": [
+        {
+          "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "AzureOpenAITaskType",
+              "namespace": "inference.put_azureopenai"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier of the inference endpoint.",
+          "name": "azureopenai_inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [],
+      "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88"
+    },
+    {
+      "body": {
+        "kind": "value",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceEndpointInfo",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "kind": "response",
+      "name": {
+        "name": "Response",
+        "namespace": "inference.put_azureopenai"
+      },
+      "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24"
+    },
@@ -102454,6 +102606,38 @@
     "kind": "enum",
     "members": [
       {
+        "name": "completion"
+      },
+      {
+        "name": "text_embedding"
+      }
+    ],
+    "name": {
+      "name": "AzureOpenAITaskType",
+      "namespace": "inference.put_azureopenai"
+    },
+    "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93"
+  },
+  {
+    "kind": "enum",
+    "members": [
+      {
+        "name": "azureopenai"
+      }
+    ],
+    "name": {
+      "name": "ServiceType",
+      "namespace": "inference.put_azureopenai"
+    },
+    "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97"
+  },
+  {
+    "kind": "enum",
+    "members": [
+      {
        "name": "chat_completion"
      }
    ],
@@ -123268,6 +123457,186 @@
         }
       }
     },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "AzureOpenAIServiceSettings",
+        "namespace": "inference.put_azureopenai"
+      },
+      "properties": [
+        {
+          "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+          "extDocId": "azureopenai-auth",
+          "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication",
+          "name": "api_key",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.",
+          "name": "api_version",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.",
+          "extDocId": "azureopenai",
+          "extDocUrl": "https://oai.azure.com/",
+          "name": "deployment_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.",
+          "extDocId": "azureopenai-auth",
+          "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication",
+          "name": "entra_id",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.",
+          "extDocId": "azureopenai-quota-limits",
+          "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits",
+          "name": "rate_limit",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RateLimitSetting",
+              "namespace": "inference._types"
+            }
+          }
+        },
+        {
+          "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.",
+          "extDocId": "azureopenai-portal",
+          "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll",
+          "name": "resource_name",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144"
+    },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "AzureOpenAITaskSettings",
+        "namespace": "inference.put_azureopenai"
+      },
+      "properties": [
+        {
+          "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.",
+          "name": "user",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152"
+    },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "RateLimitSetting",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "The number of requests allowed per minute.",
+          "name": "requests_per_minute",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Services.ts#L95-L100"
+    },
     {
       "kind": "interface",
       "name": {
@@ -124562,7 +124896,7 @@
       },
       "properties": [
         {
-          "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.",
+          "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.",
           "name": "user",
           "required": false,
           "type": {
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 24732c8026..d670fbbb7e 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9348,6 +9348,51 @@
       }
     ]
   },
+  {
+    "availability": {
+      "serverless": {
+        "stability": "stable",
+        "visibility": "public"
+      },
+      "stack": {
+        "since": "8.14.0",
+        "stability": "stable",
+        "visibility": "public"
+      }
+    },
+    "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+    "docId": "inference-api-put-azureopenai",
+    "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html",
+    "name": "inference.put_azureopenai",
+    "privileges": {
+      "cluster": [
+        "manage_inference"
+      ]
+    },
+    "request": {
+      "name": "Request",
+      "namespace": "inference.put_azureopenai"
+    },
+    "requestBodyRequired": false,
+    "requestMediaType": [
+      "application/json"
+    ],
+    "response": {
+      "name": "Response",
+      "namespace": "inference.put_azureopenai"
+    },
+    "responseMediaType": [
+      "application/json"
+    ],
+    "urls": [
+      {
+        "methods": [
+          "PUT"
+        ],
+        "path": "/_inference/{task_type}/{azureopenai_inference_id}"
+      }
+    ]
+  },
   {
     "availability": {
       "serverless": {
@@ -150770,6 +150815,279 @@
     },
     "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91"
   },
+  {
+    "kind": "interface",
+    "name": {
+      "name": "AzureOpenAIServiceSettings",
+      "namespace": "inference.put_azureopenai"
+    },
+    "properties": [
+      {
+        "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+        "extDocId": "azureopenai-auth",
+        "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication",
+        "name": "api_key",
+        "required": false,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.",
+        "name": "api_version",
+        "required": true,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.",
+        "extDocId": "azureopenai",
+        "extDocUrl": "https://oai.azure.com/",
+        "name": "deployment_id",
+        "required": true,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.",
+        "extDocId": "azureopenai-auth",
+        "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication",
+        "name": "entra_id",
+        "required": false,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.",
+        "extDocId": "azureopenai-quota-limits",
+        "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits",
+        "name": 
"rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" + }, + { + "kind": "interface", + "name": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `azureopenai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + } + } + } + ] + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureopenai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, { "kind": "interface", "name": { @@ -151560,7 +151878,7 @@ }, "properties": [ { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "name": "user", "required": false, "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 6b33a679cd..f60031d59c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13277,6 +13277,36 @@ export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' +export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { + api_key?: string + api_version: string + deployment_id: string + entra_id?: string + rate_limit?: InferenceRateLimitSetting + resource_name: string +} + +export interface InferencePutAzureopenaiAzureOpenAITaskSettings { + user?: string +} + +export type InferencePutAzureopenaiAzureOpenAITaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureopenaiRequest extends RequestBase { + task_type: 
InferencePutAzureopenaiAzureOpenAITaskType + azureopenai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureopenaiServiceType + service_settings: InferencePutAzureopenaiAzureOpenAIServiceSettings + task_settings?: InferencePutAzureopenaiAzureOpenAITaskSettings + } +} + +export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutAzureopenaiServiceType = 'azureopenai' + export interface InferencePutHuggingFaceHuggingFaceServiceSettings { api_key: string rate_limit?: InferenceRateLimitSetting diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index b0b8fe3e37..391312abcd 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -24,6 +24,12 @@ autoscaling-get-autoscaling-capacity,https://www.elastic.co/docs/api/doc/elastic autoscaling-get-autoscaling-policy,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-autoscaling-get-autoscaling-policy autoscaling-put-autoscaling-policy,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-autoscaling-put-autoscaling-policy avoid-index-pattern-collisions,https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html#avoid-index-pattern-collisions +azureaistudio-api-keys,https://ai.azure.com/ +azureaistudio-endpoint-types,https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio +azureopenai,https://oai.azure.com/ +azureopenai-auth,https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication +azureopenai-portal,https://portal.azure.com/#view/HubsExtension/BrowseAll +azureopenai-quota-limits,https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits behavioral-analytics-collection-event,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search-application-post-behavioral-analytics-event behavioral-analytics-event-reference,https://www.elastic.co/guide/en/elasticsearch/reference/current/behavioral-analytics-event-reference.html byte-units,https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#byte-units @@ -322,6 +328,7 @@ inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html +inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html diff --git a/specification/_json_spec/inference.put_azureopenai.json b/specification/_json_spec/inference.put_azureopenai.json new file mode 100644 index 0000000000..8739adb1f5 --- /dev/null +++ b/specification/_json_spec/inference.put_azureopenai.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureopenai": { + 
"documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "description": "Configure an Azure OpenAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{azureopenai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureopenai_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts new file mode 100644 index 0000000000..e3b561861e --- /dev/null +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -0,0 +1,152 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an Azure OpenAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `azureopenai` service. + * + * The list of chat completion models that you can choose from in your Azure OpenAI deployment include: + * + * * [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models) + * * [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35) + * + * The list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings). + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
+ *
+ * @rest_spec_name inference.put_azureopenai
+ * @availability stack since=8.14.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-azureopenai
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{azureopenai_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
+     */
+    task_type: AzureOpenAITaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    azureopenai_inference_id: Id
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `azureopenai`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `azureopenai` service.
+     */
+    service_settings: AzureOpenAIServiceSettings
+    /**
+     * Settings to configure the inference task.
+     * These settings are specific to the task type you specified.
+     */
+    task_settings?: AzureOpenAITaskSettings
+  }
+}
+
+export enum AzureOpenAITaskType {
+  completion,
+  text_embedding
+}
+
+export enum ServiceType {
+  azureopenai
+}
+
+export class AzureOpenAIServiceSettings {
+  /**
+   * A valid API key for your Azure OpenAI account.
+   * You must specify either `api_key` or `entra_id`.
+   * If you do not provide either or you provide both, you will receive an error when you try to create your model.
+   *
+   * IMPORTANT: You need to provide the API key only once, during the inference model creation.
+   * The get inference endpoint API does not retrieve your API key.
+   * After creating the inference model, you cannot change the associated API key.
+   * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
+   * @ext_doc_id azureopenai-auth
+   */
+  api_key?: string
+  /**
+   * The Azure API version ID to use.
+   * It is recommended to use the latest supported non-preview version.
+   */
+  api_version: string
+  /**
+   * The deployment name of your deployed models.
+   * Your Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.
+   * @ext_doc_id azureopenai
+   */
+  deployment_id: string
+  /**
+   * A valid Microsoft Entra token.
+   * You must specify either `api_key` or `entra_id`.
+   * If you do not provide either or you provide both, you will receive an error when you try to create your model.
+   * @ext_doc_id azureopenai-auth
+   */
+  entra_id?: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from Azure.
+   * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
+   * For `text_embedding`, it is set to `1440`.
+   * For `completion`, it is set to `120`.
+   * @ext_doc_id azureopenai-quota-limits
+   */
+  rate_limit?: RateLimitSetting
+  /**
+   * The name of your Azure OpenAI resource.
+   * You can find it in the list of resources in the Azure Portal for your subscription.
+   * @ext_doc_id azureopenai-portal
+   */
+  resource_name: string
+}
+
+export class AzureOpenAITaskSettings {
+  /**
+   * For a `completion` or `text_embedding` task, specify the user issuing the request.
+   * This information can be used for abuse detection.
+   */
+  user?: string
+}
diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts
new file mode 100644
index 0000000000..d40639b031
--- /dev/null
+++ b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfo } from '@inference/_types/Services'
+
+export class Response {
+  body: InferenceEndpointInfo
+}
diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml
new file mode 100644
index 0000000000..c6a992f994
--- /dev/null
+++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml
@@ -0,0 +1,14 @@
+summary: A text embedding task
+description: Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is already defined in the Azure OpenAI deployment.
+# method_request: "PUT _inference/text_embedding/azure_openai_embeddings"
+# type: "request"
+value: |-
+  {
+    "service": "azureopenai",
+    "service_settings": {
+      "api_key": "Api-Key",
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01"
+    }
+  }
diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml
new file mode 100644
index 0000000000..771de789fe
--- /dev/null
+++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml
@@ -0,0 +1,14 @@
+summary: A completion task
+description: Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.
+# method_request: "PUT _inference/completion/azure_openai_completion"
+# type: "request"
+value: |-
+  {
+    "service": "azureopenai",
+    "service_settings": {
+      "api_key": "Api-Key",
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01"
+    }
+  }
diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts
index 886905600e..8bf558e857 100644
--- a/specification/inference/put_openai/PutOpenAiRequest.ts
+++ b/specification/inference/put_openai/PutOpenAiRequest.ts
@@ -138,7 +138,7 @@ export class OpenAITaskSettings {
   /**
    * For a `completion` or `text_embedding` task, specify the user issuing the request.
-   * This informaiton can be used for abuse detection.
+   * This information can be used for abuse detection.
    */
   user?: string
 }
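
Note (reviewer sketch, not part of the diff): besides the two `api_key` examples above, the new schema also permits Microsoft Entra authentication via `entra_id`, optionally combined with the `user` task setting. A hypothetical request of that shape, with placeholder token, resource, and deployment values, would look like:

PUT _inference/completion/azure_openai_completion
{
  "service": "azureopenai",
  "service_settings": {
    "entra_id": "Entra-Token",
    "resource_name": "Resource-name",
    "deployment_id": "Deployment-id",
    "api_version": "2024-02-01"
  },
  "task_settings": {
    "user": "user-1"
  }
}

Per AzureOpenAIServiceSettings, `api_key` and `entra_id` are mutually exclusive: exactly one must appear in `service_settings`, and supplying both or neither is rejected when the endpoint is created.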