diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 996e44e4b7..3f0da5b63c 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18014,6 +18014,92 @@ "x-state": "Added in 8.14.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment includes:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task.
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -78089,6 +78175,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "type": "string" + }, + "deployment_id": { + "externalDocs": { + "url": "https://oai.azure.com/" + }, + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.", + "type": "string" + }, + "entra_id": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "resource_name": { + "externalDocs": { + "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll" + }, + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "type": "string" + } + }, + "required": [ + "api_version", + "deployment_id", + "resource_name" + ] + }, + "inference.put_azureopenai:AzureOpenAITaskSettings": { + "type": "object", +
"properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ @@ -78590,7 +78743,7 @@ "type": "object", "properties": { "user": { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "type": "string" } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4b2951c1d9..dc6fd9f932 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9836,6 +9836,92 @@ "x-state": "Added in 8.14.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": 
"#/components/schemas/inference.put_azureopenai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -49281,6 +49367,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "type": "string" + }, + "deployment_id": { + "externalDocs": { + "url": "https://oai.azure.com/" + }, + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "type": "string" + }, + "entra_id": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when 
you try to create your model.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "resource_name": { + "externalDocs": { + "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll" + }, + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "type": "string" + } + }, + "required": [ + "api_version", + "deployment_id", + "resource_name" + ] + }, + "inference.put_azureopenai:AzureOpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ @@ -49782,7 +49935,7 @@ "type": "object", "properties": { "user": { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "type": "string" } } diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 2d403c65e0..bbf3b056c9 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4591,6 +4591,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureopenai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "name": "inference.put_azureopenai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + 
"response": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureopenai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27495,26 +27540,43 @@ } }, { +<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +======= + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_amazonbedrock" +======= + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) } } }, { +<<<<<<< HEAD "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", +======= + "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AmazonBedrockServiceSettings", "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) } } }, @@ -27525,13 +27587,19 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AmazonBedrockTaskSettings", "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) } } } ] }, +<<<<<<< HEAD "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAmazonBedrockRequestExample1": { @@ -27543,6 +27611,19 @@ "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", "summary": "A completion task", "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" +======= + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) } }, "inherits": { @@ -27554,24 +27635,41 @@ "kind": "request", "name": { "name": "Request", +<<<<<<< HEAD "namespace": "inference.put_amazonbedrock" }, "path": [ { "description": "The type of the inference task that the model will perform.", +======= + "namespace": "inference.put_azureopenai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AmazonBedrockTaskType", "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) } } }, { "description": "The unique identifier of the inference endpoint.", +<<<<<<< HEAD "name": "amazonbedrock_inference_id", +======= + "name": "azureopenai_inference_id", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "required": true, "type": { "kind": "instance_of", @@ -27583,7 +27681,11 @@ } ], "query": [], +<<<<<<< HEAD "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) }, { "body": { @@ -27599,6 +27701,7 @@ "kind": "response", "name": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_amazonbedrock" }, "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" @@ -27986,6 +28089,11 @@ "namespace": "inference.put_cohere" }, "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" +======= + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) }, { "attachedBehaviors": [ @@ -102532,6 +102640,35 @@ }, "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, { "kind": "enum", "members": [ @@ -123243,6 
+123380,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AmazonBedrockServiceSettings", "namespace": "inference.put_amazonbedrock" }, @@ -123276,6 +123414,17 @@ { "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", "name": "provider", +======= + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "api_key", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "required": false, "type": { "kind": "instance_of", @@ -123286,10 +123435,15 @@ } }, { +<<<<<<< HEAD "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", "extDocId": "amazonbedrock-models", "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", "name": "region", +======= + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "name": "api_version", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "required": true, "type": { "kind": "instance_of", @@ -123300,7 +123454,41 @@ } }, { +<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", +======= + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "extDocId": "azureopenai", + "extDocUrl": "https://oai.azure.com/", + "name": "deployment_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the 
number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "extDocId": "azureopenai-quota-limits", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "rate_limit", "required": false, "type": { @@ -123312,10 +123500,17 @@ } }, { +<<<<<<< HEAD "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", "extDocId": "amazonbedrock-secret-keys", "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", "name": "secret_key", +======= + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "required": true, "type": { "kind": "instance_of", @@ -123326,7 +123521,11 @@ } } ], +<<<<<<< HEAD "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) }, { "kind": "interface", @@ -123353,6 +123552,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AmazonBedrockTaskSettings", "namespace": "inference.put_amazonbedrock" }, @@ -123648,6 +123848,14 @@ }, { "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +======= + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "user", "required": false, "type": { @@ -123659,6 +123867,7 @@ } } ], +<<<<<<< HEAD "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" }, { @@ -123791,6 +124000,9 @@ } ], "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) }, { "kind": "interface", @@ -123829,6 +124041,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "ElasticsearchServiceSettings", "namespace": "inference.put_elasticsearch" }, @@ -124376,6 +124589,8 @@ { "kind": "interface", "name": { +======= +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "OpenAIServiceSettings", "namespace": "inference.put_openai" }, @@ -124468,7 +124683,7 @@ }, "properties": [ { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "name": "user", "required": false, "type": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 2abb7adbfe..b955176043 100644 
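Aside for reviewers: the `api_key`/`entra_id` rule documented repeatedly above (provide exactly one of the two) is enforced server-side when the endpoint is created. As a rough illustration only, a client-side pre-check could look like the sketch below; the interface mirrors the generated `AzureOpenAIServiceSettings` shape, and the `validateAuth` helper is hypothetical, not part of this change.

```typescript
// Hypothetical pre-flight check mirroring the documented constraint:
// exactly one of `api_key` or `entra_id` must be provided.
interface AzureOpenAIServiceSettings {
  api_key?: string
  api_version: string
  deployment_id: string
  entra_id?: string
  resource_name: string
}

function validateAuth(settings: AzureOpenAIServiceSettings): void {
  const hasApiKey = settings.api_key !== undefined
  const hasEntraId = settings.entra_id !== undefined
  // Neither or both set: the create request would be rejected by Elasticsearch.
  if (hasApiKey === hasEntraId) {
    throw new Error('Specify exactly one of `api_key` or `entra_id`.')
  }
}
```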
diff --git a/output/schema/schema.json b/output/schema/schema.json index 2abb7adbfe..b955176043 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9438,6 +9438,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment includes:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureopenai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "name": "inference.put_azureopenai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureopenai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -9452,7 +9497,7 @@ }, "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-cohere", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html", "name": "inference.put_cohere", "privileges": { "cluster": [ @@ -151785,6 +151830,279 @@ }, "specLocation":
"inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" }, + { + "kind": "interface", + "name": { + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "api_key", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "name": "api_version", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "extDocId": "azureopenai", + "extDocUrl": "https://oai.azure.com/", + "name": "deployment_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "extDocId": "azureopenai-quota-limits", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" + }, + { + "kind": "interface", + "name": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + 
"description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + } + } + } + ] + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes 
significant resources.", + "examples": { + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureopenai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, { "kind": "interface", "name": { @@ -153933,7 +154251,7 @@ }, "properties": [ { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "name": "user", "required": false, "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index acdb73f1cc..662e5bf780 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13348,6 +13348,36 @@ export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfo export type InferencePutAzureaistudioServiceType = 'azureaistudio' +export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { + api_key?: string + api_version: string + deployment_id: string + entra_id?: string + rate_limit?: InferenceRateLimitSetting + resource_name: string +} + +export interface 
InferencePutAzureopenaiAzureOpenAITaskSettings { + user?: string +} + +export type InferencePutAzureopenaiAzureOpenAITaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureopenaiRequest extends RequestBase { + task_type: InferencePutAzureopenaiAzureOpenAITaskType + azureopenai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureopenaiServiceType + service_settings: InferencePutAzureopenaiAzureOpenAIServiceSettings + task_settings?: InferencePutAzureopenaiAzureOpenAITaskSettings + } +} + +export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutAzureopenaiServiceType = 'azureopenai' + export interface InferencePutCohereCohereServiceSettings { api_key: string embedding_type?: InferencePutCohereEmbeddingType diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 9d508f19d1..a515236b6e 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -29,6 +29,10 @@ autoscaling-put-autoscaling-policy,https://www.elastic.co/docs/api/doc/elasticse avoid-index-pattern-collisions,https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html#avoid-index-pattern-collisions azureaistudio-api-keys,https://ai.azure.com/ azureaistudio-endpoint-types,https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio +azureopenai,https://oai.azure.com/ +azureopenai-auth,https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication +azureopenai-portal,https://portal.azure.com/#view/HubsExtension/BrowseAll +azureopenai-quota-limits,https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits behavioral-analytics-collection-event,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search-application-post-behavioral-analytics-event behavioral-analytics-event-reference,https://www.elastic.co/guide/en/elasticsearch/reference/current/behavioral-analytics-event-reference.html byte-units,https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#byte-units @@ -333,8 +337,9 @@ inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html -inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html +inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html +inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html inference-api-put-elser,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html diff --git 
a/specification/_json_spec/inference.put_azureopenai.json b/specification/_json_spec/inference.put_azureopenai.json new file mode 100644 index 0000000000..8739adb1f5 --- /dev/null +++ b/specification/_json_spec/inference.put_azureopenai.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureopenai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "description": "Configure an Azure OpenAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{azureopenai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureopenai_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts new file mode 100644 index 0000000000..e3b561861e --- /dev/null +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -0,0 +1,152 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an Azure OpenAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `azureopenai` service. + * + * The list of chat completion models that you can choose from in your Azure OpenAI deployment includes: + * + * * [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models) + * * [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35) + * + * The list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings). + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_azureopenai + * @availability stack since=8.14.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-azureopenai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{azureopenai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. + */ + task_type: AzureOpenAITaskType + /** + * The unique identifier of the inference endpoint. + */ + azureopenai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `azureopenai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `azureopenai` service. + */ + service_settings: AzureOpenAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AzureOpenAITaskSettings + } +} + +export enum AzureOpenAITaskType { + completion, + text_embedding +} + +export enum ServiceType { + azureopenai +} + +export class AzureOpenAIServiceSettings { + /** + * A valid API key for your Azure OpenAI account. + * You must specify either `api_key` or `entra_id`. + * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureopenai-auth + */ + api_key?: string + /** + * The Azure API version ID to use. + * It is recommended to use the latest supported non-preview version. + */ + api_version: string + /** + * The deployment name of your deployed models. + * Your Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription. + * @ext_doc_id azureopenai + */ + deployment_id: string + /** + * A valid Microsoft Entra token. + * You must specify either `api_key` or `entra_id`. + * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * @ext_doc_id azureopenai-auth + */ + entra_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure. + * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type. + * For `text_embedding`, it is set to `1440`. + * For `completion`, it is set to `120`. + * @ext_doc_id azureopenai-quota-limits + */ + rate_limit?: RateLimitSetting + /** + * The name of your Azure OpenAI resource.
+ * You can find this from the list of resources in the Azure Portal for your subscription. + * @ext_doc_id azureopenai-portal + */ + resource_name: string +} + +export class AzureOpenAITaskSettings { + /** + * For a `completion` or `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml new file mode 100644 index 0000000000..c6a992f994 --- /dev/null +++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment. +# method_request: "PUT _inference/text_embedding/azure_openai_embeddings" +# type: "request" +value: |- + { + "service": "azureopenai", + "service_settings": { + "api_key": "Api-Key", + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01" + } + } diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml new file mode 100644 index 0000000000..771de789fe --- /dev/null +++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task. 
+# method_request: "PUT _inference/completion/azure_openai_completion" +# type: "request" +value: |- + { + "service": "azureopenai", + "service_settings": { + "api_key": "Api-Key", + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01" + } + } diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts index 0d1c03b005..3453d0cff3 100644 --- a/specification/inference/put_openai/PutOpenAiRequest.ts +++ b/specification/inference/put_openai/PutOpenAiRequest.ts @@ -138,7 +138,7 @@ export class OpenAIServiceSettings { export class OpenAITaskSettings { /** * For a `completion` or `text_embedding` task, specify the user issuing the request. - * This informaiton can be used for abuse detection. + * This information can be used for abuse detection. */ user?: string }
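For reviewers who want to see the new endpoint exercised end to end, here is a minimal usage sketch. It assumes the generic `client.inference.put` helper from the JavaScript client, which predates this PR; the dedicated method generated from this spec may be named differently depending on the client version, and the credential values are the same placeholders used in the examples above.

```typescript
import { Client } from '@elastic/elasticsearch'

const client = new Client({ node: 'https://localhost:9200' })

// Mirrors PutAzureOpenAiRequestExample1: a `text_embedding` endpoint backed by an
// existing Azure OpenAI deployment. No model is named here because the deployment
// referenced by `deployment_id` already determines the model.
async function createAzureOpenAiEmbeddings() {
  return client.inference.put({
    task_type: 'text_embedding',
    inference_id: 'azure_openai_embeddings',
    inference_config: {
      service: 'azureopenai',
      service_settings: {
        api_key: 'Api-Key', // placeholder; alternatively supply `entra_id`, never both
        resource_name: 'Resource-name',
        deployment_id: 'Deployment-id',
        api_version: '2024-02-01'
      }
    }
  })
}
```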