From c8bac297b2e2960aa71b042c3eacf8dc7b3ae869 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:49:47 -0700 Subject: [PATCH] Add Azure OpenAI inference details (#4019) (cherry picked from commit d5b1a529a13e4e8eabc56716ed86be6819bd0718) --- output/openapi/elasticsearch-openapi.json | 155 +- .../elasticsearch-serverless-openapi.json | 155 +- output/schema/schema-serverless.json | 1368 ++++++++++++++++- output/schema/schema.json | 320 +++- output/typescript/types.ts | 30 + specification/_doc_ids/table.csv | 5 + .../_json_spec/inference.put_azureopenai.json | 35 + .../put_azureopenai/PutAzureOpenAiRequest.ts | 152 ++ .../put_azureopenai/PutAzureOpenAiResponse.ts | 24 + .../PutAzureOpenAiRequestExample1.yaml | 14 + .../PutAzureOpenAiRequestExample2.yaml | 14 + .../inference/put_openai/PutOpenAiRequest.ts | 2 +- 12 files changed, 2269 insertions(+), 5 deletions(-) create mode 100644 specification/_json_spec/inference.put_azureopenai.json create mode 100644 specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts create mode 100644 specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts create mode 100644 specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml create mode 100644 specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index c1b571220a..1a512cbd92 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17937,6 +17937,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI 
deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + 
"properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -77309,6 +77395,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + 
"inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "type": "string" + }, + "deployment_id": { + "externalDocs": { + "url": "https://oai.azure.com/" + }, + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "type": "string" + }, + "entra_id": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "resource_name": { + "externalDocs": { + "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll" + }, + "description": "The 
name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "type": "string" + } + }, + "required": [ + "api_version", + "deployment_id", + "resource_name" + ] + }, + "inference.put_azureopenai:AzureOpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_eis:EisTaskType": { "type": "string", "enum": [ @@ -77498,7 +77651,7 @@ "type": "object", "properties": { "user": { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "type": "string" } } diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index bd05933bf3..61d6307545 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9903,6 +9903,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureopenai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* 
[GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureopenai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureopenai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + }, + 
"service_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureOpenAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -48639,6 +48725,73 @@ } } }, + "inference.put_azureopenai:AzureOpenAITaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureopenai:ServiceType": { + "type": "string", + "enum": [ + "azureopenai" + ] + }, + "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + 
"url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "type": "string" + }, + "deployment_id": { + "externalDocs": { + "url": "https://oai.azure.com/" + }, + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "type": "string" + }, + "entra_id": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication" + }, + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "resource_name": { + "externalDocs": { + "url": "https://portal.azure.com/#view/HubsExtension/BrowseAll" + }, + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "type": "string" + } + }, + "required": [ + "api_version", + "deployment_id", + "resource_name" + ] + }, 
+ "inference.put_azureopenai:AzureOpenAITaskSettings": { + "type": "object", + "properties": { + "user": { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_eis:EisTaskType": { "type": "string", "enum": [ @@ -48828,7 +48981,7 @@ "type": "object", "properties": { "user": { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "type": "string" } } diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index eb9a2bde5f..d4314c078c 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4765,15 +4765,26 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD "since": "8.16.0", +======= + "since": "8.14.0", +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "stability": "stable", "visibility": "public" } }, +<<<<<<< HEAD "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": 
"inference-api-put-alibabacloud", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html", "name": "inference.put_alibabacloud", +======= + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureopenai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "name": "inference.put_azureopenai", +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "privileges": { "cluster": [ "manage_inference" @@ -4781,7 +4792,11 @@ }, "request": { "name": "Request", +<<<<<<< 
HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_azureopenai" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4789,7 +4804,11 @@ ], "response": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_azureopenai" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, "responseMediaType": [ "application/json" @@ -4799,7 +4818,11 @@ "methods": [ "PUT" ], +<<<<<<< HEAD "path": "/_inference/{task_type}/{alibabacloud_inference_id}" +======= + "path": "/_inference/{task_type}/{azureopenai_inference_id}" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } ] }, @@ -27633,6 +27656,11 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD +======= +<<<<<<< HEAD +======= +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", @@ -27647,26 +27675,60 @@ } }, { +<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", +======= +<<<<<<< HEAD + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +======= + "description": "The type of service supported for the specified task type. 
In this case, `azureopenai`.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD + "namespace": "inference.put_amazonbedrock" +======= + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } } }, { +<<<<<<< HEAD "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", +======= +<<<<<<< HEAD + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", +======= + "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } } }, @@ -27677,13 +27739,24 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudTaskSettings", "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" +>>>>>>> 
52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } } } ] }, +<<<<<<< HEAD "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAlibabaCloudRequestExample1": { @@ -27705,6 +27778,33 @@ "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", "summary": "A text embedding task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" +======= +<<<<<<< HEAD + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" +======= + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo 
models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } }, "inherits": { @@ -27716,24 +27816,57 @@ "kind": "request", "name": { "name": "Request", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD + "namespace": "inference.put_amazonbedrock" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, "path": [ { "description": "The type of the inference task that the model will perform.", +<<<<<<< HEAD +======= +======= + "namespace": "inference.put_azureopenai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudTaskType", "namespace": "inference.put_alibabacloud" +======= +<<<<<<< HEAD + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" +======= + "name": "AzureOpenAITaskType", + 
"namespace": "inference.put_azureopenai" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } } }, { "description": "The unique identifier of the inference endpoint.", +<<<<<<< HEAD "name": "alibabacloud_inference_id", +======= +<<<<<<< HEAD + "name": "amazonbedrock_inference_id", +======= + "name": "azureopenai_inference_id", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "required": true, "type": { "kind": "instance_of", @@ -27745,7 +27878,15 @@ } ], "query": [], +<<<<<<< HEAD "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" +======= +<<<<<<< HEAD + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, { "body": { @@ -27761,9 +27902,407 @@ "kind": "response", "name": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" +======= +<<<<<<< HEAD + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { +<<<<<<< HEAD +======= + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": 
"inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + } + } + } + ] + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n 
\"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "anthropic_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. 
In this case, `azureaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + } + } + } + ] + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureaistudio_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" + }, + { + 
"attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `cohere`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + } + } + } + ] + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the 
`\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutCohereRequestExample1": { + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_cohere" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "cohere_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + 
"namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" +======= + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) }, { "attachedBehaviors": [ @@ -27773,6 +28312,8 @@ "kind": "properties", "properties": [ { +>>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "description": "The type of service supported for the specified task type. In this case, `elastic`.", "name": "service", "required": true, @@ -100923,6 +101464,7 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "completion" }, { @@ -100932,32 +101474,255 @@ "name": "space_embedding" }, { +======= +<<<<<<< HEAD +======= + "name": "completion" + }, + { +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "text_embedding" } ], "name": { +<<<<<<< HEAD "name": "AlibabaCloudTaskType", "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" +======= + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, { "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "alibabacloud-ai-search" +======= + "name": "amazonbedrock" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) } ], "name": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" +======= + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) + }, + { + "kind": "enum", + 
"members": [ + { +<<<<<<< HEAD +======= + "name": "completion" + } + ], + "name": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" + }, + { + "kind": "enum", + "members": [ + { + "name": "anthropic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" }, { "kind": "enum", "members": [ { + "name": "azureaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "CohereTaskType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88" + }, + { + "kind": "enum", + "members": [ + { + "name": "byte" + }, + { + "name": "float" + }, + { + "name": "int8" + } + ], + "name": { + "name": "EmbeddingType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "InputType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" + }, + { + "kind": "enum", + "members": [ + { + "name": "cohere" + } + ], + "name": { + "name": 
"ServiceType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" + }, + { + "kind": "enum", + "members": [ + { + "name": "END" + }, + { + "name": "NONE" + }, + { + "name": "START" + } + ], + "name": { + "name": "TruncateType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, + { + "kind": "enum", + "members": [ + { +>>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "chat_completion" } ], @@ -121600,6 +122365,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" }, @@ -121607,6 +122373,17 @@ { "description": "A valid API key for the AlibabaCloud AI Search API.", "name": "api_key", +======= +<<<<<<< HEAD +======= +<<<<<<< HEAD + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + 
"name": "access_key", "required": true, "type": { "kind": "instance_of", @@ -121617,10 +122394,77 @@ } }, { + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", +======= + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureopenai-auth", + "extDocUrl": 
"https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "api_key", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { +<<<<<<< HEAD + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", +======= + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "name": "api_version", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { +<<<<<<< HEAD "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", "extDocId": "alibabacloud-api-keys", "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", "name": "host", +======= +<<<<<<< HEAD + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", +======= + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "extDocId": "azureopenai", + "extDocUrl": "https://oai.azure.com/", + "name": "deployment_id", +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "required": true, "type": { "kind": "instance_of", @@ -121631,7 +122475,28 @@ } }, { 
+<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", +======= + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "extDocId": "azureopenai-quota-limits", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "rate_limit", "required": false, "type": { @@ -121643,6 +122508,7 @@ } }, { +<<<<<<< HEAD "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max รท qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` 
task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", "name": "service_id", "required": true, @@ -121657,6 +122523,19 @@ { "description": "The name of the workspace used for the inference task.", "name": "workspace", +======= +<<<<<<< HEAD + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", +======= + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "required": true, "type": { "kind": "instance_of", @@ -121667,7 +122546,15 @@ } } ], +<<<<<<< HEAD "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" +======= +<<<<<<< HEAD + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, { "kind": "interface", @@ -121694,6 +122581,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AlibabaCloudTaskSettings", "namespace": "inference.put_alibabacloud" }, @@ -121701,7 +122589,184 @@ { "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for 
storing embeddings of search queries run against a vector database to find relevant documents.", "name": "input_type", +======= +<<<<<<< HEAD + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": 
"inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + }, + { + "kind": "interface", + "name": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "A valid API key for the Anthropic API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "extDocId": "anothropic-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + }, + { + "kind": "interface", + "name": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "name": "max_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "extDocId": "anthropic-messages", + "extDocUrl": "https://docs.anthropic.com/en/api/messages", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": 
{ + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_k", "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", + "required": true, "type": { "kind": "instance_of", "type": 
{ @@ -121711,8 +122776,239 @@ } }, { + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": 
false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +======= + "name": "AzureOpenAITaskSettings", + "namespace": 
"inference.put_azureopenai" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], +<<<<<<< HEAD + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + }, + { + "kind": "interface", + "name": { + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + }, + "properties": [ + { + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "cohere-api-keys", + "extDocUrl": "https://dashboard.cohere.com/api-keys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", + "name": "embedding_type", + "required": false, + "serverDefault": "float", + "type": { + "kind": "instance_of", + "type": { + "name": "EmbeddingType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference 
task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "name": "model_id", +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { +<<<<<<< HEAD "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", "name": "return_token", +======= + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" + }, + { + "kind": "interface", + "name": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + }, + "properties": [ + { + "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use 
it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InputType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `rerank` task, return doc text within the results.", + "name": "return_documents", +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "required": false, "type": { "kind": "instance_of", @@ -121721,13 +123017,50 @@ "namespace": "_builtins" } } +<<<<<<< HEAD } ], "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" +======= + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", + "name": "truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TruncateType", + 
"namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" +======= + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) }, { "kind": "interface", "name": { +<<<<<<< HEAD +======= +>>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "EisServiceSettings", "namespace": "inference.put_eis" }, @@ -121762,6 +123095,37 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= +<<<<<<< HEAD + "name": "RateLimitSetting", + "namespace": "inference._types" +======= +<<<<<<< HEAD + "name": "ElasticsearchServiceSettings", + "namespace": "inference.put_elasticsearch" +>>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { +>>>>>>> 28c855b3f (Add Azure OpenAI inference details (#4019)) "name": "HuggingFaceServiceSettings", "namespace": "inference.put_hugging_face" }, @@ -121918,6 +123282,8 @@ { "kind": "interface", "name": { +======= +>>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) "name": "OpenAIServiceSettings", "namespace": "inference.put_openai" }, @@ -122010,7 +123376,7 @@ }, "properties": [ { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "name": "user", 
"required": false, "type": { diff --git a/output/schema/schema.json b/output/schema/schema.json index d4248f6b5c..a634273a15 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9396,6 +9396,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureopenai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html", + 
"name": "inference.put_azureopenai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureopenai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150854,6 +150899,279 @@ }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" }, + { + "kind": "interface", + "name": { + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "api_key", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "name": "api_version", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + 
"description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.", + "extDocId": "azureopenai", + "extDocUrl": "https://oai.azure.com/", + "name": "deployment_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "extDocId": "azureopenai-quota-limits", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": 
"inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" + }, + { + "kind": "interface", + "name": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `azureopenai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" + } + } + } + ] + }, + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant 
resources.", + "examples": { + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_azureopenai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "azureopenai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + 
"name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, { "kind": "interface", "name": { @@ -151596,7 +151914,7 @@ }, "properties": [ { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis informaiton can be used for abuse detection.", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", "name": "user", "required": false, "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 71be3658a6..e93b1f9a02 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13279,6 +13279,36 @@ export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' +export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { + api_key?: string + api_version: string + deployment_id: string + entra_id?: string + rate_limit?: InferenceRateLimitSetting + resource_name: string +} + +export interface InferencePutAzureopenaiAzureOpenAITaskSettings { + user?: string +} + +export type InferencePutAzureopenaiAzureOpenAITaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureopenaiRequest extends RequestBase { + task_type: InferencePutAzureopenaiAzureOpenAITaskType + azureopenai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureopenaiServiceType + service_settings: 
InferencePutAzureopenaiAzureOpenAIServiceSettings + task_settings?: InferencePutAzureopenaiAzureOpenAITaskSettings + } +} + +export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutAzureopenaiServiceType = 'azureopenai' + export interface InferencePutEisEisServiceSettings { model_id: string rate_limit?: InferenceRateLimitSetting diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index bb6575b41a..2f31088d2e 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -24,6 +24,10 @@ autoscaling-get-autoscaling-capacity,https://www.elastic.co/guide/en/elasticsear autoscaling-get-autoscaling-policy,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/autoscaling-get-autoscaling-policy.html autoscaling-put-autoscaling-policy,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/autoscaling-put-autoscaling-policy.html avoid-index-pattern-collisions,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/index-templates.html#avoid-index-pattern-collisions +azureopenai,https://oai.azure.com/ +azureopenai-auth,https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication +azureopenai-portal,https://portal.azure.com/#view/HubsExtension/BrowseAll +azureopenai-quota-limits,https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits behavioral-analytics-collection-event,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-analytics-collection-event.html behavioral-analytics-event-reference,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/behavioral-analytics-event-reference.html byte-units,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/api-conventions.html#byte-units @@ -322,6 +326,7 @@ inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{bran 
inference-api-post-eis-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html +inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-hugging-face.html inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-jinaai.html diff --git a/specification/_json_spec/inference.put_azureopenai.json b/specification/_json_spec/inference.put_azureopenai.json new file mode 100644 index 0000000000..8739adb1f5 --- /dev/null +++ b/specification/_json_spec/inference.put_azureopenai.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureopenai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", + "description": "Configure an Azure OpenAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{azureopenai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureopenai_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts 
b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts new file mode 100644 index 0000000000..e3b561861e --- /dev/null +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -0,0 +1,152 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an Azure OpenAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `azureopenai` service. 
+ * + * The list of chat completion models that you can choose from in your Azure OpenAI deployment include: + * + * * [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models) + * * [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35) + * + * The list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings). + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_azureopenai + * @availability stack since=8.14.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-azureopenai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{azureopenai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. + */ + task_type: AzureOpenAITaskType + /** + * The unique identifier of the inference endpoint. 
+ */ + azureopenai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `azureopenai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `azureopenai` service. + */ + service_settings: AzureOpenAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AzureOpenAITaskSettings + } +} + +export enum AzureOpenAITaskType { + completion, + text_embedding +} + +export enum ServiceType { + azureopenai +} + +export class AzureOpenAIServiceSettings { + /** + * A valid API key for your Azure OpenAI account. + * You must specify either `api_key` or `entra_id`. + * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureopenai-auth + */ + api_key?: string + /** + * The Azure API version ID to use. + * It is recommended to use the latest supported non-preview version. + */ + api_version: string + /** + * The deployment name of your deployed models. + * Your Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription. + * @ext_doc_id azureopenai + */ + deployment_id: string + /** + * A valid Microsoft Entra token. + * You must specify either `api_key` or `entra_id`. 
+ * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * @ext_doc_id azureopenai-auth + */ + entra_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure. + * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type. + * For `text_embedding`, it is set to `1440`. + * For `completion`, it is set to `120`. + * @ext_doc_id azureopenai-quota-limits + */ + rate_limit?: RateLimitSetting + /** + * The name of your Azure OpenAI resource. + * You can find this from the list of resources in the Azure Portal for your subscription. + * @ext_doc_id azureopenai-portal + */ + resource_name: string +} + +export class AzureOpenAITaskSettings { + /** + * For a `completion` or `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml new file mode 100644 index 0000000000..c6a992f994 --- /dev/null +++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment. +# method_request: "PUT _inference/text_embedding/azure_openai_embeddings" +# type: "request" +value: |- + { + "service": "azureopenai", + "service_settings": { + "api_key": "Api-Key", + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01" + } + } diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml new file mode 100644 index 0000000000..771de789fe --- /dev/null +++ b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task. 
+# method_request: "PUT _inference/completion/azure_openai_completion" +# type: "request" +value: |- + { + "service": "azureopenai", + "service_settings": { + "api_key": "Api-Key", + "resource_name": "Resource-name", + "deployment_id": "Deployment-id", + "api_version": "2024-02-01" + } + } diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts index 886905600e..8bf558e857 100644 --- a/specification/inference/put_openai/PutOpenAiRequest.ts +++ b/specification/inference/put_openai/PutOpenAiRequest.ts @@ -138,7 +138,7 @@ export class OpenAIServiceSettings { export class OpenAITaskSettings { /** * For a `completion` or `text_embedding` task, specify the user issuing the request. - * This informaiton can be used for abuse detection. + * This information can be used for abuse detection. */ user?: string }