diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 61b4394d2c..e9ad3b198a 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -16824,6 +16824,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{watsonx_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Watsonx inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-watsonx", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "watsonx_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_watsonx:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{inference_id}/_stream": { "post": { "tags": [ @@ -74398,6 +74466,71 @@ "relevance_score" ] }, + "inference.put_watsonx:WatsonxTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_watsonx:ServiceType": { + "type": "string", + "enum": [ + "watsonxai" + ] + }, + "inference.put_watsonx:WatsonxServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://cloud.ibm.com/iam/apikeys" + }, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the 
updated API key.", + "type": "string" + }, + "api_version": { + "externalDocs": { + "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" + }, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "type": "string" + }, + "project_id": { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "type": "string" + } + }, + "required": [ + "api_key", + "api_version", + "model_id", + "project_id", + "url" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "_types:StreamResult": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 6c95652dd4..40ef5aa13f 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9037,6 +9037,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{watsonx_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Watsonx inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-watsonx", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "watsonx_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": 
"#/components/schemas/inference.put_watsonx:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{inference_id}/_unified": { "post": { "tags": [ @@ -46668,6 +46736,71 @@ "relevance_score" ] }, + "inference.put_watsonx:WatsonxTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_watsonx:ServiceType": { + "type": "string", + "enum": [ + "watsonxai" + ] + }, + "inference.put_watsonx:WatsonxServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://cloud.ibm.com/iam/apikeys" + }, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "externalDocs": { + "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" + }, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "type": "string" + }, + "project_id": { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "type": "string" + } + }, + "required": [ + "api_key", + "api_version", + "model_id", + "project_id", + "url" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "inference.unified_inference:Message": { "type": "object", "properties": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 399e9263fd..45370dcb4d 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9229,6 +9229,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud 
Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-watsonx", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", + "name": "inference.put_watsonx", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_watsonx" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_watsonx" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{watsonx_inference_id}" + } + ] + }, { "availability": { "stack": { @@ -147015,6 +147060,28 @@ ], "specLocation": "inference/_types/Results.ts#L67-L77" }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L96-L101" + }, { "kind": "type_alias", "name": { @@ -147526,6 +147593,212 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `watsonxai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `watsonxai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" + } + } + } + ] + }, + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_watsonx" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "watsonx_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "watsonxai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L76-L78" + }, + { + "kind": "interface", + "name": { + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" + }, + "properties": [ + { + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "watsonx-api-keys", + "extDocUrl": "https://cloud.ibm.com/iam/apikeys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active 
version dates, refer to the Watsonx documentation.", + "extDocId": "watsonx-api-version", + "extDocUrl": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates", + "name": "api_version", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "extDocId": "watsonx-api-models", + "extDocUrl": "https://www.ibm.com/products/watsonx-ai/foundation-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "name": "project_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L80-L117" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L72-L74" + }, { + "kind": "request", + "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index b4d2151ceb..42a9952274 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13088,6 +13088,10 @@ export interface InferenceRankedDocument { text?: string } +export interface InferenceRateLimitSetting { + requests_per_minute?: integer +} + export type InferenceServiceSettings = any export interface InferenceSparseEmbeddingResult { @@ -13147,6 +13151,30 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutWatsonxRequest extends RequestBase { + task_type: InferencePutWatsonxWatsonxTaskType + watsonx_inference_id: Id + body?: { + service: InferencePutWatsonxServiceType + service_settings: InferencePutWatsonxWatsonxServiceSettings + } +} + +export type InferencePutWatsonxResponse = InferenceInferenceEndpointInfo + +export type InferencePutWatsonxServiceType = 'watsonxai' + +export interface InferencePutWatsonxWatsonxServiceSettings { + api_key: string + api_version: string + model_id: string + project_id: string + rate_limit?: InferenceRateLimitSetting + url: string +} + +export type InferencePutWatsonxWatsonxTaskType = 'text_embedding' + export interface InferenceStreamInferenceRequest extends RequestBase { inference_id: Id task_type?: InferenceTaskType diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index e1de0370b6..9300d304b0 100644 --- a/specification/_doc_ids/table.csv +++ 
b/specification/_doc_ids/table.csv @@ -317,6 +317,7 @@ inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put +inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference inference-api-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-processor.html @@ -854,4 +855,7 @@ watcher-api-start,https://www.elastic.co/docs/api/doc/elasticsearch/operation/op watcher-api-stats,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-stats watcher-api-stop,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-stop watcher-api-update-settings,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-update-settings +watsonx-api-keys,https://cloud.ibm.com/iam/apikeys +watsonx-api-models,https://www.ibm.com/products/watsonx-ai/foundation-models +watsonx-api-version,https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates xpack-rollup,https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-rollup.html diff --git a/specification/_json_spec/inference.put.watsonx.json b/specification/_json_spec/inference.put.watsonx.json new file mode 100644 index 0000000000..db5a42d504 --- /dev/null +++ b/specification/_json_spec/inference.put.watsonx.json @@ -0,0 +1,35 @@ +{ + "inference.put_watsonx": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", + "description": "Configure a Watsonx inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{watsonx_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "watsonx_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 53024633f5..f6d6a66ac1 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -92,3 +92,10 @@ export class InferenceChunkingSettings extends InferenceEndpoint { export type ServiceSettings = UserDefinedValue export type TaskSettings = UserDefinedValue + +export class RateLimitSetting { + /** + * The number of requests allowed per minute. + */ + requests_per_minute?: integer +} diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts new file mode 100644 index 0000000000..92b0eedb59 --- /dev/null +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -0,0 +1,117 @@ +/* + * Licensed to Elasticsearch B.V. 
under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RateLimitSetting } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create a Watsonx inference endpoint. + * + * Creates an inference endpoint to perform an inference task with the `watsonxai` service. + * You need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service. + * You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_watsonx + * @availability stack since=8.16.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-watsonx + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{watsonx_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The task type. + * The only valid task type for the model to perform is `text_embedding`. + */ + task_type: WatsonxTaskType + /** + * The unique identifier of the inference endpoint. + */ + watsonx_inference_id: Id + } + body: { + /** + * The type of service supported for the specified task type. In this case, `watsonxai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `watsonxai` service. + */ + service_settings: WatsonxServiceSettings + } +} + +export enum WatsonxTaskType { + text_embedding +} + +export enum ServiceType { + watsonxai +} + +export class WatsonxServiceSettings { + /** + * A valid API key of your Watsonx account. + * You can find your Watsonx API keys or you can create a new one on the API keys page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. 
+ * @ext_doc_id watsonx-api-keys + */ + api_key: string + /** + * A version parameter that takes a version date in the format of `YYYY-MM-DD`. + * For the active version dates, refer to the Watsonx documentation. + * @ext_doc_id watsonx-api-version + */ + api_version: string + /** + * The name of the model to use for the inference task. + * Refer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models. + * @ext_doc_id watsonx-api-models + */ + model_id: string + /** + * The identifier of the IBM Cloud project to use for the inference task. + */ + project_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Watsonx. + * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. + */ + rate_limit?: RateLimitSetting + /** + * The URL of the inference endpoint that you created on Watsonx. + */ + url: string +} diff --git a/specification/inference/put_watsonx/PutWatsonxResponse.ts b/specification/inference/put_watsonx/PutWatsonxResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_watsonx/PutWatsonxResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml b/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml new file mode 100644 index 0000000000..2320487516 --- /dev/null +++ b/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml @@ -0,0 +1,15 @@ +# summary: +description: Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task. +# method_request: "PUT _inference/text_embedding/watsonx-embeddings" +# type: "request" +value: |- + { + "service": "watsonxai", + "service_settings": { + "api_key": "Watsonx-API-Key", + "url": "Watsonx-URL", + "model_id": "ibm/slate-30m-english-rtrvr", + "project_id": "IBM-Cloud-ID", + "api_version": "2024-03-14" + } + }
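For reviewers who want to exercise the new endpoint against a live cluster, here is a minimal TypeScript sketch (Node 18+, global fetch). It reuses the request shape and placeholder values from InferenceRequestExample1.yaml above; the cluster URL, the helper names, and the omission of security headers are illustrative assumptions, not part of this change.

// Sketch only: exercises PUT /_inference/{task_type}/{watsonx_inference_id}
// as specified in this PR. All Watsonx values are placeholders copied from
// the example YAML; ES_URL and the missing auth headers are assumptions.
const ES_URL = process.env.ES_URL ?? 'http://localhost:9200'

async function createWatsonxEndpoint(): Promise<unknown> {
  const res = await fetch(`${ES_URL}/_inference/text_embedding/watsonx-embeddings`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      service: 'watsonxai',
      service_settings: {
        api_key: 'Watsonx-API-Key', // placeholder; provided once, never returned
        url: 'Watsonx-URL', // placeholder
        model_id: 'ibm/slate-30m-english-rtrvr',
        project_id: 'IBM-Cloud-ID', // placeholder
        api_version: '2024-03-14'
      }
    })
  })
  if (!res.ok) throw new Error(`PUT failed: ${res.status} ${await res.text()}`)
  return res.json() // InferenceEndpointInfo, per the 200 response schema above
}

async function getWatsonxEndpoint(): Promise<unknown> {
  // Get inference endpoint API: confirms creation; per the descriptions in
  // this change it does not retrieve the stored API key.
  const res = await fetch(`${ES_URL}/_inference/text_embedding/watsonx-embeddings`)
  if (!res.ok) throw new Error(`GET failed: ${res.status}`)
  return res.json()
}

createWatsonxEndpoint()
  .then(getWatsonxEndpoint)
  .then(info => console.log(JSON.stringify(info, null, 2)))
  .catch(err => {
    console.error(err)
    process.exitCode = 1
  })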