From fbbe73616985ef2eb3e9ae583bd7dc3c89ed128f Mon Sep 17 00:00:00 2001 From: lcawl Date: Mon, 3 Mar 2025 22:42:38 -0800 Subject: [PATCH 1/4] Draft Watsonx inference API --- specification/_doc_ids/table.csv | 4 + .../_json_spec/inference.put.watsonx.json | 31 +++++ .../put_watsonx/PutWatsonxRequest.ts | 116 ++++++++++++++++++ .../put_watsonx/PutWatsonxResponse.ts | 24 ++++ .../request/InferenceRequestExample1.yaml | 15 +++ 5 files changed, 190 insertions(+) create mode 100644 specification/_json_spec/inference.put.watsonx.json create mode 100644 specification/inference/put_watsonx/PutWatsonxRequest.ts create mode 100644 specification/inference/put_watsonx/PutWatsonxResponse.ts create mode 100644 specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index dc6630c992..fd821fc403 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -310,6 +310,7 @@ inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put +inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference inference-api-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-processor.html @@ -847,4 +848,7 @@ watcher-api-start,https://www.elastic.co/docs/api/doc/elasticsearch/operation/op watcher-api-stats,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-stats watcher-api-stop,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-stop watcher-api-update-settings,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-update-settings +watsonx-api-keys,https://cloud.ibm.com/iam/apikeys +watsonx-api-models,https://www.ibm.com/products/watsonx-ai/foundation-models +watsonx-api-version,https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates xpack-rollup,https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-rollup.html diff --git a/specification/_json_spec/inference.put.watsonx.json b/specification/_json_spec/inference.put.watsonx.json new file mode 100644 index 0000000000..b4c1a77278 --- /dev/null +++ b/specification/_json_spec/inference.put.watsonx.json @@ -0,0 +1,31 @@ +{ + "inference.put": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", + "description": "Configure a Watsonx inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{watsonx_inference_id}", + "methods": ["PUT"], + "parts": { + "watsonx_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git 
a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts new file mode 100644 index 0000000000..0ce19294aa --- /dev/null +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -0,0 +1,116 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create a Watsonx inference endpoint. + * + * Creates an inference endpoint to perform an inference task with the `watsonxai` service. + * The only valid task type for the model to perform is `text_embedding`. + * You need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service. + * You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_watsonx + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-watsonx + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/text_embedding/{watsonx_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The unique identifier of the inference endpoint. + */ + watsonx_inference_id: Id + } + body: { + /** + * The type of service supported for the specified task type. In this case, `watsonxai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `watsonxai` service. + */ + service_settings: WatsonxServiceSettings + } +} + +export enum ServiceType { + watsonxai +} + +export class RateLimitSetting { + /** + * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. + * @server_default 120 + */ + requests_per_minute?: integer +} + +export class WatsonxServiceSettings { + /** + * A valid API key of your Watsonx account. + * You can find your Watsonx API keys or you can create a new one on the API keys page. 
+ * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id watsonx-api-keys + */ + api_key: string + /** + * A version parameter that takes a version date in the format of `YYYY-MM-DD`. + * For the active version data parameters, refer to the Watsonx documentation. + * @ext_doc_id watsonx-api-version + */ + api_version: string + /** + * The name of the model to use for the inference task. + * Refer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models. + * @ext_doc_id watsonx-api-models + */ + model_id: string + /** + * The identifier of the IBM Cloud project to use for the inference task. + */ + project_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Watsonx. + */ + rate_limit?: RateLimitSetting + /** + * The URL of the inference endpoint that you created on Watsonx. + */ + url: string +} diff --git a/specification/inference/put_watsonx/PutWatsonxResponse.ts b/specification/inference/put_watsonx/PutWatsonxResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_watsonx/PutWatsonxResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml b/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml new file mode 100644 index 0000000000..2320487516 --- /dev/null +++ b/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml @@ -0,0 +1,15 @@ +# summary: +description: Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task. +# method_request: "PUT _inference/text_embedding/watsonx-embeddings" +# type: "request" +value: |- + { + "service": "watsonxai", + "service_settings": { + "api_key": "Watsonx-API-Key", + "url": "Watsonx-URL", + "model_id": "ibm/slate-30m-english-rtrvr", + "project_id": "IBM-Cloud-ID", + "api_version": "2024-03-14" + } + } From 035a1cc084d5d5ef9ad7a2ea92851eeebf3716bd Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 4 Mar 2025 08:40:17 -0800 Subject: [PATCH 2/4] Update specification/_json_spec/inference.put.watsonx.json Co-authored-by: Laura Trotta <153528055+l-trotta@users.noreply.github.com> --- specification/_json_spec/inference.put.watsonx.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/_json_spec/inference.put.watsonx.json b/specification/_json_spec/inference.put.watsonx.json index b4c1a77278..c7862c82a0 100644 --- a/specification/_json_spec/inference.put.watsonx.json +++ b/specification/_json_spec/inference.put.watsonx.json @@ -1,5 +1,5 @@ { - "inference.put": { + "inference.put_watsonx": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", "description": "Configure a Watsonx inference endpoint" From 9004cda33d7c0f15713d531e25b10e86a608ac7a Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 4 Mar 2025 13:22:14 -0800 Subject: [PATCH 3/4] Generate output --- output/openapi/elasticsearch-openapi.json | 133 +++++++++ .../elasticsearch-serverless-openapi.json | 133 +++++++++ output/schema/schema.json | 273 ++++++++++++++++++ output/schema/validation-errors.json | 6 + output/typescript/types.ts | 28 ++ .../_json_spec/inference.put.watsonx.json | 6 +- specification/inference/_types/Services.ts | 7 + .../put_watsonx/PutWatsonxRequest.ts | 25 +- 8 files changed, 598 insertions(+), 13 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 862c9f976a..ed75d6756f 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -16818,6 +16818,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{watsonx_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Watsonx inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-watsonx", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + }, + "style": "simple" + }, + { + "in": 
"path", + "name": "watsonx_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_watsonx:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{inference_id}/_stream": { "post": { "tags": [ @@ -74327,6 +74395,71 @@ "relevance_score" ] }, + "inference.put_watsonx:WatsonxTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_watsonx:ServiceType": { + "type": "string", + "enum": [ + "watsonxai" + ] + }, + "inference.put_watsonx:WatsonxServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://cloud.ibm.com/iam/apikeys" + }, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "externalDocs": { + "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" + }, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "type": "string" + }, + "project_id": { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "type": "string" + } + }, + "required": [ + "api_key", + "api_version", + "model_id", + "project_id", + "url" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "_types:StreamResult": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 7c804d0902..d5a2415100 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9037,6 +9037,74 @@ "x-state": "Added in 8.11.0" } }, + 
"/_inference/{task_type}/{watsonx_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Watsonx inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-watsonx", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "watsonx_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_watsonx:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{inference_id}/_unified": { "post": { "tags": [ @@ -46604,6 +46672,71 @@ "relevance_score" ] }, + "inference.put_watsonx:WatsonxTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_watsonx:ServiceType": { + "type": "string", + "enum": [ + "watsonxai" + ] + }, + "inference.put_watsonx:WatsonxServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://cloud.ibm.com/iam/apikeys" + }, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "externalDocs": { + "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" + }, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data 
parameters, refer to the Watsonx documentation.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "type": "string" + }, + "project_id": { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "type": "string" + } + }, + "required": [ + "api_key", + "api_version", + "model_id", + "project_id", + "url" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "inference.unified_inference:Message": { "type": "object", "properties": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 3d259c6bbd..0755afdd24 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9221,6 +9221,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-watsonx", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", + "name": "inference.put_watsonx", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_watsonx" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_watsonx" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{watsonx_inference_id}" + } + ] + }, { "availability": { "stack": { @@ -146906,6 +146951,28 @@ ], "specLocation": "inference/_types/Results.ts#L67-L77" }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": 
"_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L96-L101" + }, { "kind": "type_alias", "name": { @@ -147417,6 +147484,212 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `watsonxai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" + } + } + } + ] + }, + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_watsonx" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "watsonx_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "watsonxai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L76-L78" + }, + { + "kind": "interface", + "name": { + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" + }, + "properties": [ + { + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to 
provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "watsonx-api-keys", + "extDocUrl": "https://cloud.ibm.com/iam/apikeys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Watsonx documentation.", + "extDocId": "watsonx-api-version", + "extDocUrl": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates", + "name": "api_version", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "extDocId": "watsonx-api-models", + "extDocUrl": "https://www.ibm.com/products/watsonx-ai/foundation-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "name": "project_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L80-L117" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L72-L74" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index e8645306c9..e579ee7e1e 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -156,6 +156,12 @@ ], "response": [] }, + "inference.put_watsonx": { + "request": [ + "Request: path parameter 'task_type' is required in the json spec" + ], + "response": [] + }, "inference.update": { "request": [ "/_inference/{inference_id}/_update: different http methods in the json spec", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5cad42a45a..7d1bd225de 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13078,6 +13078,10 @@ export interface InferenceRankedDocument { text?: string } +export interface InferenceRateLimitSetting { + requests_per_minute?: integer +} + 
export type InferenceServiceSettings = any export interface InferenceSparseEmbeddingResult { @@ -13137,6 +13141,30 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutWatsonxRequest extends RequestBase { + task_type?: InferencePutWatsonxWatsonxTaskType + watsonx_inference_id: Id + body?: { + service: InferencePutWatsonxServiceType + service_settings: InferencePutWatsonxWatsonxServiceSettings + } +} + +export type InferencePutWatsonxResponse = InferenceInferenceEndpointInfo + +export type InferencePutWatsonxServiceType = 'watsonxai' + +export interface InferencePutWatsonxWatsonxServiceSettings { + api_key: string + api_version: string + model_id: string + project_id: string + rate_limit?: InferenceRateLimitSetting + url: string +} + +export type InferencePutWatsonxWatsonxTaskType = 'text_embedding' + export interface InferenceStreamInferenceRequest extends RequestBase { inference_id: Id task_type?: InferenceTaskType diff --git a/specification/_json_spec/inference.put.watsonx.json b/specification/_json_spec/inference.put.watsonx.json index c7862c82a0..db5a42d504 100644 --- a/specification/_json_spec/inference.put.watsonx.json +++ b/specification/_json_spec/inference.put.watsonx.json @@ -13,9 +13,13 @@ "url": { "paths": [ { - "path": "/_inference/text_embedding/{watsonx_inference_id}", + "path": "/_inference/{task_type}/{watsonx_inference_id}", "methods": ["PUT"], "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, "watsonx_inference_id": { "type": "string", "description": "The inference Id" diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 53024633f5..f6d6a66ac1 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -92,3 +92,10 @@ export class InferenceChunkingSettings extends InferenceEndpoint { export type ServiceSettings = UserDefinedValue export type TaskSettings = UserDefinedValue + +export class RateLimitSetting { + /** + * The number of requests allowed per minute. + */ + requests_per_minute?: integer +} diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts index 0ce19294aa..b78fdd6719 100644 --- a/specification/inference/put_watsonx/PutWatsonxRequest.ts +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -17,15 +17,14 @@ * under the License. */ +import { RateLimitSetting } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create a Watsonx inference endpoint. * * Creates an inference endpoint to perform an inference task with the `watsonxai` service. - * The only valid task type for the model to perform is `text_embedding`. * You need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service. * You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform. * @@ -35,7 +34,7 @@ import { integer } from '@_types/Numeric' * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
* @rest_spec_name inference.put_watsonx - * @availability stack since=8.11.0 stability=stable visibility=public + * @availability stack since=8.16.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public * @cluster_privileges manage_inference * @doc_id inference-api-put-watsonx @@ -43,11 +42,16 @@ export interface Request extends RequestBase { urls: [ { - path: '/_inference/text_embedding/{watsonx_inference_id}' + path: '/_inference/{task_type}/{watsonx_inference_id}' methods: ['PUT'] } ] path_parts: { + /** + * The task type. + * The only valid task type for the model to perform is `text_embedding`. + */ + task_type?: WatsonxTaskType /** * The unique identifier of the inference endpoint. */ watsonx_inference_id: Id @@ -65,16 +69,12 @@ export interface Request extends RequestBase { } } -export enum ServiceType { - watsonxai +export enum WatsonxTaskType { + text_embedding } -export class RateLimitSetting { - /** - * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. - * @server_default 120 - */ - requests_per_minute?: integer +export enum ServiceType { + watsonxai } export class WatsonxServiceSettings { @@ -107,6 +107,7 @@ export class WatsonxServiceSettings { project_id: string /** * This setting helps to minimize the number of rate limit errors returned from Watsonx. + * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. */ rate_limit?: RateLimitSetting /** From f84a6202cfb59ed0c52ec828a4db2f6e55fd6c28 Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 4 Mar 2025 19:55:20 -0800 Subject: [PATCH 4/4] Fix required path parameter --- output/schema/schema.json | 2 +- output/schema/validation-errors.json | 6 ------ output/typescript/types.ts | 2 +- specification/inference/put_watsonx/PutWatsonxRequest.ts | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index f3586e1833..3ef5c2fdfe 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -147616,7 +147616,7 @@ { "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", "name": "task_type", - "required": false, + "required": true, "type": { "kind": "instance_of", "type": { diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index e579ee7e1e..e8645306c9 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -156,12 +156,6 @@ ], "response": [] }, - "inference.put_watsonx": { - "request": [ - "Request: path parameter 'task_type' is required in the json spec" - ], - "response": [] - }, "inference.update": { "request": [ "/_inference/{inference_id}/_update: different http methods in the json spec", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index d41f753fdc..52c8ba8737 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13149,7 +13149,7 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo export interface InferencePutWatsonxRequest extends RequestBase { - task_type?: InferencePutWatsonxWatsonxTaskType + task_type: InferencePutWatsonxWatsonxTaskType watsonx_inference_id: Id body?: { service: InferencePutWatsonxServiceType diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts index b78fdd6719..92b0eedb59 
100644 --- a/specification/inference/put_watsonx/PutWatsonxRequest.ts +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -51,7 +51,7 @@ export interface Request extends RequestBase { * The task type. * The only valid task type for the model to perform is `text_embedding`. */ - task_type?: WatsonxTaskType + task_type: WatsonxTaskType /** * The unique identifier of the inference endpoint. */
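
A usage sketch, illustrative and not part of the committed files above (the endpoint name comes from the example request in patch 1; the input string is an assumption): once `PUT _inference/text_embedding/watsonx-embeddings` has created the endpoint, it can be exercised through the existing inference API:

POST _inference/text_embedding/watsonx-embeddings
{
  "input": "Elasticsearch is a distributed search and analytics engine."
}

Provided the Watsonx API key, URL, and project ID are valid, the response should contain a `text_embedding` result computed by the configured `ibm/slate-30m-english-rtrvr` model.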