From 3d41b4efdf113590cdf04fd78941aefeadf50e5a Mon Sep 17 00:00:00 2001 From: kosabogi Date: Fri, 28 Feb 2025 09:41:58 +0100 Subject: [PATCH] Updates the Watsonx integration page with rerank feature --- .../watsonx-inference-integration.md | 67 ++++++++++++++++--- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/solutions/search/inference-api/watsonx-inference-integration.md b/solutions/search/inference-api/watsonx-inference-integration.md index 677be506dc..2b5c745fff 100644 --- a/solutions/search/inference-api/watsonx-inference-integration.md +++ b/solutions/search/inference-api/watsonx-inference-integration.md @@ -33,9 +33,8 @@ You need an [IBM Cloud® Databases for Elasticsearch deployment](https://cloud.i Available task types: - * `text_embedding`. - - + * `text_embedding`, + * `rerank`. ## {{api-request-body-title}} [infer-service-watsonx-ai-api-request-body] @@ -50,9 +49,9 @@ You need an [IBM Cloud® Databases for Elasticsearch deployment](https://cloud.i `api_key` : (Required, string) A valid API key of your Watsonx account. You can find your Watsonx API keys or you can create a new one [on the API keys page](https://cloud.ibm.com/iam/apikeys). - ::::{important} - You need to provide the API key only once, during the {{infer}} model creation. The [Get {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get) does not retrieve your API key. After creating the {{infer}} model, you cannot change the associated API key. If you want to use a different API key, delete the {{infer}} model and recreate it with the same name and the updated API key. - :::: + ::::{important} + You need to provide the API key only once, during the {{infer}} model creation. The [Get {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get) does not retrieve your API key. After creating the {{infer}} model, you cannot change the associated API key. 
If you want to use a different API key, delete the {{infer}} model and recreate it with the same name and the updated API key. + :::: `api_version` @@ -70,13 +69,28 @@ You need an [IBM Cloud® Databases for Elasticsearch deployment](https://cloud.i `rate_limit` : (Optional, object) By default, the `watsonxai` service sets the number of requests allowed per minute to `120`. This helps to minimize the number of rate limit errors returned from Watsonx. To modify this, set the `requests_per_minute` setting of this object in your service settings: - ```text - "rate_limit": { - "requests_per_minute": <<number_of_requests>> - } - ``` +```json +"rate_limit": { + "requests_per_minute": <<number_of_requests>> +} +``` + +`task_settings` +: (Optional, object) Settings to configure the inference task. + + These settings are specific to the `<task_type>` you specified. + +::::{dropdown} `task_settings` for the `rerank` task type +`truncate_input_tokens` +: (Optional, integer) Specifies the maximum number of tokens per input document before truncation. + +`return_documents` +: (Optional, boolean) Specifies whether to return the document text in the results. +`top_n` +: (Optional, integer) The number of most relevant documents to return. Defaults to the number of input documents. +:::: ## Watsonx AI service example [inference-example-watsonx-ai] @@ -101,4 +115,35 @@ PUT _inference/text_embedding/watsonx-embeddings 3. The ID of your IBM Cloud project. 4. A valid API version parameter. You can find the active version data parameters [here](https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates). +The following example shows how to create an inference endpoint called `watsonx-rerank` to perform a `rerank` task type. 
+ +```console + +PUT _inference/rerank/watsonx-rerank +{ + "service": "watsonxai", + "service_settings": { + "api_key": "<api_key>", <1> + "url": "<url>", <2> + "model_id": "cross-encoder/ms-marco-minilm-l-12-v2", + "project_id": "<project_id>", <3> + "api_version": "2024-05-02" <4> + }, + "task_settings": { + "truncate_input_tokens": 50, <5> + "return_documents": true, <6> + "top_n": 3 <7> + } +} +``` + +1. A valid Watsonx API key. You can find it on the [API keys page of your account](https://cloud.ibm.com/iam/apikeys). +2. The {{infer}} endpoint URL you created on Watsonx. +3. The ID of your IBM Cloud project. +4. A valid API version parameter. You can find the active version data parameters [here](https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates). +5. The maximum number of tokens per document before truncation. +6. Whether to return the document text in the results. +7. The number of top relevant documents to return. + +