From 444c36b2331ca4c3015d055140e7a1e0db7a55bd Mon Sep 17 00:00:00 2001
From: lcawl
Date: Wed, 5 Mar 2025 12:31:00 -0800
Subject: [PATCH 1/3] Remove watsonx-inference-integration.md

---
 explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md | 2 +-
 .../docs-content/serverless/general-ml-nlp-auto-scale.md  | 2 +-
 redirects.yml                                             | 1 +
 solutions/search/retrievers-overview.md                   | 2 +-
 solutions/search/semantic-search.md                       | 2 +-
 5 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md b/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
index ea90091f4a..1f13c45567 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
@@ -30,7 +30,7 @@ You can enable adaptive allocations by using:
 * the create inference endpoint API for [ELSER](../../elastic-inference/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../elastic-inference/inference-api/elasticsearch-inference-integration.md) that are used as {{infer}} services.
 * the [start trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-start-trained-model-deployment) or [update trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-update-trained-model-deployment) APIs for trained models that are deployed on {{ml}} nodes.

-If the new allocations fit on the current {{ml}} nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your {{ml}} node will be scaled up if {{ml}} autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [{{infer}} endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
+If the new allocations fit on the current {{ml}} nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your {{ml}} node will be scaled up if {{ml}} autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [{{infer}} endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference).

 ### Optimizing for typical use cases [optimize-use-case]

diff --git a/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md b/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
index 466bed89d9..f8e4143bee 100644
--- a/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
+++ b/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
@@ -31,7 +31,7 @@ You can enable adaptive allocations by using:
 * the create inference endpoint API for [ELSER](../../../explore-analyze/elastic-inference/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) that are used as inference services.
 * the [start trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-start-trained-model-deployment) or [update trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-update-trained-model-deployment) APIs for trained models that are deployed on machine learning nodes.

-If the new allocations fit on the current machine learning nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your machine learning node will be scaled up if machine learning autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
+If the new allocations fit on the current machine learning nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your machine learning node will be scaled up if machine learning autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference).

 When you create inference endpoints on Serverless using Kibana, adaptive allocations are automatically turned on, and there is no option to disable them.

diff --git a/redirects.yml b/redirects.yml
index c81e1c8ef4..4c80b2a37d 100644
--- a/redirects.yml
+++ b/redirects.yml
@@ -4,6 +4,7 @@ redirects:

   # solutions
   'solutions/search/search-approaches/near-real-time-search.md': '!manage-data/data-store/near-real-time-search.md'
+  'solutions/search/inference-api/watsonx-inference-integration.md': 'https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx'

   ## deploy-manage
   'deploy-manage/deploy/elastic-cloud/ec-configure-deployment-settings.md': '!deploy-manage/deploy/elastic-cloud/ec-customize-deployment-components.md'

diff --git a/solutions/search/retrievers-overview.md b/solutions/search/retrievers-overview.md
index e1e7de8201..dd50912a89 100644
--- a/solutions/search/retrievers-overview.md
+++ b/solutions/search/retrievers-overview.md
@@ -29,7 +29,7 @@ Retrievers come in various types, each tailored for different search operations.
 * [**Linear Retriever**](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#operation-search-body-application-json-retriever). Combines the top results from multiple sub-retrievers using a weighted sum of their scores. Allows you to specify different weights for each retriever, as well as independently normalize the scores from each result set.
 * [**RRF Retriever**](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#operation-search-body-application-json-retriever). Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. Allows you to combine multiple result sets with different relevance indicators into a single result set.
 An RRF retriever is a **compound retriever**, where its `filter` element is propagated to its sub-retrievers.
 * [**Rule Retriever**](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#operation-search-body-application-json-retriever). Applies [query rules](elasticsearch://reference/elasticsearch/rest-apis/searching-with-query-rules.md#query-rules) to the query before returning results.
-* [**Text Similarity Re-ranker Retriever**](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#operation-search-body-application-json-retriever). Used for [semantic reranking](ranking/semantic-reranking.md). Requires first creating a `rerank` task using the [{{es}} Inference API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
+* [**Text Similarity Re-ranker Retriever**](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#operation-search-body-application-json-retriever). Used for [semantic reranking](ranking/semantic-reranking.md). Requires first creating a `rerank` task using the [{{es}} Inference API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference).

 ## What makes retrievers useful? [retrievers-overview-why-are-they-useful]

diff --git a/solutions/search/semantic-search.md b/solutions/search/semantic-search.md
index 97818b1e64..a41207901a 100644
--- a/solutions/search/semantic-search.md
+++ b/solutions/search/semantic-search.md
@@ -35,7 +35,7 @@ This diagram summarizes the relative complexity of each workflow:

 ### Option 1: `semantic_text` [_semantic_text_workflow]

-The simplest way to use NLP models in the {{stack}} is through the [`semantic_text` workflow](semantic-search/semantic-search-semantic-text.md). We recommend using this approach because it abstracts away a lot of manual work. All you need to do is create an {{infer}} endpoint and an index mapping to start ingesting, embedding, and querying data. There is no need to define model-related settings and parameters, or to create {{infer}} ingest pipelines. Refer to the [Create an {{infer}} endpoint API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) documentation for a list of supported services.
+The simplest way to use NLP models in the {{stack}} is through the [`semantic_text` workflow](semantic-search/semantic-search-semantic-text.md). We recommend using this approach because it abstracts away a lot of manual work. All you need to do is create an {{infer}} endpoint and an index mapping to start ingesting, embedding, and querying data. There is no need to define model-related settings and parameters, or to create {{infer}} ingest pipelines. For more information about the supported services, refer to [](/solutions/search/inference-api.md) and the [{{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) documentation.

 For an end-to-end tutorial, refer to [Semantic search with `semantic_text`](semantic-search/semantic-search-semantic-text.md).
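For context on the adaptive allocations passages that patch 1 relinks: the create {{infer}} endpoint API those links now point to is where adaptive allocations are configured. A minimal sketch, assuming an ELSER sparse-embedding endpoint; the endpoint name and allocation bounds are illustrative, not taken from the patched docs:

```console
PUT _inference/sparse_embedding/my-elser-endpoint
{
  "service": "elser",
  "service_settings": {
    "num_threads": 1,
    "adaptive_allocations": {
      "enabled": true,                // scale allocations automatically with load
      "min_number_of_allocations": 1, // never scale below one allocation
      "max_number_of_allocations": 4  // cap scaling for this endpoint
    }
  }
}
```

With `"enabled": true`, {{es}} adds or removes model allocations as load changes; as the docs above note, allocations cannot exceed 32 unless you explicitly set `max_number_of_allocations` higher.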
From 035e4e0ca3e03b33ee741ab10b94edebf4e839d5 Mon Sep 17 00:00:00 2001
From: lcawl
Date: Wed, 5 Mar 2025 14:57:24 -0800
Subject: [PATCH 2/3] Revert changes to redirects.yml

---
 redirects.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/redirects.yml b/redirects.yml
index 4c80b2a37d..c81e1c8ef4 100644
--- a/redirects.yml
+++ b/redirects.yml
@@ -4,7 +4,6 @@ redirects:

   # solutions
   'solutions/search/search-approaches/near-real-time-search.md': '!manage-data/data-store/near-real-time-search.md'
-  'solutions/search/inference-api/watsonx-inference-integration.md': 'https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx'

   ## deploy-manage
   'deploy-manage/deploy/elastic-cloud/ec-configure-deployment-settings.md': '!deploy-manage/deploy/elastic-cloud/ec-customize-deployment-components.md'

From 6b12d761677d2d1726337e15cac4d8915beb60d6 Mon Sep 17 00:00:00 2001
From: lcawl
Date: Thu, 6 Mar 2025 08:29:10 -0800
Subject: [PATCH 3/3] Fix moved link target

---
 solutions/search/semantic-search.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/solutions/search/semantic-search.md b/solutions/search/semantic-search.md
index a41207901a..e6f4cd4ed3 100644
--- a/solutions/search/semantic-search.md
+++ b/solutions/search/semantic-search.md
@@ -35,7 +35,7 @@ This diagram summarizes the relative complexity of each workflow:

 ### Option 1: `semantic_text` [_semantic_text_workflow]

-The simplest way to use NLP models in the {{stack}} is through the [`semantic_text` workflow](semantic-search/semantic-search-semantic-text.md). We recommend using this approach because it abstracts away a lot of manual work. All you need to do is create an {{infer}} endpoint and an index mapping to start ingesting, embedding, and querying data. There is no need to define model-related settings and parameters, or to create {{infer}} ingest pipelines. For more information about the supported services, refer to [](/solutions/search/inference-api.md) and the [{{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) documentation.
+The simplest way to use NLP models in the {{stack}} is through the [`semantic_text` workflow](semantic-search/semantic-search-semantic-text.md). We recommend using this approach because it abstracts away a lot of manual work. All you need to do is create an {{infer}} endpoint and an index mapping to start ingesting, embedding, and querying data. There is no need to define model-related settings and parameters, or to create {{infer}} ingest pipelines. For more information about the supported services, refer to [](/explore-analyze/elastic-inference/inference-api.md) and the [{{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) documentation.

 For an end-to-end tutorial, refer to [Semantic search with `semantic_text`](semantic-search/semantic-search-semantic-text.md).
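For the `semantic_text` workflow that patches 1 and 3 relink in semantic-search.md, the only setup beyond the {{infer}} endpoint is an index mapping. A minimal sketch, reusing the hypothetical endpoint from the previous example; the index and field names are illustrative:

```console
PUT my-semantic-index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "semantic_text",
        "inference_id": "my-elser-endpoint" // the {{infer}} endpoint created earlier
      }
    }
  }
}
```

Documents indexed into `content` are then chunked and embedded automatically, with no model settings or {{infer}} ingest pipelines to define, which is the point the relinked paragraph makes.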