From 2b23030d532698ba70d42cb80d43a5269b02f7ce Mon Sep 17 00:00:00 2001
From: kosabogi <boglarka.kosa@elastic.co>
Date: Mon, 13 Oct 2025 12:13:34 +0200
Subject: [PATCH 1/3] Adds new parameters to the elasticsearch inference rerank
 API

---
 specification/inference/_types/CommonTypes.ts   | 17 +++++++++++++++++
 .../PutElasticsearchRequest.ts                  |  6 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index 25d1be939a..f598870e8b 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1306,6 +1306,23 @@ export class ElasticsearchServiceSettings {
    * The maximum value is 32.
    */
   num_threads: integer
+  /**
+   * Only for the `rerank` task type.
+   * Controls the strategy used for processing long documents during inference.
+   *
+   * Possible values:
+   * - `truncate` (default): Processes only the beginning of each document.
+   * - `chunk`: Splits long documents into smaller parts (chunks) before inference.
+   *
+   * To enable chunking, set this value to `chunk`.
+   */
+  long_document_strategy?: string
+  /**
+   * Only for the `rerank` task type.
+   * Limits the number of chunks per document that are sent for inference when `long_document_strategy` is set to `chunk`.
+   * If not set, all chunks generated for the document are processed.
+   */
+  max_chunks_per_doc?: integer
 }
 
 export class ElasticsearchTaskSettings {
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index b57b062f67..3880623a5f 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -26,7 +26,7 @@ import {
   ElasticsearchTaskSettings,
   ElasticsearchTaskType
 } from '@inference/_types/CommonTypes'
-import { InferenceChunkingSettings } from '@inference/_types/Services'
+import { ElasticsearchInferenceChunkingSettings } from '@inference/_types/Services'
 
 /**
  * Create an Elasticsearch inference endpoint.
@@ -78,10 +78,10 @@ export interface Request extends RequestBase {
   }
   body: {
     /**
-     * The chunking configuration object.
+     * The chunking configuration object. For the `rerank` task type, you can enable chunking by setting the `long_document_strategy` parameter to `chunk` in the `service_settings` object.
      * @ext_doc_id inference-chunking
      */
-    chunking_settings?: InferenceChunkingSettings
+    chunking_settings?: ElasticsearchInferenceChunkingSettings
     /**
      * The type of service supported for the specified task type. In this case, `elasticsearch`.
      */

From 911b868dc06ac1a98a557a69d2b60f67e49a208c Mon Sep 17 00:00:00 2001
From: kosabogi <boglarka.kosa@elastic.co>
Date: Mon, 13 Oct 2025 12:26:29 +0200
Subject: [PATCH 2/3] Adds unique inference chunking settings for elasticsearch

---
 specification/inference/_types/Services.ts | 61 ++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 788604d1d1..8676bb221b 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -322,6 +322,67 @@ export class InferenceEndpointInfoWatsonx extends InferenceEndpoint {
   task_type: TaskTypeWatsonx
 }
 
+/**
+ * Chunking configuration object
+ */
+export class ElasticsearchInferenceChunkingSettings {
+  /**
+   * The maximum size of a chunk in words.
+   * This value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).
+   * This value should not exceed the window size for the associated model.
+   * @server_default 250
+   */
+  max_chunk_size?: integer
+  /**
+   * The number of overlapping words for chunks.
+   * It is applicable only to a `word` chunking strategy.
+   * This value cannot be higher than half the `max_chunk_size` value.
+   * @server_default 100
+   */
+  overlap?: integer
+  /**
+   * The number of overlapping sentences for chunks.
+   * It is applicable only for a `sentence` chunking strategy.
+   * It can be either `1` or `0`.
+   * @server_default 1
+   */
+  sentence_overlap?: integer
+  /**
+   * Only applicable to the `recursive` strategy and required when using it.
+   *
+   * Sets a predefined list of separators in the saved chunking settings based on the selected text type.
+   * Values can be `markdown` or `plaintext`.
+   *
+   * Using this parameter is an alternative to manually specifying a custom `separators` list.
+   */
+  separator_group?: string
+  /**
+   * Only applicable to the `recursive` strategy and required when using it.
+   *
+   * A list of strings used as possible split points when chunking text.
+   *
+   * Each string can be a plain string or a regular expression (regex) pattern.
+   * The system tries each separator in order to split the text, starting from the first item in the list.
+   *
+   * After splitting, it attempts to recombine smaller pieces into larger chunks that stay within
+   * the `max_chunk_size` limit, to reduce the total number of chunks generated.
+   */
+  separators?: string[]
+  /**
+   * The chunking strategy: `sentence`, `word`, `none` or `recursive`.
+   *
+   *  * If `strategy` is set to `recursive`, you must also specify:
+   *
+   * - `max_chunk_size`
+   * - either `separators` or`separator_group`
+   *
+   * Learn more about different chunking strategies in the linked documentation.
+   * @server_default sentence
+   * @ext_doc_id chunking-strategies
+   */
+  strategy?: string
+}
+
 /**
  * Chunking configuration object
  */

From e8b530c98ecbce55c4768bd3e4593d5b74bcfeec Mon Sep 17 00:00:00 2001
From: kosabogi <boglarka.kosa@elastic.co>
Date: Thu, 16 Oct 2025 15:26:55 +0200
Subject: [PATCH 3/3] Addresses suggestions

---
 specification/inference/_types/CommonTypes.ts |  5 +-
 specification/inference/_types/Services.ts    | 65 +------------------
 .../PutElasticsearchRequest.ts                |  8 ++-
 3 files changed, 11 insertions(+), 67 deletions(-)

diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index f598870e8b..ca3cee712f 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1307,14 +1307,15 @@ export class ElasticsearchServiceSettings {
    */
   num_threads: integer
   /**
-   * Only for the `rerank` task type.
+   * Available only for the `rerank` task type using the Elastic reranker model.
    * Controls the strategy used for processing long documents during inference.
    *
    * Possible values:
    * - `truncate` (default): Processes only the beginning of each document.
    * - `chunk`: Splits long documents into smaller parts (chunks) before inference.
    *
-   * To enable chunking, set this value to `chunk`.
+   * When `long_document_strategy` is set to `chunk`, Elasticsearch splits each document into smaller parts but still returns a single score per document.
+   * That score reflects the highest relevance score among all chunks.
    */
   long_document_strategy?: string
   /**
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 8676bb221b..b9d5fb0972 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -50,7 +50,9 @@ import {
  */
 export class InferenceEndpoint {
   /**
-   * Chunking configuration object
+   * The chunking configuration object.
+   * Applies only to the `sparse_embedding` and `text_embedding` task types.
+   * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
    */
   chunking_settings?: InferenceChunkingSettings
   /**
@@ -322,67 +324,6 @@ export class InferenceEndpointInfoWatsonx extends InferenceEndpoint {
   task_type: TaskTypeWatsonx
 }
 
-/**
- * Chunking configuration object
- */
-export class ElasticsearchInferenceChunkingSettings {
-  /**
-   * The maximum size of a chunk in words.
-   * This value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).
-   * This value should not exceed the window size for the associated model.
-   * @server_default 250
-   */
-  max_chunk_size?: integer
-  /**
-   * The number of overlapping words for chunks.
-   * It is applicable only to a `word` chunking strategy.
-   * This value cannot be higher than half the `max_chunk_size` value.
-   * @server_default 100
-   */
-  overlap?: integer
-  /**
-   * The number of overlapping sentences for chunks.
-   * It is applicable only for a `sentence` chunking strategy.
-   * It can be either `1` or `0`.
-   * @server_default 1
-   */
-  sentence_overlap?: integer
-  /**
-   * Only applicable to the `recursive` strategy and required when using it.
-   *
-   * Sets a predefined list of separators in the saved chunking settings based on the selected text type.
-   * Values can be `markdown` or `plaintext`.
-   *
-   * Using this parameter is an alternative to manually specifying a custom `separators` list.
-   */
-  separator_group?: string
-  /**
-   * Only applicable to the `recursive` strategy and required when using it.
-   *
-   * A list of strings used as possible split points when chunking text.
-   *
-   * Each string can be a plain string or a regular expression (regex) pattern.
-   * The system tries each separator in order to split the text, starting from the first item in the list.
-   *
-   * After splitting, it attempts to recombine smaller pieces into larger chunks that stay within
-   * the `max_chunk_size` limit, to reduce the total number of chunks generated.
-   */
-  separators?: string[]
-  /**
-   * The chunking strategy: `sentence`, `word`, `none` or `recursive`.
-   *
-   *  * If `strategy` is set to `recursive`, you must also specify:
-   *
-   * - `max_chunk_size`
-   * - either `separators` or`separator_group`
-   *
-   * Learn more about different chunking strategies in the linked documentation.
-   * @server_default sentence
-   * @ext_doc_id chunking-strategies
-   */
-  strategy?: string
-}
-
 /**
  * Chunking configuration object
  */
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index 3880623a5f..b35720d1c7 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -26,7 +26,7 @@ import {
   ElasticsearchTaskSettings,
   ElasticsearchTaskType
 } from '@inference/_types/CommonTypes'
-import { ElasticsearchInferenceChunkingSettings } from '@inference/_types/Services'
+import { InferenceChunkingSettings } from '@inference/_types/Services'
 
 /**
  * Create an Elasticsearch inference endpoint.
@@ -78,10 +78,12 @@ export interface Request extends RequestBase {
   }
   body: {
     /**
-     * The chunking configuration object. For the `rerank` task type, you can enable chunking by setting the `long_document_strategy` parameter to `chunk` in the `service_settings` object.
+     * The chunking configuration object.
+     * Applies only to the `sparse_embedding` and `text_embedding` task types.
+     * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
      * @ext_doc_id inference-chunking
      */
-    chunking_settings?: ElasticsearchInferenceChunkingSettings
+    chunking_settings?: InferenceChunkingSettings
     /**
      * The type of service supported for the specified task type. In this case, `elasticsearch`.
      */