From 2b23030d532698ba70d42cb80d43a5269b02f7ce Mon Sep 17 00:00:00 2001 From: kosabogi Date: Mon, 13 Oct 2025 12:13:34 +0200 Subject: [PATCH 1/3] Adds new parameters to the elasticsearch inference rerank API --- specification/inference/_types/CommonTypes.ts | 17 +++++++++++++++++ .../PutElasticsearchRequest.ts | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 25d1be939a..f598870e8b 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1306,6 +1306,23 @@ export class ElasticsearchServiceSettings { * The maximum value is 32. */ num_threads: integer + /** + * Only for the `rerank` task type. + * Controls the strategy used for processing long documents during inference. + * + * Possible values: + * - `truncate` (default): Processes only the beginning of each document. + * - `chunk`: Splits long documents into smaller parts (chunks) before inference. + * + * To enable chunking, set this value to `chunk`. + */ + long_document_strategy?: string + /** + * Only for the `rerank` task type. + * Limits the number of chunks per document that are sent for inference when chunking is enabled. + * If not set, all chunks generated for the document are processed. + */ + max_chunks_per_doc?: integer } export class ElasticsearchTaskSettings { diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts index b57b062f67..3880623a5f 100644 --- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts +++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts @@ -26,7 +26,7 @@ import { ElasticsearchTaskSettings, ElasticsearchTaskType } from '@inference/_types/CommonTypes' -import { InferenceChunkingSettings } from '@inference/_types/Services' +import { ElasticsearchInferenceChunkingSettings } from '@inference/_types/Services' /** * Create an Elasticsearch inference endpoint. @@ -78,10 +78,10 @@ export interface Request extends RequestBase { } body: { /** - * The chunking configuration object. + * The chunking configuration object. For the `rerank` task type, you can enable chunking by setting the `long_document_strategy` parameter to `chunk` in the `service_settings` object. * @ext_doc_id inference-chunking */ - chunking_settings?: InferenceChunkingSettings + chunking_settings?: ElasticsearchInferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `elasticsearch`. */ From 911b868dc06ac1a98a557a69d2b60f67e49a208c Mon Sep 17 00:00:00 2001 From: kosabogi Date: Mon, 13 Oct 2025 12:26:29 +0200 Subject: [PATCH 2/3] Adds unique inference chunking settings for elasticsearch --- specification/inference/_types/Services.ts | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 788604d1d1..8676bb221b 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -322,6 +322,67 @@ export class InferenceEndpointInfoWatsonx extends InferenceEndpoint { task_type: TaskTypeWatsonx } +/** + * Chunking configuration object + */ +export class ElasticsearchInferenceChunkingSettings { + /** + * The maximum size of a chunk in words. + * This value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). + * This value should not exceed the window size for the associated model. + * @server_default 250 + */ + max_chunk_size?: integer + /** + * The number of overlapping words for chunks. + * It is applicable only to a `word` chunking strategy. + * This value cannot be higher than half the `max_chunk_size` value. + * @server_default 100 + */ + overlap?: integer + /** + * The number of overlapping sentences for chunks. + * It is applicable only for a `sentence` chunking strategy. + * It can be either `1` or `0`. + * @server_default 1 + */ + sentence_overlap?: integer + /** + * Only applicable to the `recursive` strategy and required when using it. + * + * Sets a predefined list of separators in the saved chunking settings based on the selected text type. + * Values can be `markdown` or `plaintext`. + * + * Using this parameter is an alternative to manually specifying a custom `separators` list. + */ + separator_group?: string + /** + * Only applicable to the `recursive` strategy and required when using it. + * + * A list of strings used as possible split points when chunking text. + * + * Each string can be a plain string or a regular expression (regex) pattern. + * The system tries each separator in order to split the text, starting from the first item in the list. + * + * After splitting, it attempts to recombine smaller pieces into larger chunks that stay within + * the `max_chunk_size` limit, to reduce the total number of chunks generated. + */ + separators?: string[] + /** + * The chunking strategy: `sentence`, `word`, `none` or `recursive`. + * + * * If `strategy` is set to `recursive`, you must also specify: + * + * - `max_chunk_size` + * - either `separators` or`separator_group` + * + * Learn more about different chunking strategies in the linked documentation. + * @server_default sentence + * @ext_doc_id chunking-strategies + */ + strategy?: string +} + /** * Chunking configuration object */ From e8b530c98ecbce55c4768bd3e4593d5b74bcfeec Mon Sep 17 00:00:00 2001 From: kosabogi Date: Thu, 16 Oct 2025 15:26:55 +0200 Subject: [PATCH 3/3] Addresses suggestions --- specification/inference/_types/CommonTypes.ts | 5 +- specification/inference/_types/Services.ts | 65 +------------------ .../PutElasticsearchRequest.ts | 8 ++- 3 files changed, 11 insertions(+), 67 deletions(-) diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index f598870e8b..ca3cee712f 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1307,14 +1307,15 @@ export class ElasticsearchServiceSettings { */ num_threads: integer /** - * Only for the `rerank` task type. + * Available only for the `rerank` task type using the Elastic reranker model. * Controls the strategy used for processing long documents during inference. * * Possible values: * - `truncate` (default): Processes only the beginning of each document. * - `chunk`: Splits long documents into smaller parts (chunks) before inference. * - * To enable chunking, set this value to `chunk`. + * When `long_document_strategy` is set to `chunk`, Elasticsearch splits each document into smaller parts but still returns a single score per document. + * That score reflects the highest relevance score among all chunks. */ long_document_strategy?: string /** diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 8676bb221b..b9d5fb0972 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -50,7 +50,9 @@ import { */ export class InferenceEndpoint { /** - * Chunking configuration object + * The chunking configuration object. + * Applies only to the `sparse_embedding` and `text_embedding` task types. + * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. */ chunking_settings?: InferenceChunkingSettings /** @@ -322,67 +324,6 @@ export class InferenceEndpointInfoWatsonx extends InferenceEndpoint { task_type: TaskTypeWatsonx } -/** - * Chunking configuration object - */ -export class ElasticsearchInferenceChunkingSettings { - /** - * The maximum size of a chunk in words. - * This value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). - * This value should not exceed the window size for the associated model. - * @server_default 250 - */ - max_chunk_size?: integer - /** - * The number of overlapping words for chunks. - * It is applicable only to a `word` chunking strategy. - * This value cannot be higher than half the `max_chunk_size` value. - * @server_default 100 - */ - overlap?: integer - /** - * The number of overlapping sentences for chunks. - * It is applicable only for a `sentence` chunking strategy. - * It can be either `1` or `0`. - * @server_default 1 - */ - sentence_overlap?: integer - /** - * Only applicable to the `recursive` strategy and required when using it. - * - * Sets a predefined list of separators in the saved chunking settings based on the selected text type. - * Values can be `markdown` or `plaintext`. - * - * Using this parameter is an alternative to manually specifying a custom `separators` list. - */ - separator_group?: string - /** - * Only applicable to the `recursive` strategy and required when using it. - * - * A list of strings used as possible split points when chunking text. - * - * Each string can be a plain string or a regular expression (regex) pattern. - * The system tries each separator in order to split the text, starting from the first item in the list. - * - * After splitting, it attempts to recombine smaller pieces into larger chunks that stay within - * the `max_chunk_size` limit, to reduce the total number of chunks generated. - */ - separators?: string[] - /** - * The chunking strategy: `sentence`, `word`, `none` or `recursive`. - * - * * If `strategy` is set to `recursive`, you must also specify: - * - * - `max_chunk_size` - * - either `separators` or`separator_group` - * - * Learn more about different chunking strategies in the linked documentation. - * @server_default sentence - * @ext_doc_id chunking-strategies - */ - strategy?: string -} - /** * Chunking configuration object */ diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts index 3880623a5f..b35720d1c7 100644 --- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts +++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts @@ -26,7 +26,7 @@ import { ElasticsearchTaskSettings, ElasticsearchTaskType } from '@inference/_types/CommonTypes' -import { ElasticsearchInferenceChunkingSettings } from '@inference/_types/Services' +import { InferenceChunkingSettings } from '@inference/_types/Services' /** * Create an Elasticsearch inference endpoint. @@ -78,10 +78,12 @@ export interface Request extends RequestBase { } body: { /** - * The chunking configuration object. For the `rerank` task type, you can enable chunking by setting the `long_document_strategy` parameter to `chunk` in the `service_settings` object. + * The chunking configuration object. + * Applies only to the `sparse_embedding` and `text_embedding` task types. + * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. * @ext_doc_id inference-chunking */ - chunking_settings?: ElasticsearchInferenceChunkingSettings + chunking_settings?: InferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `elasticsearch`. */