From a9c3161a4024eba3eadb486e83d38a9784e554e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Feb 2025 14:14:56 +0100 Subject: [PATCH] [Inference API] Adds chunking_settings to PUT inference API (#3781) * [Inference API] Adds chunking_settings to PUT inference API. * [Inference API] Make contrib. (cherry picked from commit bf2a5ca266971973db9525bd581cb7f3bf27d8ef) --- output/openapi/elasticsearch-openapi.json | 31 +++++++ .../elasticsearch-serverless-openapi.json | 31 +++++++ output/schema/schema-serverless.json | 89 ++++++++++++++++++- output/schema/schema.json | 89 ++++++++++++++++++- output/typescript/types.ts | 8 ++ specification/inference/_types/Services.ts | 37 ++++++++ 6 files changed, 277 insertions(+), 8 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 1322e7da29..7576e09c49 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -73812,6 +73812,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -73828,6 +73831,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index cedc5cfb3e..c8d1d69593 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -46497,6 +46497,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -46513,6 +46516,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 00aed97a76..8b8ff3fac1 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -97051,7 +97051,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -97089,7 +97089,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } @@ -123876,7 +123876,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "description": "Configuration options when storing the inference endpoint", @@ -123886,6 +123886,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -123923,7 +123935,76 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" + }, + { + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "kind": "interface", + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" }, { "description": "InferenceResult is an aggregation of mutually exclusive variants", diff --git a/output/schema/schema.json b/output/schema/schema.json index 4e16da93b8..fd3f74df69 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -146147,6 +146147,75 @@ } } }, + { + "kind": "interface", + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" + }, { "kind": "interface", "description": "Configuration options when storing the inference endpoint", @@ -146155,6 +146224,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -146192,7 +146273,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" }, { "kind": "interface", @@ -146233,7 +146314,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "kind": "interface", @@ -146369,7 +146450,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -146428,7 +146509,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 654e4f0eaa..347ad9f3ce 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13004,7 +13004,15 @@ export type InferenceDenseByteVector = byte[] export type InferenceDenseVector = float[] +export interface InferenceInferenceChunkingSettings extends InferenceInferenceEndpoint { + max_chunk_size?: integer + overlap?: integer + sentence_overlap?: integer + strategy?: string +} + export interface InferenceInferenceEndpoint { + chunking_settings?: InferenceInferenceChunkingSettings service: string service_settings: InferenceServiceSettings task_settings?: InferenceTaskSettings diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 52d3c9f7e4..53024633f5 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -18,12 +18,17 @@ */ import { UserDefinedValue } from '@spec_utils/UserDefinedValue' +import { integer } from '@_types/Numeric' import { TaskType } from '../_types/TaskType' /** * Configuration options when storing the inference endpoint */ export class InferenceEndpoint { + /** + * Chunking configuration object + */ + chunking_settings?: InferenceChunkingSettings /** * The service type */ @@ -52,6 +57,38 @@ export class InferenceEndpointInfo extends InferenceEndpoint { task_type: TaskType } +/** + * Chunking configuration object + */ +export class InferenceChunkingSettings extends InferenceEndpoint { + /** + * Specifies the maximum size of a chunk in words + * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy) + * @server_default 250 + */ + max_chunk_size?: integer + /** + * Specifies the number of overlapping words for chunks + * Only for `word` chunking strategy + * This value cannot be higher than the half of `max_chunk_size` + * @server_default 100 + */ + overlap?: integer + /** + * Specifies the number of overlapping sentences for chunks + * Only for `sentence` chunking strategy + * It can be either `1` or `0` + * @server_default 1 + */ + sentence_overlap?: integer + /** + * Specifies the chunking strategy + * It could be either `sentence` or `word` + * @server_default sentence + */ + strategy?: string +} + export type ServiceSettings = UserDefinedValue export type TaskSettings = UserDefinedValue