From ca6b4e7e318f3abffcff30fe64971e36a6a62eaf Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 07:08:39 -0700 Subject: [PATCH] Add JinaAI inference API details (#4030) Co-authored-by: Laura Trotta <153528055+l-trotta@users.noreply.github.com> (cherry picked from commit 409acf1a04bbbcc359e0f352ec0563d5b1e9a780) --- output/openapi/elasticsearch-openapi.json | 157 ++++++++ .../elasticsearch-serverless-openapi.json | 157 ++++++++ output/schema/schema.json | 355 +++++++++++++++++- output/typescript/types.ts | 34 ++ package-lock.json | 26 +- specification/_doc_ids/table.csv | 4 + .../_json_spec/inference.put_jinaai.json | 35 ++ .../inference/put_jinaai/PutJinaAiRequest.ts | 160 ++++++++ .../inference/put_jinaai/PutJinaAiResponse.ts | 24 ++ .../request/PutJinaAiRequestExample1.yaml | 12 + .../request/PutJinaAiRequestExample2.yaml | 16 + 11 files changed, 966 insertions(+), 14 deletions(-) create mode 100644 specification/_json_spec/inference.put_jinaai.json create mode 100644 specification/inference/put_jinaai/PutJinaAiRequest.ts create mode 100644 specification/inference/put_jinaai/PutJinaAiResponse.ts create mode 100644 specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample1.yaml create mode 100644 specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 064b4d9c19..f0e72fea77 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17987,6 +17987,92 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{jinaai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an JinaAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the 
.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-jinaai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "jinaai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_jinaai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutJinaAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", + "value": "{\n 
\"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" + }, + "PutJinaAiRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.18.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -77128,6 +77214,77 @@ "url" ] }, + "inference.put_jinaai:JinaAITaskType": { + "type": "string", + "enum": [ + "rerank", + "text_embedding" + ] + }, + "inference.put_jinaai:ServiceType": { + "type": "string", + "enum": [ + "jinaai" + ] + }, + "inference.put_jinaai:JinaAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://jina.ai/embeddings/" + }, + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", + "type": "string" + }, + "rate_limit": { + "$ref": 
"#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": "#/components/schemas/inference.put_jinaai:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_jinaai:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_jinaai:JinaAITaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return the doc text within the results.", + "type": "boolean" + }, + "task": { + "$ref": "#/components/schemas/inference.put_jinaai:TextEmbeddingTask" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + } + } + }, + "inference.put_jinaai:TextEmbeddingTask": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index eac2fe4fc9..b66fa9db0b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9953,6 +9953,92 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{jinaai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an JinaAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed 
if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-jinaai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "jinaai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_jinaai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutJinaAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": 
\"JinaAi-Api-key\"\n }\n}" + }, + "PutJinaAiRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.18.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -48458,6 +48544,77 @@ "url" ] }, + "inference.put_jinaai:JinaAITaskType": { + "type": "string", + "enum": [ + "rerank", + "text_embedding" + ] + }, + "inference.put_jinaai:ServiceType": { + "type": "string", + "enum": [ + "jinaai" + ] + }, + "inference.put_jinaai:JinaAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://jina.ai/embeddings/" + }, + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": 
"#/components/schemas/inference.put_jinaai:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_jinaai:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_jinaai:JinaAITaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return the doc text within the results.", + "type": "boolean" + }, + "task": { + "$ref": "#/components/schemas/inference.put_jinaai:TextEmbeddingTask" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + } + } + }, + "inference.put_jinaai:TextEmbeddingTask": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema.json b/output/schema/schema.json index 4296908c28..c75cfa6206 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9441,6 +9441,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the 
deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-jinaai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-jinaai.html", + "name": "inference.put_jinaai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_jinaai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_jinaai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{jinaai_inference_id}" + } + ] + }, { "availability": { "stack": { @@ -150822,6 +150867,314 @@ }, "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" }, + { + "kind": "interface", + "name": { + "name": "JinaAIServiceSettings", + "namespace": "inference.put_jinaai" + }, + "properties": [ + { + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "jinaAi-embeddings", + "extDocUrl": "https://jina.ai/embeddings/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nFor a 
`rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from JinaAI.\nBy default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types.", + "extDocId": "jinaAi-rate-limit", + "extDocUrl": "https://jina.ai/contact-sales/#rate-limit", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm.\nThe default values varies with the embedding type.\nFor example, a float embedding type uses a `dot_product` similarity measure by default.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "SimilarityType", + "namespace": "inference.put_jinaai" + } + } + } + ], + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L108-L137" + }, + { + "kind": "interface", + "name": { + "name": "JinaAITaskSettings", + "namespace": "inference.put_jinaai" + }, + "properties": [ + { + "description": "For a `rerank` task, return the doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the task passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run 
against a vector database to find relevant documents.", + "name": "task", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingTask", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L139-L160" + }, + { + "kind": "enum", + "members": [ + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L86-L89" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `jinaai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `jinaai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAIServiceSettings", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAITaskSettings", + "namespace": "inference.put_jinaai" + } + } + } + ] + }, + "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutJinaAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" + }, + "PutJinaAiRequestExample2": { + "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", + "summary": "A rerank task", + "value": "{\n 
\"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_jinaai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "jinaai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "jinaai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L91-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "SimilarityType", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L95-L99" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], 
+ "name": { + "name": "TextEmbeddingTask", + "namespace": "inference.put_jinaai" + }, + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L101-L106" + }, { "kind": "interface", "name": { @@ -155796,7 +156149,7 @@ { "description": "Joins each element of an array into a single string using a separator character between each element.\nThrows an error when the field is not an array.", "docId": "join-processor", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/join-processor.html", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/join-processor.html", "name": "join", "required": false, "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 0f6550d727..d9734eadbd 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13291,6 +13291,40 @@ export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo export type InferencePutHuggingFaceServiceType = 'hugging_face' +export interface InferencePutJinaaiJinaAIServiceSettings { + api_key: string + model_id?: string + rate_limit?: InferenceRateLimitSetting + similarity?: InferencePutJinaaiSimilarityType +} + +export interface InferencePutJinaaiJinaAITaskSettings { + return_documents?: boolean + task?: InferencePutJinaaiTextEmbeddingTask + top_n?: integer +} + +export type InferencePutJinaaiJinaAITaskType = 'rerank' | 'text_embedding' + +export interface InferencePutJinaaiRequest extends RequestBase { + task_type: InferencePutJinaaiJinaAITaskType + jinaai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutJinaaiServiceType + service_settings: InferencePutJinaaiJinaAIServiceSettings + task_settings?: InferencePutJinaaiJinaAITaskSettings + } +} + +export type InferencePutJinaaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutJinaaiServiceType = 'jinaai' + +export type InferencePutJinaaiSimilarityType = 'cosine' | 'dot_product' 
| 'l2_norm' + +export type InferencePutJinaaiTextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/package-lock.json b/package-lock.json index df19b685a5..4388b85a6e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "@redocly/cli": "^1.33.1", + "@redocly/cli": "^1.34.0", "@stoplight/spectral-cli": "^6.14.2" } }, @@ -489,9 +489,9 @@ } }, "node_modules/@redocly/cli": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.33.1.tgz", - "integrity": "sha512-co+Vr/RfH9Nca3eiYuYvbLxI+5RVOyJ+l56B0SmU5UHfticTUXirO0vxtFmkHmch6YIFVU6BCF4tFbj7ssF8iQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.0.tgz", + "integrity": "sha512-Kg/t9zMjZB5cyb0YQLa+gne5E5Rz6wZP/goug1+2qaR17UqeupidBzwqDdr3lszEK3q2A37g4+W7pvdBOkiGQA==", "license": "MIT", "dependencies": { "@opentelemetry/api": "1.9.0", @@ -500,8 +500,8 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@redocly/config": "^0.22.0", - "@redocly/openapi-core": "1.33.1", - "@redocly/respect-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", + "@redocly/respect-core": "1.34.0", "abort-controller": "^3.0.0", "chokidar": "^3.5.1", "colorette": "^1.2.0", @@ -564,9 +564,9 @@ "license": "MIT" }, "node_modules/@redocly/openapi-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.33.1.tgz", - "integrity": "sha512-tL3v8FVwdcCAcruOZV77uxH2ZFtnY3DRPG+rgmlm9hsu5uoatofVSJIJHUroz54KJ8ryeo28wQHhOr8iReGGEQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.0.tgz", + "integrity": "sha512-Ji00EiLQRXq0pJIz5pAjGF9MfQvQVsQehc6uIis6sqat8tG/zh25Zi64w6HVGEDgJEzUeq/CuUlD0emu3Hdaqw==", "license": "MIT", "dependencies": { 
"@redocly/ajv": "^8.11.2", @@ -606,14 +606,14 @@ } }, "node_modules/@redocly/respect-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.33.1.tgz", - "integrity": "sha512-Sh6TahtuvSzvejkfu74KErdMX6VtrNNRJAtwH9A6R1Igo8WVmrdoFE99uAp/dOL9bpAQPg4oKtrTF60avN7YYA==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.0.tgz", + "integrity": "sha512-CO2XxJ0SUYHKixKPTQm2U6QrGLnNhQy88CnX20llCxXDKd485cSioRMZ8MMNhHrnDsUlprSuM3ui2z5JGf1ftw==", "license": "MIT", "dependencies": { "@faker-js/faker": "^7.6.0", "@redocly/ajv": "8.11.2", - "@redocly/openapi-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", "better-ajv-errors": "^1.2.0", "colorette": "^2.0.20", "concat-stream": "^2.0.0", diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index aeb2487770..04818b0fd8 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -322,6 +322,7 @@ inference-api-post-eis-chat-completion,https://www.elastic.co/guide/en/elasticse inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-hugging-face.html +inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-jinaai.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-openai.html inference-api-put-voyageai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-voyageai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html @@ -336,6 +337,9 @@ 
ingest-circle-processor,https://www.elastic.co/guide/en/elasticsearch/reference/ ingest-node-set-security-user-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{banch}/ingest-node-set-security-user-processor.html inner-hits,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inner-hits.html ip-location-delete-database,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/delete-ip-location-database-api.html +jinaAi-embeddings,https://jina.ai/embeddings/ +jinaAi-rate-limit,https://jina.ai/contact-sales/#rate-limit +join-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/join-processor.html ip-location-get-database,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-ip-location-database-api.html ip-location-put-database,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-ip-location-database-api.html join-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/join-processor.html diff --git a/specification/_json_spec/inference.put_jinaai.json b/specification/_json_spec/inference.put_jinaai.json new file mode 100644 index 0000000000..80af2a69c9 --- /dev/null +++ b/specification/_json_spec/inference.put_jinaai.json @@ -0,0 +1,35 @@ +{ + "inference.put_jinaai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html", + "description": "Configure a JinaAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{jinaai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "jinaai_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service 
settings" + } + } +} diff --git a/specification/inference/put_jinaai/PutJinaAiRequest.ts b/specification/inference/put_jinaai/PutJinaAiRequest.ts new file mode 100644 index 0000000000..91486dc832 --- /dev/null +++ b/specification/inference/put_jinaai/PutJinaAiRequest.ts @@ -0,0 +1,160 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create a JinaAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `jinaai` service. + * + * To review the available `rerank` models, refer to <https://jina.ai/reranker>. + * To review the available `text_embedding` models, refer to <https://jina.ai/embeddings/>. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_jinaai + * @availability stack since=8.18.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-jinaai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{jinaai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: JinaAITaskType + /** + * The unique identifier of the inference endpoint. + */ + jinaai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `jinaai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `jinaai` service. + */ + service_settings: JinaAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: JinaAITaskSettings + } +} + +export enum JinaAITaskType { + rerank, + text_embedding +} + +export enum ServiceType { + jinaai +} + +export enum SimilarityType { + cosine, + dot_product, + l2_norm +} + +export enum TextEmbeddingTask { + classification, + clustering, + ingest, + search +} + +export class JinaAIServiceSettings { + /** + * A valid API key of your JinaAI account. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. 
+ * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id jinaAi-embeddings + */ + api_key: string + /** + * The name of the model to use for the inference task. + * For a `rerank` task, it is required. + * For a `text_embedding` task, it is optional. + */ + model_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from JinaAI. + * By default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types. + * @ext_doc_id jinaAi-rate-limit + */ + rate_limit?: RateLimitSetting + /** + * For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm. + * The default value varies with the embedding type. + * For example, a float embedding type uses a `dot_product` similarity measure by default. + */ + similarity?: SimilarityType +} + +export class JinaAITaskSettings { + /** + * For a `rerank` task, return the doc text within the results. + */ + return_documents?: boolean + /** + * For a `text_embedding` task, the task passed to the model. + * Valid values are: + * + * * `classification`: Use it for embeddings passed through a text classifier. + * * `clustering`: Use it for the embeddings run through a clustering algorithm. + * * `ingest`: Use it for storing document embeddings in a vector database. + * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. + */ + task?: TextEmbeddingTask + /** + * For a `rerank` task, the number of most relevant documents to return. + * It defaults to the number of the documents. + * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. 
+ */ + top_n?: integer +} diff --git a/specification/inference/put_jinaai/PutJinaAiResponse.ts b/specification/inference/put_jinaai/PutJinaAiResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_jinaai/PutJinaAiResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample1.yaml b/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample1.yaml new file mode 100644 index 0000000000..832d10e0c4 --- /dev/null +++ b/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service. 
+# method_request: "PUT _inference/text_embedding/jinaai-embeddings" +# type: "request" +value: |- + { + "service": "jinaai", + "service_settings": { + "model_id": "jina-embeddings-v3", + "api_key": "JinaAi-Api-key" + } + } diff --git a/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample2.yaml b/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample2.yaml new file mode 100644 index 0000000000..fb47dc5095 --- /dev/null +++ b/specification/inference/put_jinaai/examples/request/PutJinaAiRequestExample2.yaml @@ -0,0 +1,16 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service. +# method_request: "PUT _inference/rerank/jinaai-rerank" +# type: "request" +value: |- + { + "service": "jinaai", + "service_settings": { + "api_key": "JinaAI-Api-key", + "model_id": "jina-reranker-v2-base-multilingual" + }, + "task_settings": { + "top_n": 10, + "return_documents": true + } + }