diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 3ea793b677..bdf72f3267 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17830,6 +17830,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{huggingface_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Hugging Face inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-hugging-face", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "huggingface_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutHuggingFaceRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { 
"put": { "tags": [ @@ -77038,6 +77116,41 @@ } } }, + "inference.put_hugging_face:HuggingFaceTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_hugging_face:ServiceType": { + "type": "string", + "enum": [ + "hugging_face" + ] + }, + "inference.put_hugging_face:HuggingFaceServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://huggingface.co/settings/tokens" + }, + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.", + "type": "string" + } + }, + "required": [ + "api_key", + "url" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4c0e9d129b..d9cf50a21a 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9652,6 +9652,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{huggingface_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Hugging Face inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-hugging-face", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "huggingface_inference_id", + "description": "The unique identifier of the inference 
endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutHuggingFaceRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -48230,6 +48308,41 @@ } } }, + "inference.put_hugging_face:HuggingFaceTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_hugging_face:ServiceType": { + "type": "string", + "enum": [ + "hugging_face" + ] + }, + "inference.put_hugging_face:HuggingFaceServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://huggingface.co/settings/tokens" + }, + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.", + "type": "string" + } + }, + "required": [ + "api_key", + "url" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index daa0a586f1..335c9e714f 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4636,6 +4636,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy 
the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-huggingface", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "name": "inference.put_hugging_face", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{huggingface_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27111,6 +27156,119 @@ }, "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + } + } + } + ] + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "huggingface_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100625,6 +100783,32 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": 
"inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "hugging_face" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" + }, { "kind": "enum", "members": [ @@ -121151,6 +121335,54 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + }, + "properties": [ + { + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 2f0e81c394..47cf7d73a2 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9348,6 +9348,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in 
the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-huggingface", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "name": "inference.put_hugging_face", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{huggingface_inference_id}" + } + ] + }, { "availability": { "stack": { @@ -150616,6 +150661,193 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + }, + "properties": [ + { + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. 
In this case, `hugging_face`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `hugging_face` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + } + } + } + ] + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "huggingface_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "hugging_face" + } + ], + "name": { + "name": "ServiceType", + "namespace": 
"inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index a129e27fbc..2179267af7 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13272,6 +13272,28 @@ export type InferencePutEisResponse = InferenceInferenceEndpointInfo export type InferencePutEisServiceType = 'elastic' +export interface InferencePutHuggingFaceHuggingFaceServiceSettings { + api_key: string + rate_limit?: InferenceRateLimitSetting + url: string +} + +export type InferencePutHuggingFaceHuggingFaceTaskType = 'text_embedding' + +export interface InferencePutHuggingFaceRequest extends RequestBase { + task_type: InferencePutHuggingFaceHuggingFaceTaskType + huggingface_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutHuggingFaceServiceType + service_settings: InferencePutHuggingFaceHuggingFaceServiceSettings + } +} + +export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo + +export type InferencePutHuggingFaceServiceType = 'hugging_face' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 9532dedc92..e9c1cdaa0f 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -243,6 +243,7 @@ grok,https://www.elastic.co/guide/en/elasticsearch/reference/current/grok.html grok-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/grok-processor.html gsub-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/gsub-processor.html health-api,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report +huggingface-tokens,https://huggingface.co/settings/tokens ilm-delete-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-delete-lifecycle ilm-explain-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-explain-lifecycle ilm-get-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-get-lifecycle @@ -319,6 +320,7 @@ inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html +inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx diff --git a/specification/_json_spec/inference.put_hugging_face.json b/specification/_json_spec/inference.put_hugging_face.json new file mode 100644 index 0000000000..76965d61ba --- /dev/null +++ b/specification/_json_spec/inference.put_hugging_face.json @@ -0,0 +1,35 @@ +{ + "inference.put_hugging_face": { + "documentation": { + "url": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "description": "Configure a HuggingFace inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{huggingface_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "huggingface_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts new file mode 100644 index 0000000000..5660473643 --- /dev/null +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -0,0 +1,120 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create a Hugging Face inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `hugging_face` service. + * + * You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + * Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section. + * Create the endpoint and copy the URL after the endpoint initialization has been finished. + * + * The following models are recommended for the Hugging Face service: + * + * * `all-MiniLM-L6-v2` + * * `all-MiniLM-L12-v2` + * * `all-mpnet-base-v2` + * * `e5-base-v2` + * * `e5-small-v2` + * * `multilingual-e5-base` + * * `multilingual-e5-small` + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
+ * @rest_spec_name inference.put_hugging_face + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-huggingface + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{huggingface_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: HuggingFaceTaskType + /** + * The unique identifier of the inference endpoint. + */ + huggingface_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `hugging_face`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `hugging_face` service. + */ + service_settings: HuggingFaceServiceSettings + } +} + +export enum HuggingFaceTaskType { + text_embedding +} + +export enum ServiceType { + hugging_face +} + +export class HuggingFaceServiceSettings { + /** + * A valid access token for your HuggingFace account. + * You can create or find your access tokens on the HuggingFace settings page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id huggingface-tokens + */ + api_key: string + /** + * This setting helps to minimize the number of rate limit errors returned from Hugging Face. + * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. + */ + rate_limit?: RateLimitSetting + /** + * The URL endpoint to use for the requests. + */ + url: string +} diff --git a/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml new file mode 100644 index 0000000000..4e557251a5 --- /dev/null +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type. +# method_request: "PUT _inference/text_embedding/hugging-face-embeddings" +# type: "request" +value: |- + { + "service": "hugging_face", + "service_settings": { + "api_key": "hugging-face-access-token", + "url": "url-endpoint" + } + }
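For reference, a minimal client-side sketch of the request this spec defines, in TypeScript since the spec files themselves are TypeScript. It is not part of the generated output: the cluster URL, both API keys, the inference ID, and the Hugging Face endpoint URL are placeholders, the inlined `rate_limit` shape mirrors `InferenceRateLimitSetting` from `output/typescript/types.ts`, and the built-in `fetch` assumes Node 18+ or a browser.

```typescript
// Sketch: create a Hugging Face `text_embedding` inference endpoint via
// PUT /_inference/{task_type}/{huggingface_inference_id}.
// All URLs, keys, and IDs below are placeholders, not working values.

interface HuggingFaceServiceSettings {
  api_key: string // Hugging Face access token (sent only at creation time)
  url: string // URL of the Hugging Face inference endpoint
  rate_limit?: { requests_per_minute?: number } // service default is 3000
}

interface PutHuggingFaceRequestBody {
  service: 'hugging_face'
  service_settings: HuggingFaceServiceSettings
}

async function putHuggingFaceEndpoint(
  clusterUrl: string, // e.g. 'https://localhost:9200' (placeholder)
  esApiKey: string, // Elasticsearch API key (placeholder)
  inferenceId: string,
  body: PutHuggingFaceRequestBody
): Promise<unknown> {
  // `text_embedding` is hardcoded because it is the only member of
  // HuggingFaceTaskType in this spec.
  const res = await fetch(
    `${clusterUrl}/_inference/text_embedding/${encodeURIComponent(inferenceId)}`,
    {
      method: 'PUT',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `ApiKey ${esApiKey}`
      },
      body: JSON.stringify(body)
    }
  )
  if (!res.ok) {
    throw new Error(
      `inference.put_hugging_face failed: ${res.status} ${await res.text()}`
    )
  }
  // On success the response body is an InferenceEndpointInfo object.
  return res.json()
}

// Usage mirroring PutHuggingFaceRequestExample1:
// await putHuggingFaceEndpoint('https://localhost:9200', '<es-api-key>',
//   'hugging-face-embeddings', {
//     service: 'hugging_face',
//     service_settings: {
//       api_key: 'hugging-face-access-token',
//       url: 'url-endpoint'
//     }
//   })
```

Per the request definition above, `chunking_settings` could be added to the body as an optional third property; `service` and `service_settings` are the only required ones.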