From 60d4d8c0a3887212aab1a7fceceb6d7d61b74e7f Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 06:50:23 -0700 Subject: [PATCH] Add Hugging Face inference API details (#4029) (cherry picked from commit 41120d3350a9c6eb4ead17caec46055aa6ff96ef) --- output/openapi/elasticsearch-openapi.json | 113 +++++++++ .../elasticsearch-serverless-openapi.json | 113 +++++++++ output/schema/schema-serverless.json | 232 ++++++++++++++++++ output/schema/schema.json | 232 ++++++++++++++++++ output/typescript/types.ts | 22 ++ specification/_doc_ids/table.csv | 2 + .../inference.put_hugging_face.json | 35 +++ .../put_hugging_face/PutHuggingFaceRequest.ts | 120 +++++++++ .../PutHuggingFaceResponse.ts | 24 ++ .../PutHuggingFaceRequestExample1.yaml | 12 + 10 files changed, 905 insertions(+) create mode 100644 specification/_json_spec/inference.put_hugging_face.json create mode 100644 specification/inference/put_hugging_face/PutHuggingFaceRequest.ts create mode 100644 specification/inference/put_hugging_face/PutHuggingFaceResponse.ts create mode 100644 specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 499fee58ea..69db955779 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17929,6 +17929,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{huggingface_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Hugging Face inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration 
section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-hugging-face", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "huggingface_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + 
"examples": { + "PutHuggingFaceRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -77035,6 +77113,41 @@ } } }, + "inference.put_hugging_face:HuggingFaceTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_hugging_face:ServiceType": { + "type": "string", + "enum": [ + "hugging_face" + ] + }, + "inference.put_hugging_face:HuggingFaceServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://huggingface.co/settings/tokens" + }, + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.", + "type": "string" + } + }, + "required": [ + "api_key", + "url" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", 
"enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 433057454f..4f836cc6ca 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9885,6 +9885,84 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{huggingface_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Hugging Face inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-hugging-face", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + 
"deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "huggingface_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutHuggingFaceRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -48355,6 +48433,41 @@ } } }, + "inference.put_hugging_face:HuggingFaceTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_hugging_face:ServiceType": { + "type": "string", + "enum": [ + "hugging_face" + ] + }, + "inference.put_hugging_face:HuggingFaceServiceSettings": { + "type": "object", + "properties": { + "api_key": { + 
"externalDocs": { + "url": "https://huggingface.co/settings/tokens" + }, + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL endpoint to use for the requests.", + "type": "string" + } + }, + "required": [ + "api_key", + "url" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index b17011fc86..683d9674a7 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4803,6 +4803,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* 
`all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-huggingface", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "name": "inference.put_hugging_face", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{huggingface_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27595,6 +27640,119 @@ }, "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": 
"inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `hugging_face` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + } + } + } + ] + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { 
+ "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "huggingface_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100440,6 +100598,32 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" + }, + { + "kind": "enum", + "members": [ + { 
+ "name": "hugging_face" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" + }, { "kind": "enum", "members": [ @@ -121017,6 +121201,54 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + }, + "properties": [ + { + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json 
b/output/schema/schema.json index efb55bee92..dd91c0a3d6 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9397,6 +9397,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-huggingface", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "name": "inference.put_hugging_face", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": 
"inference.put_hugging_face" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{huggingface_inference_id}" + } + ] + }, { "availability": { "stack": { @@ -150631,6 +150676,193 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + }, + "properties": [ + { + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } 
+ ], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + } + } + } + ] + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n 
}\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "huggingface_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "hugging_face" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 86487afe79..3e9c8369ca 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13272,6 +13272,28 @@ export type InferencePutEisResponse = InferenceInferenceEndpointInfo export type InferencePutEisServiceType = 'elastic' +export interface InferencePutHuggingFaceHuggingFaceServiceSettings { + api_key: string + rate_limit?: InferenceRateLimitSetting + url: string +} + +export type InferencePutHuggingFaceHuggingFaceTaskType = 'text_embedding' + +export interface 
InferencePutHuggingFaceRequest extends RequestBase { + task_type: InferencePutHuggingFaceHuggingFaceTaskType + huggingface_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutHuggingFaceServiceType + service_settings: InferencePutHuggingFaceHuggingFaceServiceSettings + } +} + +export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo + +export type InferencePutHuggingFaceServiceType = 'hugging_face' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 0433bc33a8..59bbb80be3 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -244,6 +244,7 @@ grok,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/grok.html grok-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/grok-processor.html gsub-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/gsub-processor.html health-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/health-api.html +huggingface-tokens,https://huggingface.co/settings/tokens ilm-delete-lifecycle,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ilm-delete-lifecycle.html ilm-explain-lifecycle,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ilm-explain-lifecycle.html ilm-get-lifecycle,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ilm-get-lifecycle.html @@ -320,6 +321,7 @@ inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{bran inference-api-post-eis-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html 
inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elastic.html +inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html inference-api-put-voyageai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-voyageai.html inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html diff --git a/specification/_json_spec/inference.put_hugging_face.json b/specification/_json_spec/inference.put_hugging_face.json new file mode 100644 index 0000000000..76965d61ba --- /dev/null +++ b/specification/_json_spec/inference.put_hugging_face.json @@ -0,0 +1,35 @@ +{ + "inference.put_hugging_face": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html", + "description": "Configure a HuggingFace inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{huggingface_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "huggingface_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts new file mode 100644 index 0000000000..5660473643 --- /dev/null +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -0,0 +1,120 @@ +/* + * Licensed to Elasticsearch B.V. 
under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create a Hugging Face inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `hugging_face` service. + * + * You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + * Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section. + * Create the endpoint and copy the URL after the endpoint initialization has been finished. + * + * The following models are recommended for the Hugging Face service: + * + * * `all-MiniLM-L6-v2` + * * `all-MiniLM-L12-v2` + * * `all-mpnet-base-v2` + * * `e5-base-v2` + * * `e5-small-v2` + * * `multilingual-e5-base` + * * `multilingual-e5-small` + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. 
+ * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ * @rest_spec_name inference.put_hugging_face
+ * @availability stack since=8.12.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-huggingface
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{huggingface_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     */
+    task_type: HuggingFaceTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    huggingface_inference_id: Id
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `hugging_face`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `hugging_face` service.
+     */
+    service_settings: HuggingFaceServiceSettings
+  }
+}
+
+export enum HuggingFaceTaskType {
+  text_embedding
+}
+
+export enum ServiceType {
+  hugging_face
+}
+
+export class HuggingFaceServiceSettings {
+  /**
+   * A valid access token for your Hugging Face account.
+   * You can create or find your access tokens on the Hugging Face settings page.
+   *
+   * IMPORTANT: You need to provide the API key only once, during the inference model creation.
+   * The get inference endpoint API does not retrieve your API key.
+   * After creating the inference model, you cannot change the associated API key.
+ * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id huggingface-tokens + */ + api_key: string + /** + * This setting helps to minimize the number of rate limit errors returned from Hugging Face. + * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. + */ + rate_limit?: RateLimitSetting + /** + * The URL endpoint to use for the requests. + */ + url: string +} diff --git a/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml new file mode 100644 index 0000000000..4e557251a5 --- /dev/null +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type. +# method_request: "PUT _inference/text_embedding/hugging-face-embeddings" +# type: "request" +value: |- + { + "service": "hugging_face", + "service_settings": { + "api_key": "hugging-face-access-token", + "url": "url-endpoint" + } + }