diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index c8bc1f658a..b3ae287d4f 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18548,6 +18548,92 @@ "x-state": "Added in 8.15.0" } }, + "/_inference/{task_type}/{googlevertexai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Google Vertex AI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-googlevertexai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "googlevertexai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": 
"#/components/schemas/inference.put_googlevertexai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutGoogleVertexAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{huggingface_inference_id}": { "put": { "tags": [ @@ -78456,6 +78542,68 @@ "model_id" ] }, + "inference.put_googlevertexai:GoogleVertexAITaskType": { + "type": "string", + "enum": [ + "rerank", + "text_embedding" + ] + }, + "inference.put_googlevertexai:ServiceType": { + "type": "string", + "enum": [ + "googlevertexai" + ] + }, + "inference.put_googlevertexai:GoogleVertexAIServiceSettings": { + "type": "object", + "properties": { + "location": { + "externalDocs": { + "url": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" + }, + "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" + }, + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "project_id": { + "description": "The name of the project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "service_account_json": { + "description": "A valid service account in JSON format for the Google Vertex AI API.", + "type": "string" + } + }, + "required": [ + "location", + "model_id", + "project_id", + "service_account_json" + ] + }, + "inference.put_googlevertexai:GoogleVertexAITaskSettings": { + "type": "object", + "properties": { + "auto_truncate": { + "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of the top N documents that should be returned.", + "type": "number" + } + } + }, "inference.put_hugging_face:HuggingFaceTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 9fcba4f969..ab8986a659 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10374,6 +10374,92 @@ "x-state": "Added in 8.15.0" } }, + "/_inference/{task_type}/{googlevertexai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Google Vertex AI inference endpoint", + 
"description": "Create an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-googlevertexai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "googlevertexai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_googlevertexai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutGoogleVertexAiRequestExample1": { + "summary": "A text embedding 
task", + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.15.0" + } + }, "/_inference/{task_type}/{huggingface_inference_id}": { "put": { "tags": [ @@ -49652,6 +49738,68 @@ "model_id" ] }, + "inference.put_googlevertexai:GoogleVertexAITaskType": { + "type": "string", + "enum": [ + "rerank", + "text_embedding" + ] + }, + "inference.put_googlevertexai:ServiceType": { + "type": "string", + "enum": [ + "googlevertexai" + ] + }, + "inference.put_googlevertexai:GoogleVertexAIServiceSettings": { + "type": "object", + "properties": { + "location": { + "externalDocs": { + "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" + }, + "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" + }, + "description": "The name of the model to use for the inference 
task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "project_id": { + "description": "The name of the project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "service_account_json": { + "description": "A valid service account in JSON format for the Google Vertex AI API.", + "type": "string" + } + }, + "required": [ + "location", + "model_id", + "project_id", + "service_account_json" + ] + }, + "inference.put_googlevertexai:GoogleVertexAITaskSettings": { + "type": "object", + "properties": { + "auto_truncate": { + "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of the top N documents that should be returned.", + "type": "number" + } + } + }, "inference.put_hugging_face:HuggingFaceTaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index fcd8cda6f0..c198e9d959 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4600,6 +4600,7 @@ "stack": { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= <<<<<<< HEAD <<<<<<< HEAD @@ -5080,6 +5081,8 @@ "visibility": "public" }, "stack": { +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "since": "8.15.0", "stability": "stable", "visibility": "public" @@ -5126,6 +5129,7 @@ }, "stack": { <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= @@ -5455,6 +5459,8 @@ "visibility": "public" }, "stack": { +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -30557,6 +30563,136 @@ }, "specLocation": 
"inference/put_mistral/PutMistralResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `googlevertexai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskSettings", + "namespace": "inference.put_googlevertexai" + } + } + } + ] + }, + "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for 
`\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutGoogleVertexAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample2": { + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "summary": "A rerank task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_googlevertexai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskType", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "googlevertexai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + }, + { + "body": { + "kind": "value", + "value": { + 
"kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -103895,6 +104031,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "completion" }, @@ -103973,6 +104110,8 @@ { ======= >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "rerank" }, { @@ -103980,6 +104119,7 @@ } ], "name": { +<<<<<<< HEAD "name": "CohereTaskType", "namespace": "inference.put_cohere" }, @@ -104349,6 +104489,8 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAITaskType", "namespace": "inference.put_googlevertexai" }, @@ -104356,6 +104498,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) @@ -104363,6 +104506,8 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "enum", @@ -104371,6 +104516,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "azureopenai" ======= @@ -104385,6 +104531,9 @@ ======= "name": "googlevertexai" >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= + "name": "googlevertexai" +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) } ], "name": { @@ -104392,6 +104541,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD 
+<<<<<<< HEAD <<<<<<< HEAD "namespace": "inference.put_azureopenai" }, @@ -104554,6 +104704,11 @@ "namespace": "inference.put_mistral" }, "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" +======= + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "enum", @@ -124978,6 +125133,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" @@ -126048,6 +126204,8 @@ { "kind": "interface", "name": { +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAIServiceSettings", "namespace": "inference.put_googlevertexai" }, @@ -126110,6 +126268,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) @@ -126117,6 +126276,8 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "required": true, "type": { "kind": "instance_of", @@ -126130,6 +126291,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" ======= @@ -126144,6 +126306,9 @@ ======= "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "interface", @@ -126151,6 +126316,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": 
"AzureOpenAITaskSettings", "namespace": "inference.put_azureopenai" @@ -126166,6 +126332,30 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAITaskSettings", "namespace": "inference.put_googlevertexai" }, @@ -126176,6 +126366,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) @@ -126183,6 +126374,8 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "required": false, "type": { "kind": "instance_of", @@ -126190,6 +126383,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "string", "namespace": "_builtins" @@ -126205,6 +126399,8 @@ >>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) ======= >>>>>>> b82415b5e (Add ELSER inference API details (#4026)) +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "boolean", "namespace": "_builtins" } @@ -126227,6 +126423,7 @@ <<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) @@ -126468,6 +126665,8 @@ } ], "specLocation": 
"inference/put_mistral/PutMistralRequest.ts#L87-L114" +======= +>>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "interface", diff --git a/output/schema/schema.json b/output/schema/schema.json index 193c81e2fb..57d5616a52 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9712,6 +9712,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.15.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-googlevertexai", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html", + "name": "inference.put_googlevertexai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_googlevertexai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_googlevertexai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{googlevertexai_inference_id}" + } + ] + }, { "availability": { 
"serverless": { @@ -153413,6 +153458,273 @@ }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" }, + { + "kind": "interface", + "name": { + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference.put_googlevertexai" + }, + "properties": [ + { + "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "extDocId": "googlevertexai-locations", + "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", + "name": "location", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "extDocId": "googlevertexai-models", + "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the project to use for the inference task.", + "name": "project_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Google Vertex AI.\nBy default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid service account in JSON format for the Google Vertex AI API.", + "name": "service_account_json", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", 
+ "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" + }, + { + "kind": "interface", + "name": { + "name": "GoogleVertexAITaskSettings", + "namespace": "inference.put_googlevertexai" + }, + "properties": [ + { + "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", + "name": "auto_truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of the top N documents that should be returned.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129" + }, + { + "kind": "enum", + "members": [ + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "GoogleVertexAITaskType", + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. 
In this case, `googlevertexai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `googlevertexai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskSettings", + "namespace": "inference.put_googlevertexai" + } + } + } + ] + }, + "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutGoogleVertexAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": 
\"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample2": { + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "summary": "A rerank task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_googlevertexai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskType", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "googlevertexai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "googlevertexai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts 
index f5f0aa7a3e..601ec99892 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13528,6 +13528,36 @@ export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfo export type InferencePutGoogleaistudioServiceType = 'googleaistudio' +export interface InferencePutGooglevertexaiGoogleVertexAIServiceSettings { + location: string + model_id: string + project_id: string + rate_limit?: InferenceRateLimitSetting + service_account_json: string +} + +export interface InferencePutGooglevertexaiGoogleVertexAITaskSettings { + auto_truncate?: boolean + top_n?: integer +} + +export type InferencePutGooglevertexaiGoogleVertexAITaskType = 'rerank' | 'text_embedding' + +export interface InferencePutGooglevertexaiRequest extends RequestBase { + task_type: InferencePutGooglevertexaiGoogleVertexAITaskType + googlevertexai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutGooglevertexaiServiceType + service_settings: InferencePutGooglevertexaiGoogleVertexAIServiceSettings + task_settings?: InferencePutGooglevertexaiGoogleVertexAITaskSettings + } +} + +export type InferencePutGooglevertexaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutGooglevertexaiServiceType = 'googlevertexai' + export interface InferencePutHuggingFaceHuggingFaceServiceSettings { api_key: string rate_limit?: InferenceRateLimitSetting diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 8d7f0ed40a..9387f3ad4d 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -251,7 +251,9 @@ get-trained-models,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o get-transform-stats,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-transform-get-transform-stats get-transform,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-transform-get-transform 
get-trial-status,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-license-get-trial-status googleaistudio-models,https://ai.google.dev/gemini-api/docs/models +googlevertexai-locations,https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations +googlevertexai-models,https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api graph,https://www.elastic.co/guide/en/kibana/current/xpack-graph.html graph-explore-api,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-graph grok,https://www.elastic.co/guide/en/elasticsearch/reference/current/grok.html diff --git a/specification/_json_spec/inference.put_googlevertexai.json b/specification/_json_spec/inference.put_googlevertexai.json new file mode 100644 index 0000000000..6068d4cbc9 --- /dev/null +++ b/specification/_json_spec/inference.put_googlevertexai.json @@ -0,0 +1,35 @@ +{ + "inference.put_googlevertexai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html", + "description": "Configure a Google Vertex AI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{googlevertexai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "googlevertexai_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts new file mode 100644 index 
0000000000..978a384d20 --- /dev/null +++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts @@ -0,0 +1,129 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create a Google Vertex AI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `googlevertexai` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
+ * @rest_spec_name inference.put_googlevertexai + * @availability stack since=8.15.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-googlevertexai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{googlevertexai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: GoogleVertexAITaskType + /** + * The unique identifier of the inference endpoint. + */ + googlevertexai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `googlevertexai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `googlevertexai` service. + */ + service_settings: GoogleVertexAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: GoogleVertexAITaskSettings + } +} + +export enum GoogleVertexAITaskType { + rerank, + text_embedding +} + +export enum ServiceType { + googlevertexai +} + +export class GoogleVertexAIServiceSettings { + /** + * The name of the location to use for the inference task. + * Refer to the Google documentation for the list of supported locations. + * @ext_doc_id googlevertexai-locations + */ + location: string + /** + * The name of the model to use for the inference task. + * Refer to the Google documentation for the list of supported models. + * @ext_doc_id googlevertexai-models + */ + model_id: string + /** + * The name of the project to use for the inference task. 
+ */ + project_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. + * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30,000. + */ + rate_limit?: RateLimitSetting + /** + * A valid service account in JSON format for the Google Vertex AI API. + */ + service_account_json: string +} + +export class GoogleVertexAITaskSettings { + /** + * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. + */ + auto_truncate?: boolean + /** + * For a `rerank` task, the number of the top N documents that should be returned. + */ + top_n?: integer +} diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample1.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample1.yaml new file mode 100644 index 0000000000..f48192a497 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type. +# method_request: "PUT _inference/text_embedding/google_vertex_ai_embeddings" +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "service_account_json": "service-account-json", + "model_id": "model-id", + "location": "location", + "project_id": "project-id" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample2.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample2.yaml new file mode 100644 index 0000000000..8aae51a607 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample2.yaml @@ -0,0 +1,12 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type. +# method_request: "PUT _inference/rerank/google_vertex_ai_rerank" +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "service_account_json": "service-account-json", + "project_id": "project-id" + } + }