diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index bf58cf0c34..a0fd2c2d49 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -18355,7 +18355,7 @@
         "tags": [
           "inference"
         ],
-        "summary": "Create an OpenAI inference endpoint",
+        "summary": "Create an Elasticsearch inference endpoint",
         "description": "Create an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
         "operationId": "inference-put-elasticsearch",
         "parameters": [
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 55dfe3bd72..b448419298 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -10177,7 +10177,7 @@
         "tags": [
           "inference"
         ],
-        "summary": "Create an OpenAI inference endpoint",
+        "summary": "Create an Elasticsearch inference endpoint",
         "description": "Create an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
         "operationId": "inference-put-elasticsearch",
         "parameters": [
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index a6778dfa87..5d3d82692d 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -4605,7 +4605,7 @@
       },
       "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-alibabacloud",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud",
       "name": "inference.put_alibabacloud",
       "privileges": {
         "cluster": [
@@ -4649,8 +4649,8 @@
         }
       },
       "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
-      "docId": "inference-api-amazonbedrock",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html",
+      "docId": "inference-api-put-amazonbedrock",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock",
       "name": "inference.put_amazonbedrock",
       "privileges": {
         "cluster": [
@@ -4694,8 +4694,8 @@
         }
       },
       "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
-      "docId": "inference-api-anthropic",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html",
+      "docId": "inference-api-put-anthropic",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic",
       "name": "inference.put_anthropic",
       "privileges": {
         "cluster": [
@@ -4740,7 +4740,7 @@
       },
       "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-azureaistudio",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio",
       "name": "inference.put_azureaistudio",
       "privileges": {
         "cluster": [
@@ -4785,7 +4785,7 @@
       },
       "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-azureopenai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai",
       "name": "inference.put_azureopenai",
       "privileges": {
         "cluster": [
@@ -4830,7 +4830,7 @@
       },
       "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-cohere",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere",
       "name": "inference.put_cohere",
       "privileges": {
         "cluster": [
@@ -4875,7 +4875,7 @@
       },
       "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
       "docId": "inference-api-put-eis",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis",
       "name": "inference.put_eis",
       "privileges": {
         "cluster": [
@@ -4918,9 +4918,9 @@
           "visibility": "public"
         }
       },
-      "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-elasticsearch",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch",
       "name": "inference.put_elasticsearch",
       "privileges": {
         "cluster": [
@@ -4969,7 +4969,7 @@
       },
       "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-elser",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser",
       "name": "inference.put_elser",
       "privileges": {
         "cluster": [
@@ -5014,7 +5014,7 @@
       },
       "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-googleaistudio",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio",
       "name": "inference.put_googleaistudio",
       "privileges": {
         "cluster": [
@@ -5059,7 +5059,7 @@
       },
       "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-googlevertexai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai",
       "name": "inference.put_googlevertexai",
       "privileges": {
         "cluster": [
@@ -5104,7 +5104,7 @@
       },
       "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-huggingface",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face",
       "name": "inference.put_hugging_face",
       "privileges": {
         "cluster": [
@@ -5149,7 +5149,7 @@
       },
       "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-jinaai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai",
       "name": "inference.put_jinaai",
       "privileges": {
         "cluster": [
@@ -5194,7 +5194,7 @@
       },
       "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-mistral",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral",
       "name": "inference.put_mistral",
       "privileges": {
         "cluster": [
@@ -5239,7 +5239,7 @@
       },
       "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service or `openai` compatible APIs.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-openai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai",
       "name": "inference.put_openai",
       "privileges": {
         "cluster": [
@@ -28543,7 +28543,7 @@
           }
         ]
       },
-      "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "examples": {
         "PutElasticsearchRequestExample1": {
           "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.",
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 2e8348233c..366188ab18 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -9317,7 +9317,7 @@
       },
       "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-alibabacloud",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud",
       "name": "inference.put_alibabacloud",
       "privileges": {
         "cluster": [
@@ -9361,8 +9361,8 @@
         }
       },
       "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
-      "docId": "inference-api-amazonbedrock",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html",
+      "docId": "inference-api-put-amazonbedrock",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock",
       "name": "inference.put_amazonbedrock",
       "privileges": {
         "cluster": [
@@ -9406,8 +9406,8 @@
         }
       },
       "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
-      "docId": "inference-api-anthropic",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html",
+      "docId": "inference-api-put-anthropic",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic",
       "name": "inference.put_anthropic",
       "privileges": {
         "cluster": [
@@ -9452,7 +9452,7 @@
       },
       "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-azureaistudio",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio",
       "name": "inference.put_azureaistudio",
       "privileges": {
         "cluster": [
@@ -9497,7 +9497,7 @@
       },
       "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-azureopenai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai",
       "name": "inference.put_azureopenai",
       "privileges": {
         "cluster": [
@@ -9542,7 +9542,7 @@
       },
       "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-cohere",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere",
       "name": "inference.put_cohere",
       "privileges": {
         "cluster": [
@@ -9587,7 +9587,7 @@
       },
       "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
       "docId": "inference-api-put-eis",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis",
       "name": "inference.put_eis",
       "privileges": {
         "cluster": [
@@ -9630,9 +9630,9 @@
           "visibility": "public"
         }
       },
-      "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-elasticsearch",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch",
       "name": "inference.put_elasticsearch",
       "privileges": {
         "cluster": [
@@ -9681,7 +9681,7 @@
       },
       "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-elser",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser",
       "name": "inference.put_elser",
       "privileges": {
         "cluster": [
@@ -9726,7 +9726,7 @@
       },
       "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-googleaistudio",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio",
       "name": "inference.put_googleaistudio",
       "privileges": {
         "cluster": [
@@ -9771,7 +9771,7 @@
       },
       "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-googlevertexai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai",
       "name": "inference.put_googlevertexai",
       "privileges": {
         "cluster": [
@@ -9816,7 +9816,7 @@
       },
       "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-huggingface",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face",
       "name": "inference.put_hugging_face",
       "privileges": {
         "cluster": [
@@ -9861,7 +9861,7 @@
       },
       "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-jinaai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai",
       "name": "inference.put_jinaai",
       "privileges": {
         "cluster": [
@@ -9906,7 +9906,7 @@
       },
       "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-mistral",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral",
       "name": "inference.put_mistral",
       "privileges": {
         "cluster": [
@@ -9951,7 +9951,7 @@
       },
       "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service or `openai` compatible APIs.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "docId": "inference-api-put-openai",
-      "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai",
       "name": "inference.put_openai",
       "privileges": {
         "cluster": [
@@ -153191,7 +153191,7 @@
           }
         ]
       },
-      "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
       "examples": {
         "PutElasticsearchRequestExample1": {
           "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.",
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index df19aeea0b..3c70811697 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -331,26 +331,26 @@ indices-templates,https://www.elastic.co/guide/en/elasticsearch/reference/curren
 indices-update-settings,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings
 infer-trained-model,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-infer-trained-model
 infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-trained-model-deployment.html
-inference-api-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html
 inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete
 inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get
 inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference
 inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion
 inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put
-inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html
-inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html
-inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html
-inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html
-inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html
-inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html
-inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elasticsearch.html
-inference-api-put-elser,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html
-inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html
-inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html
-inference-api-put-googlevertexai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html
-inference-api-put-googleaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html
-inference-api-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-mistral.html
-inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html
+inference-api-put-alibabacloud,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud
+inference-api-put-amazonbedrock,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock
+inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic
+inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio
+inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai
+inference-api-put-cohere,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere
+inference-api-put-eis,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis
+inference-api-put-elasticsearch,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch
+inference-api-put-elser,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser
+inference-api-put-googleaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio
+inference-api-put-googlevertexai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai
+inference-api-put-huggingface,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face
+inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai
+inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral
+inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai
 inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai
 inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx
 inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference
diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
index 8ac3d0262f..f1647d5549 100644
--- a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
+++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
@@ -42,7 +42,7 @@ import { float, integer } from '@_types/Numeric'
  * @availability stack since=8.12.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
  * @cluster_privileges manage_inference
- * @doc_id inference-api-amazonbedrock
+ * @doc_id inference-api-put-amazonbedrock
  */
 export interface Request extends RequestBase {
   urls: [
diff --git a/specification/inference/put_anthropic/PutAnthropicRequest.ts b/specification/inference/put_anthropic/PutAnthropicRequest.ts
index 95d2f61d24..3e8f18a81a 100644
--- a/specification/inference/put_anthropic/PutAnthropicRequest.ts
+++ b/specification/inference/put_anthropic/PutAnthropicRequest.ts
@@ -39,7 +39,7 @@ import { float, integer } from '@_types/Numeric'
  * @availability stack since=8.16.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
  * @cluster_privileges manage_inference
- * @doc_id inference-api-anthropic
+ * @doc_id inference-api-put-anthropic
  */
 export interface Request extends RequestBase {
   urls: [
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index a863e8ba45..5430ebb8e7 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -23,7 +23,7 @@ import { Id } from '@_types/common'
 import { integer } from '@_types/Numeric'

 /**
- * Create an OpenAI inference endpoint.
+ * Create an Elasticsearch inference endpoint.
  *
  * Create an inference endpoint to perform an inference task with the `elasticsearch` service.
  *
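
The descriptions touched by this change all document the same operational flow for the `elasticsearch` service: create the endpoint with `PUT _inference/<task_type>/<inference_id>` (the first request may download the model, which is why the text warns that a 502 from the Kibana Console is usually just a timeout), then poll the get trained model statistics API until the deployment reports `"state": "fully_allocated"` with matching allocation counts. The sketch below illustrates that flow over plain HTTP with Python's `requests`, so no client-library API beyond the documented REST endpoints is assumed; the host, credentials, and the choice of built-in ELSER model id are illustrative placeholders, not part of this diff.

# Illustrative sketch (not part of the patch above): create and verify an
# `elasticsearch`-service inference endpoint; host, credentials, and model
# id are assumptions for the example.
import time

import requests

ES = "https://localhost:9200"      # placeholder deployment URL
AUTH = ("elastic", "<password>")   # placeholder credentials
MODEL_ID = ".elser_model_2"        # one of the built-in ELSER models

# Step 1: create the endpoint, mirroring PutElasticsearchRequestExample1.
# A generous timeout is used because the first call may trigger a model
# download in the background.
resp = requests.put(
    f"{ES}/_inference/sparse_embedding/my-elser-model",
    json={
        "service": "elasticsearch",
        "service_settings": {
            "model_id": MODEL_ID,
            "num_allocations": 1,
            "num_threads": 1,
        },
    },
    auth=AUTH,
    timeout=300,
)
resp.raise_for_status()

# Step 2: poll the get trained model statistics API until the deployment
# reports "state": "fully_allocated" and the allocation_count matches the
# target_allocation_count, as the descriptions instruct.
while True:
    stats = requests.get(
        f"{ES}/_ml/trained_models/{MODEL_ID}/_stats", auth=AUTH, timeout=60
    ).json()
    deployment = stats["trained_model_stats"][0].get("deployment_stats", {})
    alloc = deployment.get("allocation_status", {})
    if (
        alloc.get("state") == "fully_allocated"
        and alloc.get("allocation_count") == alloc.get("target_allocation_count")
    ):
        break
    time.sleep(10)  # the model may still be downloading or allocating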