From 08d34e6cc51e4e34794d039a17062744fcdb9464 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 11 Mar 2025 14:35:48 -0700 Subject: [PATCH 1/2] Fix overlay for inference stream (#3930) (cherry picked from commit 8eb28d3bb29aa5b6ab3ed726ab8e5bce562130d6) --- docs/overlays/elasticsearch-openapi-overlays.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index c140dbf824..6d125f616c 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -627,14 +627,15 @@ actions: indicesLegacyPutTemplateRequestExample1: $ref: "../../specification/indices/put_template/examples/request/indicesPutTemplateRequestExample1.yaml" ## Examples for inference - - target: "$.components['requestBodies']['inference.stream_inference']" + - target: "$.paths['/_inference/chat_completion/{inference_id}/_stream']['post']" description: "Add example for inference stream request" update: - content: - application/json: - examples: - streamInferenceRequestExample1: - $ref: "../../specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml" + requestBody: + content: + application/json: + examples: + streamInferenceRequestExample1: + $ref: "../../specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml" ## Examples for ingest - target: "$.components['requestBodies']['simulate.ingest']" description: "Add example for simulate ingest request" From b5c24c7ddedd51876c7d6693ff31b73b1f049b52 Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 11 Mar 2025 14:46:10 -0700 Subject: [PATCH 2/2] Regenerate output --- output/openapi/elasticsearch-openapi.json | 1266 +++++++++-------- .../elasticsearch-serverless-openapi.json | 1142 ++++++++------- output/schema/schema.json | 4 +- output/typescript/types.ts | 4 +- 4 files changed, 1342 insertions(+), 
1074 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 02569f8681..a021c55663 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -16708,68 +16708,218 @@ "x-state": "Added in 1.3.0" } }, - "/_inference/{inference_id}": { - "get": { + "/_inference/chat_completion/{inference_id}/_stream": { + "post": { "tags": [ "inference" ], - "summary": "Get an inference endpoint", - "operationId": "inference-get-1", + "summary": "Perform chat completion inference", + "operationId": "inference-chat-completion-unified", "parameters": [ { - "$ref": "#/components/parameters/inference.get#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + 
"type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + } + } + }, "responses": { "200": { - "$ref": "#/components/responses/inference.get#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.11.0" - }, - "put": { + "x-state": "Added in 8.18.0" + } + }, + "/_inference/completion/{inference_id}": { + "post": { "tags": [ "inference" ], - "summary": "Create an inference endpoint", - "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or 
if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-put", + "summary": "Perform completion inference on the service", + "operationId": "inference-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.put#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.put" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.put#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:CompletionInferenceResult" + } + } + } } }, "x-state": "Added in 8.11.0" - }, - "post": { + } + }, + "/_inference/{inference_id}": { + "get": { "tags": [ "inference" ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create 
inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference", + "summary": "Get an inference endpoint", + "operationId": "inference-get-1", "parameters": [ { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, + "$ref": "#/components/parameters/inference.get#inference_id" + } + ], + "responses": { + "200": { + "$ref": "#/components/responses/inference.get#200" + } + }, + "x-state": "Added in 8.11.0" + }, + "put": { + "tags": [ + "inference" + ], + "summary": "Create an inference endpoint", + "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative 
way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-put", + "parameters": [ { - "$ref": "#/components/parameters/inference.inference#timeout" + "$ref": "#/components/parameters/inference.put#inference_id" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" + "$ref": "#/components/requestBodies/inference.put" }, "responses": { "200": { - "$ref": "#/components/responses/inference.inference#200" + "$ref": "#/components/responses/inference.put#200" } }, "x-state": "Added in 8.11.0" @@ -16846,34 +16996,6 @@ }, "x-state": "Added in 8.11.0" }, - "post": { - "tags": [ - "inference" - ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference-1", - "parameters": [ - { - "$ref": "#/components/parameters/inference.inference#task_type" - }, - { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.inference#timeout" - } - ], - "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" - }, - "responses": { - "200": { - "$ref": "#/components/responses/inference.inference#200" - } - }, - "x-state": "Added in 8.11.0" - }, "delete": { "tags": [ "inference" @@ -16985,114 +17107,308 @@ "x-state": "Added in 8.16.0" } }, - "/_inference/{inference_id}/_stream": { + "/_inference/rerank/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform streaming inference", - "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", - "operationId": "inference-stream-inference", + "summary": "Perform rereanking inference on the service", + "operationId": "inference-rerank", "parameters": [ { - "$ref": "#/components/parameters/inference.stream_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "The amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.stream_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "description": "Query input.", + "type": "string" + }, + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "query", + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.stream_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:RerankedInferenceResult" + } + } + } } }, - "x-state": "Added in 8.16.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{task_type}/{inference_id}/_stream": { + "/_inference/sparse_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": 
"Perform streaming inference", - "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "operationId": "inference-stream-inference-1", + "summary": "Perform sparse embedding inference on the service", + "operationId": "inference-sparse-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.stream_inference#task_type" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.stream_inference#inference_id" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.stream_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an 
array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.stream_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.16.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{inference_id}/_unified": { + "/_inference/completion/{inference_id}/_stream": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference", + "summary": "Perform streaming inference", + "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", + "operationId": "inference-stream-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.16.0" } }, - "/_inference/{task_type}/{inference_id}/_unified": { + "/_inference/text_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference-1", + "summary": "Perform text embedding inference on the service", + "operationId": "inference-text-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#task_type" - }, - { - "$ref": 
"#/components/parameters/inference.unified_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, "/_inference/{inference_id}/_update": { - "post": { + "put": { "tags": [ "inference" ], @@ -17116,7 +17432,7 @@ } }, "/_inference/{task_type}/{inference_id}/_update": { - "post": { + "put": { "tags": [ "inference" ], @@ -63288,7 +63604,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -74173,13 +74491,215 @@ "valid" ] }, + "inference.chat_completion_unified:Message": { + "type": "object", + "properties": { + 
"content": { + "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" + }, + "role": { + "description": "The role of the message author.", + "type": "string" + }, + "tool_call_id": { + "$ref": "#/components/schemas/_types:Id" + }, + "tool_calls": { + "description": "The tool calls generated by the model.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" + } + } + }, + "required": [ + "role" + ] + }, + "inference.chat_completion_unified:MessageContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" + } + } + ] + }, + "inference.chat_completion_unified:ContentObject": { + "type": "object", + "properties": { + "text": { + "description": "The text content.", + "type": "string" + }, + "type": { + "description": "The type of content.", + "type": "string" + } + }, + "required": [ + "text", + "type" + ] + }, + "inference.chat_completion_unified:ToolCall": { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/_types:Id" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" + }, + "type": { + "description": "The type of the tool call.", + "type": "string" + } + }, + "required": [ + "id", + "function", + "type" + ] + }, + "inference.chat_completion_unified:ToolCallFunction": { + "type": "object", + "properties": { + "arguments": { + "description": "The arguments to call the function with in JSON format.", + "type": "string" + }, + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "arguments", + "name" + ] + }, + "inference.chat_completion_unified:CompletionToolType": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" + } + ] + }, + 
"inference.chat_completion_unified:CompletionToolChoice": { + "type": "object", + "properties": { + "type": { + "description": "The type of the tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolChoiceFunction": { + "type": "object", + "properties": { + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "inference.chat_completion_unified:CompletionTool": { + "type": "object", + "properties": { + "type": { + "description": "The type of tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolFunction": { + "type": "object", + "properties": { + "description": { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "type": "string" + }, + "name": { + "description": "The name of the function.", + "type": "string" + }, + "parameters": { + "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", + "type": "object" + }, + "strict": { + "description": "Whether to enable schema adherence when generating the function call.", + "type": "boolean" + } + }, + "required": [ + "name" + ] + }, + "_types:StreamResult": { + "type": "object" + }, + "inference._types:TaskSettings": { + "type": "object" + }, + "inference._types:CompletionInferenceResult": { + "type": "object", + "properties": { + "completion": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionResult" + } + } + }, + "required": [ + "completion" + ] + }, + "inference._types:CompletionResult": { + "type": "object", + "properties": { + "result": { + "type": "string" + } + }, + "required": [ + "result" + ] + }, "inference._types:TaskType": { "type": "string", "enum": [ "sparse_embedding", "text_embedding", "rerank", - "completion" + "completion", + "chat_completion" ] }, "inference._types:DeleteInferenceEndpointResult": { @@ -74279,136 +74799,6 @@ "inference._types:ServiceSettings": { "type": "object" }, - "inference._types:TaskSettings": { - "type": "object" - }, - "inference._types:InferenceResult": { - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "type": "object", - "properties": { - "text_embedding_bytes": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding_bits": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" - } - }, - "sparse_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" - } - }, - "completion": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:CompletionResult" - } - }, - "rerank": { - "type": "array", 
- "items": { - "$ref": "#/components/schemas/inference._types:RankedDocument" - } - } - }, - "minProperties": 1, - "maxProperties": 1 - }, - "inference._types:TextEmbeddingByteResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseByteVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseByteVector": { - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "type": "array", - "items": { - "$ref": "#/components/schemas/_types:byte" - } - }, - "inference._types:TextEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseVector": { - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", - "type": "array", - "items": { - "type": "number" - } - }, - "inference._types:SparseEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:SparseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:SparseVector": { - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "inference._types:CompletionResult": { - "type": "object", - "properties": { - "result": { - "type": "string" - } - }, - "required": [ - "result" - ] - }, - "inference._types:RankedDocument": { - "type": "object", - "properties": { - "index": { - "type": "number" - }, - "relevance_score": { - "type": "number" - }, - "text": { - "type": "string" - } - }, - "required": [ - "index", - "relevance_score" - ] - }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ @@ -74474,179 +74864,132 @@ } } }, - "_types:StreamResult": { - "type": "object" - }, - 
"inference.unified_inference:Message": { + "inference._types:RerankedInferenceResult": { "type": "object", "properties": { - "content": { - "$ref": "#/components/schemas/inference.unified_inference:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", + "rerank": { "type": "array", "items": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCall" + "$ref": "#/components/schemas/inference._types:RankedDocument" } } }, "required": [ - "role" + "rerank" ] }, - "inference.unified_inference:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:ContentObject" - } - } - ] - }, - "inference.unified_inference:ContentObject": { + "inference._types:RankedDocument": { "type": "object", "properties": { - "text": { - "description": "The text content.", - "type": "string" + "index": { + "type": "number" }, - "type": { - "description": "The type of content.", + "relevance_score": { + "type": "number" + }, + "text": { "type": "string" } }, "required": [ - "text", - "type" + "index", + "relevance_score" ] }, - "inference.unified_inference:ToolCall": { + "inference._types:SparseEmbeddingInferenceResult": { "type": "object", "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" + "sparse_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" + } } }, "required": [ - "id", - "function", - "type" + "sparse_embedding" ] }, - "inference.unified_inference:ToolCallFunction": { + "inference._types:SparseEmbeddingResult": { 
"type": "object", "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": "#/components/schemas/inference._types:SparseVector" } }, "required": [ - "arguments", - "name" + "embedding" ] }, - "inference.unified_inference:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoice" - } - ] + "inference._types:SparseVector": { + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "type": "object", + "additionalProperties": { + "type": "number" + } }, - "inference.unified_inference:CompletionToolChoice": { + "inference._types:TextEmbeddingInferenceResult": { + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "type": "object", "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" + "text_embedding_bytes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoiceFunction" + "text_embedding_bits": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } + }, + "text_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" + } } }, - "required": [ - "type", - "function" - ] + "minProperties": 1, + "maxProperties": 1 }, - "inference.unified_inference:CompletionToolChoiceFunction": { + "inference._types:TextEmbeddingByteResult": { "type": "object", "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": 
"#/components/schemas/inference._types:DenseByteVector" } }, "required": [ - "name" + "embedding" ] }, - "inference.unified_inference:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] + "inference._types:DenseByteVector": { + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "type": "array", + "items": { + "$ref": "#/components/schemas/_types:byte" + } }, - "inference.unified_inference:CompletionToolFunction": { + "inference._types:TextEmbeddingResult": { "type": "object", "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - "description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseVector" } }, "required": [ - "name" + "embedding" ] }, + "inference._types:DenseVector": { + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "type": "array", + "items": { + "type": "number" + } + }, "_types:ElasticsearchVersionInfo": { "type": "object", "properties": { @@ -96332,16 +96675,6 @@ } } }, - "inference.inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/inference._types:InferenceResult" - } - } - } - }, "inference.put#200": { "description": "", "content": { @@ -96352,26 +96685,6 @@ } } }, - "inference.stream_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, - "inference.unified_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, "inference.update#200": { "description": "", "content": { @@ -104205,38 +104518,6 @@ }, "style": "simple" }, - "inference.inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of inference task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.inference#timeout": { - "in": "query", - "name": 
"timeout", - "description": "The amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.put#task_type": { "in": "path", "name": "task_type", @@ -104259,60 +104540,6 @@ }, "style": "simple" }, - "inference.stream_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.stream_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The task type", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The inference Id", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.unified_inference#timeout": { - "in": "query", - "name": "timeout", - "description": "Specifies the amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.update#inference_id": { "in": "path", "name": "inference_id", @@ -109607,7 +109834,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { 
@@ -109845,41 +110074,6 @@ } } }, - "inference.inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "query": { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "type": "string" - }, - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "task_settings": { - "$ref": "#/components/schemas/inference._types:TaskSettings" - } - }, - "required": [ - "input" - ] - } - } - } - }, "inference.put": { "content": { "application/json": { @@ -109890,88 +110084,6 @@ }, "required": true }, - "inference.stream_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - } - }, - "required": [ - "input" - ] - } - } - } - }, - "inference.unified_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": 
"number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] - } - } - } - }, "inference.update": { "content": { "application/json": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 077160a516..5d1cf69083 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9055,68 +9055,218 @@ "x-state": "Added in 1.3.0" } }, - "/_inference/{inference_id}": { - "get": { + "/_inference/chat_completion/{inference_id}/_stream": { + "post": { "tags": [ "inference" ], - "summary": "Get an inference endpoint", - "operationId": "inference-get-1", + "summary": "Perform chat completion inference", + "operationId": "inference-chat-completion-unified", "parameters": [ { - "$ref": "#/components/parameters/inference.get#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + 
"style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + "type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + } + } + }, "responses": { "200": { - "$ref": "#/components/responses/inference.get#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.11.0" - }, - "put": { + "x-state": "Added in 8.18.0" + } + }, + "/_inference/completion/{inference_id}": { + "post": { "tags": [ "inference" ], - "summary": "Create an inference endpoint", - "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to 
complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-put", + "summary": "Perform completion inference on the service", + "operationId": "inference-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.put#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.put" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + 
"$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.put#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:CompletionInferenceResult" + } + } + } } }, "x-state": "Added in 8.11.0" - }, - "post": { + } + }, + "/_inference/{inference_id}": { + "get": { "tags": [ "inference" ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference", + "summary": "Get an inference endpoint", + "operationId": "inference-get-1", "parameters": [ { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, + "$ref": "#/components/parameters/inference.get#inference_id" + } + ], + "responses": { + "200": { + "$ref": "#/components/responses/inference.get#200" + } + }, + "x-state": "Added in 8.11.0" + }, + "put": { + "tags": [ + "inference" + ], + "summary": "Create an inference endpoint", + "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-put", + "parameters": [ { - "$ref": "#/components/parameters/inference.inference#timeout" + "$ref": "#/components/parameters/inference.put#inference_id" } ], "requestBody": 
{ - "$ref": "#/components/requestBodies/inference.inference" + "$ref": "#/components/requestBodies/inference.put" }, "responses": { "200": { - "$ref": "#/components/responses/inference.inference#200" + "$ref": "#/components/responses/inference.put#200" } }, "x-state": "Added in 8.11.0" @@ -9193,34 +9343,6 @@ }, "x-state": "Added in 8.11.0" }, - "post": { - "tags": [ - "inference" - ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference-1", - "parameters": [ - { - "$ref": "#/components/parameters/inference.inference#task_type" - }, - { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.inference#timeout" - } - ], - "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" - }, - "responses": { - "200": { - "$ref": "#/components/responses/inference.inference#200" - } - }, - "x-state": "Added in 8.11.0" - }, "delete": { "tags": [ "inference" @@ -9332,59 +9454,237 @@ "x-state": "Added in 8.16.0" } }, - "/_inference/{inference_id}/_unified": { + "/_inference/rerank/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference", + "summary": "Perform reranking inference on the service", + "operationId": "inference-rerank", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "The amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "description": "Query input.", + "type": "string" + }, + "input": { 
+ "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "query", + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:RerankedInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{task_type}/{inference_id}/_unified": { + "/_inference/sparse_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference-1", + "summary": "Perform sparse embedding inference on the service", + "operationId": "inference-sparse-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#task_type" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string 
or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingInferenceResult" + } + } + } + } + }, + "x-state": "Added in 8.11.0" + } + }, + "/_inference/text_embedding/{inference_id}": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform text embedding inference on the service", + "operationId": "inference-text-embedding", + "parameters": [ + { + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/inference._types:TextEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, "/": { @@ -40935,7 +41235,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -46563,13 +46865,215 @@ "valid" ] }, + "inference.chat_completion_unified:Message": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" + }, + "role": { + "description": "The role of the message author.", + "type": "string" + }, + "tool_call_id": { + "$ref": "#/components/schemas/_types:Id" + }, + "tool_calls": { + "description": "The tool calls generated by the model.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" + } + } + }, + "required": [ + "role" + ] + }, + "inference.chat_completion_unified:MessageContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" + } + } + ] + }, + "inference.chat_completion_unified:ContentObject": { + "type": "object", + "properties": { + "text": { + "description": "The text content.", + "type": "string" + }, + "type": { + "description": "The type of content.", + "type": "string" + } + }, + "required": [ + "text", + "type" + ] + }, + "inference.chat_completion_unified:ToolCall": { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/_types:Id" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" + }, + "type": { + "description": "The type of the tool call.", + "type": "string" + } + }, + "required": [ + "id", + "function", + "type" + ] + }, + "inference.chat_completion_unified:ToolCallFunction": { + "type": "object", + "properties": { + 
"arguments": { + "description": "The arguments to call the function with in JSON format.", + "type": "string" + }, + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "arguments", + "name" + ] + }, + "inference.chat_completion_unified:CompletionToolType": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" + } + ] + }, + "inference.chat_completion_unified:CompletionToolChoice": { + "type": "object", + "properties": { + "type": { + "description": "The type of the tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolChoiceFunction": { + "type": "object", + "properties": { + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "inference.chat_completion_unified:CompletionTool": { + "type": "object", + "properties": { + "type": { + "description": "The type of tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolFunction": { + "type": "object", + "properties": { + "description": { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "type": "string" + }, + "name": { + "description": "The name of the function.", + "type": "string" + }, + "parameters": { + "description": "The parameters the function accepts. 
This should be formatted as a JSON object.", + "type": "object" + }, + "strict": { + "description": "Whether to enable schema adherence when generating the function call.", + "type": "boolean" + } + }, + "required": [ + "name" + ] + }, + "_types:StreamResult": { + "type": "object" + }, + "inference._types:TaskSettings": { + "type": "object" + }, + "inference._types:CompletionInferenceResult": { + "type": "object", + "properties": { + "completion": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionResult" + } + } + }, + "required": [ + "completion" + ] + }, + "inference._types:CompletionResult": { + "type": "object", + "properties": { + "result": { + "type": "string" + } + }, + "required": [ + "result" + ] + }, "inference._types:TaskType": { "type": "string", "enum": [ "sparse_embedding", "text_embedding", "rerank", - "completion" + "completion", + "chat_completion" ] }, "inference._types:DeleteInferenceEndpointResult": { @@ -46669,136 +47173,6 @@ "inference._types:ServiceSettings": { "type": "object" }, - "inference._types:TaskSettings": { - "type": "object" - }, - "inference._types:InferenceResult": { - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "type": "object", - "properties": { - "text_embedding_bytes": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding_bits": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" - } - }, - "sparse_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" - } - }, - "completion": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:CompletionResult" - } - }, - "rerank": { - "type": "array", 
- "items": { - "$ref": "#/components/schemas/inference._types:RankedDocument" - } - } - }, - "minProperties": 1, - "maxProperties": 1 - }, - "inference._types:TextEmbeddingByteResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseByteVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseByteVector": { - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "type": "array", - "items": { - "$ref": "#/components/schemas/_types:byte" - } - }, - "inference._types:TextEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseVector": { - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", - "type": "array", - "items": { - "type": "number" - } - }, - "inference._types:SparseEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:SparseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:SparseVector": { - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "inference._types:CompletionResult": { - "type": "object", - "properties": { - "result": { - "type": "string" - } - }, - "required": [ - "result" - ] - }, - "inference._types:RankedDocument": { - "type": "object", - "properties": { - "index": { - "type": "number" - }, - "relevance_score": { - "type": "number" - }, - "text": { - "type": "string" - } - }, - "required": [ - "index", - "relevance_score" - ] - }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ @@ -46864,178 +47238,131 @@ } } }, - "inference.unified_inference:Message": { + 
"inference._types:RerankedInferenceResult": { "type": "object", "properties": { - "content": { - "$ref": "#/components/schemas/inference.unified_inference:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", + "rerank": { "type": "array", "items": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCall" + "$ref": "#/components/schemas/inference._types:RankedDocument" } } }, "required": [ - "role" - ] - }, - "inference.unified_inference:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:ContentObject" - } - } + "rerank" ] }, - "inference.unified_inference:ContentObject": { + "inference._types:RankedDocument": { "type": "object", "properties": { - "text": { - "description": "The text content.", - "type": "string" + "index": { + "type": "number" }, - "type": { - "description": "The type of content.", + "relevance_score": { + "type": "number" + }, + "text": { "type": "string" } }, "required": [ - "text", - "type" + "index", + "relevance_score" ] }, - "inference.unified_inference:ToolCall": { + "inference._types:SparseEmbeddingInferenceResult": { "type": "object", "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" + "sparse_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" + } } }, "required": [ - "id", - "function", - "type" + "sparse_embedding" ] }, - "inference.unified_inference:ToolCallFunction": { + "inference._types:SparseEmbeddingResult": { "type": "object", "properties": { - 
"arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": "#/components/schemas/inference._types:SparseVector" } }, "required": [ - "arguments", - "name" + "embedding" ] }, - "inference.unified_inference:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoice" - } - ] + "inference._types:SparseVector": { + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "type": "object", + "additionalProperties": { + "type": "number" + } }, - "inference.unified_inference:CompletionToolChoice": { + "inference._types:TextEmbeddingInferenceResult": { + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "type": "object", "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" + "text_embedding_bytes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoiceFunction" + "text_embedding_bits": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } + }, + "text_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" + } } }, - "required": [ - "type", - "function" - ] + "minProperties": 1, + "maxProperties": 1 }, - "inference.unified_inference:CompletionToolChoiceFunction": { + "inference._types:TextEmbeddingByteResult": { "type": "object", "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseByteVector" } }, 
"required": [ - "name" + "embedding" ] }, - "inference.unified_inference:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] + "inference._types:DenseByteVector": { + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "type": "array", + "items": { + "$ref": "#/components/schemas/_types:byte" + } }, - "inference.unified_inference:CompletionToolFunction": { + "inference._types:TextEmbeddingResult": { "type": "object", "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - "description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseVector" } }, "required": [ - "name" + "embedding" ] }, - "_types:StreamResult": { - "type": "object" + "inference._types:DenseVector": { + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "type": "array", + "items": { + "type": "number" + } }, "_types:ElasticsearchVersionInfo": { "type": "object", @@ -57201,16 +57528,6 @@ } } }, - "inference.inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/inference._types:InferenceResult" - } - } - } - }, "inference.put#200": { "description": "", "content": { @@ -57221,16 +57538,6 @@ } } }, - "inference.unified_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, "ingest.get_pipeline#200": { "description": "", "content": { @@ -61321,38 +61628,6 @@ }, "style": "simple" }, - "inference.inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of inference task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.inference#timeout": { - "in": "query", - "name": "timeout", - "description": "The amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": 
"#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.put#task_type": { "in": "path", "name": "task_type", @@ -61375,38 +61650,6 @@ }, "style": "simple" }, - "inference.unified_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The task type", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The inference Id", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.unified_inference#timeout": { - "in": "query", - "name": "timeout", - "description": "Specifies the amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "ingest.get_pipeline#id": { "in": "path", "name": "id", @@ -64264,7 +64507,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -64452,41 +64697,6 @@ } } }, - "inference.inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "query": { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "type": "string" - }, - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "task_settings": { - "$ref": 
"#/components/schemas/inference._types:TaskSettings" - } - }, - "required": [ - "input" - ] - } - } - } - }, "inference.put": { "content": { "application/json": { @@ -64497,60 +64707,6 @@ }, "required": true }, - "inference.unified_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] - } - } - } - }, "ingest.simulate": { "content": { "application/json": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 1feed84b09..5843a6ce2a 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -80704,7 +80704,7 @@ "namespace": "_builtins" } }, - "singleKey": false, + "singleKey": true, "value": { "kind": "instance_of", "type": { @@ -139821,7 +139821,7 @@ "namespace": "_builtins" } }, - "singleKey": false, + "singleKey": true, 
"value": { "kind": "instance_of", "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 6cc6cccff2..2f85704756 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -5889,7 +5889,7 @@ export interface MappingTypeMapping { date_detection?: boolean dynamic?: MappingDynamicMapping dynamic_date_formats?: string[] - dynamic_templates?: Record[] + dynamic_templates?: Partial>[] _field_names?: MappingFieldNamesField index_field?: MappingIndexField _meta?: Metadata @@ -12350,7 +12350,7 @@ export interface IndicesPutMappingRequest extends RequestBase { date_detection?: boolean dynamic?: MappingDynamicMapping dynamic_date_formats?: string[] - dynamic_templates?: Record[] + dynamic_templates?: Partial>[] _field_names?: MappingFieldNamesField _meta?: Metadata numeric_detection?: boolean