diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6c45fd480d..0e9cfc6281 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17451,52 +17451,7 @@ "content": { "application/json": { "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] + "type": "object" } } } @@ -17748,6 +17703,51 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/chat_completion/{eis_inference_id}/_stream": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform a chat completion task through the Elastic Inference Service (EIS)", + "description": "Perform a chat completion inference task with the `elastic` service.", + "operationId": "inference-post-eis-chat-completion", + "parameters": [ + { + "in": "path", 
+ "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } + } + }, + "x-state": "Added in 9.0.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -76808,176 +76808,6 @@ "valid" ] }, - "inference.chat_completion_unified:Message": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" - } - } - }, - "required": [ - "role" - ] - }, - "inference.chat_completion_unified:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" - } - } - ] - }, - "inference.chat_completion_unified:ContentObject": { - "type": "object", - "properties": { - "text": { - "description": "The text content.", - "type": "string" - }, - "type": { - "description": "The type of content.", - "type": "string" - } - }, - "required": [ - "text", - "type" - ] - }, - "inference.chat_completion_unified:ToolCall": { - "type": "object", - "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": 
"#/components/schemas/inference.chat_completion_unified:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" - } - }, - "required": [ - "id", - "function", - "type" - ] - }, - "inference.chat_completion_unified:ToolCallFunction": { - "type": "object", - "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "arguments", - "name" - ] - }, - "inference.chat_completion_unified:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" - } - ] - }, - "inference.chat_completion_unified:CompletionToolChoice": { - "type": "object", - "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolChoiceFunction": { - "type": "object", - "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "inference.chat_completion_unified:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolFunction": { - "type": "object", - "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - 
"description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" - } - }, - "required": [ - "name" - ] - }, "_types:StreamResult": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index ef165f4ddf..655bd5617d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9273,52 +9273,7 @@ "content": { "application/json": { "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" 
- ] + "type": "object" } } } @@ -9570,6 +9525,51 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/chat_completion/{eis_inference_id}/_stream": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform a chat completion task through the Elastic Inference Service (EIS)", + "description": "Perform a chat completion inference task with the `elastic` service.", + "operationId": "inference-post-eis-chat-completion", + "parameters": [ + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } + } + }, + "x-state": "Added in 9.0.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -48000,176 +48000,6 @@ "valid" ] }, - "inference.chat_completion_unified:Message": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" - } - } - }, - "required": [ - "role" - ] - }, - "inference.chat_completion_unified:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" - } - } - ] - }, - "inference.chat_completion_unified:ContentObject": { - "type": 
"object", - "properties": { - "text": { - "description": "The text content.", - "type": "string" - }, - "type": { - "description": "The type of content.", - "type": "string" - } - }, - "required": [ - "text", - "type" - ] - }, - "inference.chat_completion_unified:ToolCall": { - "type": "object", - "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" - } - }, - "required": [ - "id", - "function", - "type" - ] - }, - "inference.chat_completion_unified:ToolCallFunction": { - "type": "object", - "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "arguments", - "name" - ] - }, - "inference.chat_completion_unified:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" - } - ] - }, - "inference.chat_completion_unified:CompletionToolChoice": { - "type": "object", - "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolChoiceFunction": { - "type": "object", - "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "inference.chat_completion_unified:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": 
"#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolFunction": { - "type": "object", - "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - "description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" - } - }, - "required": [ - "name" - ] - }, "_types:StreamResult": { "type": "object" }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index aa66f92b23..ecaa181942 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4495,6 +4495,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "9.0.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "docId": "inference-api-post-eis-chat-completion", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion", + "name": "inference.post_eis_chat_completion", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" 
+ }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{eis_inference_id}/_stream" + } + ] + }, { "availability": { "serverless": { @@ -26490,119 +26535,13 @@ ], "body": { "kind": "properties", - "properties": [ - { - "description": "A list of objects representing the conversation.", - "name": "messages", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "The ID of the model to use.", - "name": "model", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "long", - "namespace": "_types" - } - } - }, - { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - }, - { - "description": "The sampling temperature to use.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" - } - } - }, - { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": 
"instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ] + "properties": [] }, "description": "Perform chat completion inference", "inherits": { "type": { - "name": "RequestBase", - "namespace": "_types" + "name": "RequestChatCompletionBase", + "namespace": "inference._types" } }, "kind": "request", @@ -26639,7 +26578,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" }, { "body": { @@ -26936,6 +26875,61 @@ }, "specLocation": "inference/get/GetResponse.ts#L22-L26" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [] + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "inherits": { + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": 
"inference.post_eis_chat_completion" + }, + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100399,7 +100393,7 @@ "name": "CompletionToolType", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", "type": { "items": [ { @@ -100453,7 +100447,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" }, { "description": "The tool choice function.", @@ -100476,7 +100470,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" }, { "codegenNames": [ @@ -100488,7 +100482,7 @@ "name": "MessageContent", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", "type": { "items": [ { @@ -100545,7 +100539,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, { "kind": "enum", @@ -120438,6 +120432,130 @@ ], "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L32-L37" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "interface", + "name": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", 
+ "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L28-L61" + }, { "description": "An 
object representing part of the conversation.", "kind": "interface", @@ -120498,7 +120616,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" }, { "description": "A tool call generated by the model.", @@ -120545,7 +120663,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" }, { "description": "The function that the model called.", @@ -120580,7 +120698,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" }, { "description": "A list of tools that the model can call.", @@ -120615,7 +120733,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" }, { "description": "The completion tool function definition.", @@ -120670,7 +120788,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" }, { "description": "Defines the completion result.", diff --git a/output/schema/schema.json b/output/schema/schema.json index 8671596959..e84cb90589 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9207,6 +9207,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "9.0.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "docId": "inference-api-post-eis-chat-completion", + "docUrl": 
"https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion", + "name": "inference.post_eis_chat_completion", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{eis_inference_id}/_stream" + } + ] + }, { "availability": { "serverless": { @@ -149167,6 +149212,130 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": 
{ + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L28-L61" + }, { "kind": "interface", "description": "Defines the response for a rerank request.", @@ -149445,7 +149614,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" }, { "kind": "interface", @@ -149480,7 +149649,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" }, { "kind": "interface", @@ -149503,7 +149672,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" }, { "kind": "interface", @@ -149558,7 +149727,7 @@ } } ], - "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" }, { "kind": "type_alias", @@ -149570,7 +149739,7 @@ "name": "CompletionToolType", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", "type": { "kind": "union_of", "items": [ @@ -149624,7 +149793,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, { "kind": "interface", @@ -149686,7 +149855,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" }, { "kind": "type_alias", @@ -149698,7 +149867,7 @@ "name": "MessageContent", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", "type": { "kind": "union_of", "items": [ @@ -149729,119 +149898,13 @@ ], "body": { "kind": "properties", - "properties": [ - { - "description": "A list of objects representing the conversation.", - "name": "messages", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "The ID of the model to use.", - "name": "model", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, - "type": { - 
"kind": "instance_of", - "type": { - "name": "long", - "namespace": "_types" - } - } - }, - { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - }, - { - "description": "The sampling temperature to use.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" - } - } - }, - { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ] + "properties": [] }, "description": "Perform chat completion inference", "inherits": { "type": { - "name": "RequestBase", - "namespace": "_types" + "name": "RequestChatCompletionBase", + "namespace": "inference._types" } }, "name": { @@ -149877,7 +149940,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" }, { "kind": "response", @@ -149942,7 +150005,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" }, { "kind": "interface", @@ -149977,7 +150040,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" }, { "kind": "request", @@ -150256,6 +150319,61 @@ }, "specLocation": "inference/get/GetResponse.ts#L22-L26" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [] + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "inherits": { + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" + }, + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts#L22-L24" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index ce67705ca0..a129e27fbc 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13109,6 +13109,17 @@ export interface InferenceRateLimitSetting { requests_per_minute?: integer } +export interface InferenceRequestChatCompletionBase 
extends RequestBase { + messages: InferenceChatCompletionUnifiedMessage[] + model?: string + max_completion_tokens?: long + stop?: string[] + temperature?: float + tool_choice?: InferenceChatCompletionUnifiedCompletionToolType + tools?: InferenceChatCompletionUnifiedCompletionTool[] + top_p?: float +} + export interface InferenceRerankedInferenceResult { rerank: InferenceRankedDocument[] } @@ -13180,19 +13191,9 @@ export interface InferenceChatCompletionUnifiedMessage { export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] -export interface InferenceChatCompletionUnifiedRequest extends RequestBase { +export interface InferenceChatCompletionUnifiedRequest extends InferenceRequestChatCompletionBase { inference_id: Id timeout?: Duration - body?: { - messages: InferenceChatCompletionUnifiedMessage[] - model?: string - max_completion_tokens?: long - stop?: string[] - temperature?: float - tool_choice?: InferenceChatCompletionUnifiedCompletionToolType - tools?: InferenceChatCompletionUnifiedCompletionTool[] - top_p?: float - } } export type InferenceChatCompletionUnifiedResponse = StreamResult @@ -13237,6 +13238,12 @@ export interface InferenceGetResponse { endpoints: InferenceInferenceEndpointInfo[] } +export interface InferencePostEisChatCompletionRequest extends InferenceRequestChatCompletionBase { + eis_inference_id: Id +} + +export type InferencePostEisChatCompletionResponse = StreamResult + export interface InferencePutRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d793dd61e5..9532dedc92 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -316,6 +316,7 @@ infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/ref inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete 
inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference +inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html diff --git a/specification/_json_spec/inference.post_eis_chat_completion.json b/specification/_json_spec/inference.post_eis_chat_completion.json new file mode 100644 index 0000000000..e34b14913b --- /dev/null +++ b/specification/_json_spec/inference.post_eis_chat_completion.json @@ -0,0 +1,31 @@ +{ + "inference.post_eis_chat_completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html", + "description": "Perform a chat completion task via the Elastic Inference Service (EIS)" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{eis_inference_id}/_stream", + "methods": ["POST"], + "parts": { + "eis_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference tasks settings to perform" + } + } +} diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts new file mode 100644 index 0000000000..8879466eb3 --- /dev/null +++ b/specification/inference/_types/CommonTypes.ts @@ -0,0 +1,61 @@ +/* + * Licensed to Elasticsearch B.V. 
under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + CompletionTool, + CompletionToolType, + Message +} from '@inference/chat_completion_unified/UnifiedRequest' +import { RequestBase } from '@_types/Base' +import { float, long } from '@_types/Numeric' + +export interface RequestChatCompletionBase extends RequestBase { + /** + * A list of objects representing the conversation. + */ + messages: Array + /** + * The ID of the model to use. + */ + model?: string + /** + * The upper bound limit for the number of tokens that can be generated for a completion request. + */ + max_completion_tokens?: long + /** + * A sequence of strings to control when the model should stop generating additional tokens. + */ + stop?: Array + /** + * The sampling temperature to use. + */ + temperature?: float + /** + * Controls which tool is called by the model. + */ + tool_choice?: CompletionToolType + /** + * A list of tools that the model can call. + */ + tools?: Array + /** + * Nucleus sampling, an alternative to sampling with temperature. 
+ */ + top_p?: float +} diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index ac460afa95..1932021931 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -17,10 +17,9 @@ * under the License. */ +import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' -import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, long } from '@_types/Numeric' import { Duration } from '@_types/Time' /** @@ -30,7 +29,7 @@ import { Duration } from '@_types/Time' * @availability serverless stability=stable visibility=public * @doc_id inference-api-chat-completion */ -export interface Request extends RequestBase { +export interface Request extends RequestChatCompletionBase { urls: [ { path: '/_inference/chat_completion/{inference_id}/_stream' @@ -50,40 +49,6 @@ export interface Request extends RequestBase { */ timeout?: Duration } - body: { - /** - * A list of objects representing the conversation. - */ - messages: Array - /** - * The ID of the model to use. - */ - model?: string - /** - * The upper bound limit for the number of tokens that can be generated for a completion request. - */ - max_completion_tokens?: long - /** - * A sequence of strings to control when the model should stop generating additional tokens. - */ - stop?: Array - /** - * The sampling temperature to use. - */ - temperature?: float - /** - * Controls which tool is called by the model. - */ - tool_choice?: CompletionToolType - /** - * A list of tools that the model can call. - */ - tools?: Array - /** - * Nucleus sampling, an alternative to sampling with temperature. 
- */ - top_p?: float - } } /** diff --git a/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts new file mode 100644 index 0000000000..6088a1b0a1 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts @@ -0,0 +1,46 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' +import { Id } from '@_types/common' + +/** + * Perform a chat completion task through the Elastic Inference Service (EIS). + * + * Perform a chat completion inference task with the `elastic` service. + * @rest_spec_name inference.post_eis_chat_completion + * @availability stack since=9.0.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-post-eis-chat-completion + */ +export interface Request extends RequestChatCompletionBase { + urls: [ + { + path: '/_inference/chat_completion/{eis_inference_id}/_stream' + methods: ['POST'] + } + ] + path_parts: { + /** + * The unique identifier of the inference endpoint. 
+ */ + eis_inference_id: Id + } +} diff --git a/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts b/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts new file mode 100644 index 0000000000..74b823bf40 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { StreamResult } from '@_types/Binary' + +export class Response { + body: StreamResult +} diff --git a/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml b/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml new file mode 100644 index 0000000000..575da1bfa6 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml @@ -0,0 +1,17 @@ +summary: A chat completion task +description: Run `POST /_inference/chat_completion//_stream` to perform a streaming chat completion task type. 
+# method_request: "POST /_inference/chat_completion/.rainbow-sprinkles-elastic/_stream"
+# type: "request"
+value: |-
+  {
+    "messages": [
+      {
+        "role": "user",
+        "content": "Say yes if it works."
+      }
+    ],
+    "temperature": 0.7,
+    "max_completion_tokens": 300
+  }