From ea37f167c74e4b786d8372a7da555242c055a26d Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 16 Dec 2024 14:38:17 -0500 Subject: [PATCH 1/3] Adding the unified api --- output/schema/schema.json | 910 +++++++++++++++++- output/typescript/types.ts | 101 ++ .../inference.unified_inference.json | 45 + specification/inference/_types/Results.ts | 114 +++ .../unified_inference/UnifiedRequest.ts | 214 ++++ .../unified_inference/UnifiedResponse.ts | 24 + 6 files changed, 1407 insertions(+), 1 deletion(-) create mode 100644 specification/_json_spec/inference.unified_inference.json create mode 100644 specification/inference/unified_inference/UnifiedRequest.ts create mode 100644 specification/inference/unified_inference/UnifiedResponse.ts diff --git a/output/schema/schema.json b/output/schema/schema.json index 478520ccf2..4e3359e7a4 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8592,6 +8592,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform inference on the service using the Unified Schema", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", + "name": "inference.unified_inference", + "request": { + "name": "Request", + "namespace": "inference.unified_inference" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.unified_inference" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/{inference_id}/_unified" + }, + { + "methods": [ + "POST" + ], + "path": "/_inference/{task_type}/{inference_id}/_unified" + } + ] + }, { "availability": { "serverless": { @@ -141024,6 +141069,114 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "kind": "interface", + "description": "Represent a completion choice returned from a model.", + "name": { + "name": "CompletionChoice", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The delta generated by the model.", + "name": "delta", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionDelta", + "namespace": "inference._types" + } + } + }, + { + "description": "The reason the model stopped generating tokens.", + "name": "finish_reason", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The index of the choice in the array of choices field.", + "name": "index", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L143-L159" + }, + { + "kind": "interface", + "name": { + "name": "CompletionDelta", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The contents of the chunked message.", + "name": "content", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The refusal message.", + "name": "refusal", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The role of the author of the 
message.", + "name": "role", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool calls made by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ResultToolCall", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L124-L141" + }, { "kind": "interface", "description": "The completion result object", @@ -141075,7 +141228,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L91-L96" + "specLocation": "inference/_types/Results.ts#L205-L210" }, { "kind": "type_alias", @@ -141331,6 +141484,99 @@ ], "specLocation": "inference/_types/Results.ts#L67-L77" }, + { + "kind": "interface", + "description": "The function the model wants to call.", + "name": { + "name": "ResultFunctionCall", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The arguments to call the function with in that the model generated in JSON format.", + "name": "arguments", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L91-L103" + }, + { + "kind": "interface", + "description": "The tool call made by the model.", + "name": { + "name": "ResultToolCall", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "index", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The identifier of the tool call.", + "name": "id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function the model wants to call.", + "name": "function", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ResultFunctionCall", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of the tool.", + "name": "type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L105-L122" + }, { "kind": "type_alias", "name": { @@ -141467,6 +141713,127 @@ ], "specLocation": "inference/_types/Results.ts#L53-L58" }, + { + "kind": "interface", + "description": "Respresents the result format for a completion request using the Unified Inference API.", + "name": { + "name": "UnifiedInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A unique identifier for the chat completion", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A list of completion choices.", + "name": "choices", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionChoice", + "namespace": "inference._types" + } + } + } + }, + { + "description": "The model that generated the completion.", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": 
{ + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The object type.", + "name": "object", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The token usage statistics for the entire request.", + "name": "usage", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Usage", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L179-L203" + }, + { + "kind": "interface", + "description": "The token usage statistics for the entire request.", + "name": { + "name": "Usage", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of tokens in the generated completion.", + "name": "completion_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of tokens in the prompt.", + "name": "prompt_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The sum of completion_tokens and prompt_tokens.", + "name": "total_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L161-L177" + }, { "kind": "request", "attachedBehaviors": [ @@ -141842,6 +142209,547 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "description": "A list of tools that the model can call.", + "name": { + "name": "CompletionTool", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The type of tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function definition.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolFunction", + "namespace": "inference.unified_inference" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L142-L154" + }, + { + "kind": "interface", + "description": "Controls which tool is called by the model.", + "name": { + "name": "CompletionToolChoice", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The type of the tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool choice function.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference.unified_inference" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L105-L117" + }, + { + "kind": "interface", + "description": "The tool choice function.", + "name": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L94-L103" + }, + { + "kind": "interface", + 
"description": "The completion tool function definition.", + "name": { + "name": "CompletionToolFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "name": "description", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The parameters the functional accepts. This should be formatted as a JSON object.", + "name": "parameters", + "required": false, + "type": { + "kind": "user_defined_value" + } + }, + { + "description": "Whether to enable schema adherence when generating the function call.", + "name": "strict", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L119-L140" + }, + { + "kind": "interface", + "description": "An object style representation of a single portion of a conversation.", + "name": { + "name": "ContentObject", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The text content.", + "name": "text", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of content.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L26-L38" + }, + { + "kind": "interface", + "description": "An object representing part of the conversation.", + "name": { + "name": "Message", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The content of the message.", + "name": "content", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ContentObject", + "namespace": "inference.unified_inference" + } + } + } + ] + } + }, + { + "description": "The role of the message author.", + "name": "role", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool call that this message is responding to.", + "name": "tool_call_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool calls generated by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ToolCall", + "namespace": "inference.unified_inference" + } + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L72-L92" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + 
"kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.unified_inference" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoice", + "namespace": "inference.unified_inference" + } + } + ] + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.unified_inference" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ] + }, + "description": "Perform inference on the service using the Unified Schema", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.unified_inference" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L156-L214" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "UnifiedInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.unified_inference" + }, + "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" + }, + { + "kind": "interface", + "description": "A tool call generated by the model.", + "name": { + 
"name": "ToolCall", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The identifier of the tool call.", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function that the model called.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ToolCallFunction", + "namespace": "inference.unified_inference" + } + } + }, + { + "description": "The type of the tool call.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L54-L70" + }, + { + "kind": "interface", + "description": "The function that the model called.", + "name": { + "name": "ToolCallFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The arguments to call the function with in JSON format.", + "name": "arguments", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L40-L52" + }, { "kind": "interface", "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 074d11f89d..d8e150d8c1 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12714,6 +12714,19 @@ export interface IndicesValidateQueryResponse { error?: string } +export interface InferenceCompletionChoice { + delta: InferenceCompletionDelta + finish_reason?: string + index: number +} + +export interface InferenceCompletionDelta { + content?: string + refusal?: string + role?: string + tool_calls?: InferenceResultToolCall[] +} + export interface InferenceCompletionResult { result: string } @@ -12751,6 +12764,18 @@ export interface InferenceRankedDocument { text?: string } +export interface InferenceResultFunctionCall { + arguments?: string + name?: string +} + +export interface InferenceResultToolCall { + index: number + id?: string + function?: InferenceResultFunctionCall + type?: string +} + export type InferenceServiceSettings = any export interface InferenceSparseEmbeddingResult { @@ -12771,6 +12796,20 @@ export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } +export interface InferenceUnifiedInferenceResult { + id: string + choices: InferenceCompletionChoice[] + model: string + object: string + usage?: InferenceUsage +} + +export interface InferenceUsage { + completion_tokens: number + prompt_tokens: number + total_tokens: number +} + export interface InferenceDeleteRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id @@ -12810,6 +12849,68 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferenceUnifiedInferenceCompletionTool { + type: string + function: InferenceUnifiedInferenceCompletionToolFunction +} + +export interface InferenceUnifiedInferenceCompletionToolChoice { + type: string + function: InferenceUnifiedInferenceCompletionToolChoiceFunction +} + +export interface 
InferenceUnifiedInferenceCompletionToolChoiceFunction {
+  name: string
+}
+
+export interface InferenceUnifiedInferenceCompletionToolFunction {
+  description?: string
+  name: string
+  parameters?: any
+  strict?: boolean
+}
+
+export interface InferenceUnifiedInferenceContentObject {
+  text: string
+  type: string
+}
+
+export interface InferenceUnifiedInferenceMessage {
+  content: string | InferenceUnifiedInferenceContentObject[]
+  role: string
+  tool_call_id?: string
+  tool_calls?: InferenceUnifiedInferenceToolCall[]
+}
+
+export interface InferenceUnifiedInferenceRequest extends RequestBase {
+  task_type?: InferenceTaskType
+  inference_id: Id
+  timeout?: Duration
+  body?: {
+    messages: InferenceUnifiedInferenceMessage[]
+    model?: string
+    max_completion_tokens?: number
+    stop?: string[]
+    temperature?: number
+    tool_choice?: string | InferenceUnifiedInferenceCompletionToolChoice
+    tools?: InferenceUnifiedInferenceCompletionTool[]
+    top_p?: number
+  }
+}
+
+export type InferenceUnifiedInferenceResponse = InferenceUnifiedInferenceResult
+
+export interface InferenceUnifiedInferenceToolCall {
+  id: string
+  function: InferenceUnifiedInferenceToolCallFunction
+  type: string
+}
+
+export interface InferenceUnifiedInferenceToolCallFunction {
+  arguments: string
+  name: string
+}
+
 export interface IngestAppendProcessor extends IngestProcessorBase {
   field: Field
   value: any | any[]
diff --git a/specification/_json_spec/inference.unified_inference.json b/specification/_json_spec/inference.unified_inference.json
new file mode 100644
index 0000000000..84182d19f8
--- /dev/null
+++ b/specification/_json_spec/inference.unified_inference.json
@@ -0,0 +1,45 @@
+{
+  "inference.unified_inference": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html",
+      "description": "Perform inference using the Unified Schema"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["text/event-stream"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{inference_id}/_unified",
+          "methods": ["POST"],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        },
+        {
+          "path": "/_inference/{task_type}/{inference_id}/_unified",
+          "methods": ["POST"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts
index 1a35289bab..83bfe8a442 100644
--- a/specification/inference/_types/Results.ts
+++ b/specification/inference/_types/Results.ts
@@ -88,6 +88,120 @@ export class InferenceResult {
   rerank?: Array<RankedDocument>
 }
 
+/**
+ * The function the model wants to call.
+ */
+export class ResultFunctionCall {
+  /**
+   * The arguments to call the function with, which the model generated in JSON format.
+   */
+  arguments?: string
+  /**
+   * The name of the function to call.
+   */
+  name?: string
+}
+
+/**
+ * The tool call made by the model.
+ */
+export class ResultToolCall {
+  index: number
+  /**
+   * The identifier of the tool call.
+   */
+  id?: string
+  /**
+   * The function the model wants to call.
+   */
+  function?: ResultFunctionCall
+  /**
+   * The type of the tool.
+   */
+  type?: string
+}
+
+export class CompletionDelta {
+  /**
+   * The contents of the chunked message.
+   */
+  content?: string
+  /**
+   * The refusal message.
+   */
+  refusal?: string
+  /**
+   * The role of the author of the message.
+   */
+  role?: string
+  /**
+   * The tool calls made by the model.
+   */
+  tool_calls?: Array<ResultToolCall>
+}
+
+/**
+ * Represent a completion choice returned from a model.
+ */
+export class CompletionChoice {
+  /**
+   * The delta generated by the model.
+   */
+  delta: CompletionDelta
+  /**
+   * The reason the model stopped generating tokens.
+   */
+  finish_reason?: string
+  /**
+   * The index of the choice in the array of choices field.
+   */
+  index: number
+}
+
+/**
+ * The token usage statistics for the entire request.
+ */
+export class Usage {
+  /**
+   * The number of tokens in the generated completion.
+   */
+  completion_tokens: number
+  /**
+   * The number of tokens in the prompt.
+   */
+  prompt_tokens: number
+  /**
+   * The sum of completion_tokens and prompt_tokens.
+   */
+  total_tokens: number
+}
+
+/**
+ * Represents the result format for a completion request using the Unified Inference API.
+ */
+export class UnifiedInferenceResult {
+  /**
+   * A unique identifier for the chat completion.
+   */
+  id: string
+  /**
+   * A list of completion choices.
+   */
+  choices: Array<CompletionChoice>
+  /**
+   * The model that generated the completion.
+   */
+  model: string
+  /**
+   * The object type.
+   */
+  object: string
+  /**
+   * The token usage statistics for the entire request.
+   */
+  usage?: Usage
+}
+
 /**
  * Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
  */
diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
new file mode 100644
index 0000000000..67a48e73d0
--- /dev/null
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -0,0 +1,214 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+import { TaskType } from '@inference/_types/TaskType'
+
+/**
+ * An object style representation of a single portion of a conversation.
+ */
+export interface ContentObject {
+  /**
+   * The text content.
+   */
+  text: string
+  /**
+   * The type of content.
+   */
+  type: string
+}
+
+/**
+ * The function that the model called.
+ */
+export interface ToolCallFunction {
+  /**
+   * The arguments to call the function with in JSON format.
+   */
+  arguments: string
+  /**
+   * The name of the function to call.
+   */
+  name: string
+}
+
+/**
+ * A tool call generated by the model.
+ */
+export interface ToolCall {
+  /**
+   * The identifier of the tool call.
+   */
+  id: string
+  /**
+   * The function that the model called.
+   */
+  function: ToolCallFunction
+  /**
+   * The type of the tool call.
+   */
+  type: string
+}
+
+/**
+ * An object representing part of the conversation.
+ */
+export interface Message {
+  /**
+   * The content of the message.
+   */
+  content: string | Array<ContentObject>
+  /**
+   * The role of the message author.
+   */
+  role: string
+  /**
+   * The tool call that this message is responding to.
+   */
+  tool_call_id?: string
+  /**
+   * The tool calls generated by the model.
+   */
+  tool_calls?: Array<ToolCall>
+}
+
+/**
+ * The tool choice function.
+ *
+ */
+export interface CompletionToolChoiceFunction {
+  /**
+   * The name of the function to call.
+   */
+  name: string
+}
+
+/**
+ * Controls which tool is called by the model.
+ */
+export interface CompletionToolChoice {
+  /**
+   * The type of the tool.
+   */
+  type: string
+  /**
+   * The tool choice function.
+   */
+  function: CompletionToolChoiceFunction
+}
+
+/**
+ * The completion tool function definition.
+ */
+export interface CompletionToolFunction {
+  /**
+   * A description of what the function does.
+   * This is used by the model to choose when and how to call the function.
+   */
+  description?: string
+  /**
+   * The name of the function.
+   */
+  name: string
+  /**
+   * The parameters the function accepts. This should be formatted as a JSON object.
+   */
+  parameters?: UserDefinedValue
+  /**
+   * Whether to enable schema adherence when generating the function call.
+   */
+  strict?: boolean
+}
+
+/**
+ * A list of tools that the model can call.
+ */
+export interface CompletionTool {
+  /**
+   * The type of tool.
+   */
+  type: string
+  /**
+   * The function definition.
+   */
+  function: CompletionToolFunction
+}
+
+/**
+ * Perform inference on the service using the Unified Schema
+ * @rest_spec_name inference.unified_inference
+ * @availability stack since=8.18.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ */
+export interface Request extends RequestBase {
+  path_parts: {
+    /**
+     * The task type
+     */
+    task_type?: TaskType
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * A list of objects representing the conversation.
+     */
+    messages: Array<Message>
+    /**
+     * The ID of the model to use.
+     */
+    model?: string
+    /**
+     * The upper bound limit for the number of tokens that can be generated for a completion request.
+     */
+    max_completion_tokens?: number
+    /**
+     * A sequence of strings to control when the model should stop generating additional tokens.
+     */
+    stop?: Array<string>
+    /**
+     * The sampling temperature to use.
+     */
+    temperature?: number
+    /**
+     * Controls which tool is called by the model.
+     */
+    tool_choice?: string | CompletionToolChoice
+    /**
+     * A list of tools that the model can call.
+     */
+    tools?: Array<CompletionTool>
+    /**
+     * Nucleus sampling, an alternative to sampling with temperature.
+     */
+    top_p?: number
+  }
+}
diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/unified_inference/UnifiedResponse.ts
new file mode 100644
index 0000000000..6bcd6089b1
--- /dev/null
+++ b/specification/inference/unified_inference/UnifiedResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { UnifiedInferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  body: UnifiedInferenceResult
+}

From 5b80fae7d8cf3fd7333ad6810c861770f9588bb0 Mon Sep 17 00:00:00 2001
From: Jonathan Buttner
Date: Mon, 16 Dec 2024 14:48:57 -0500
Subject: [PATCH 2/3] Fixing formatting

---
 specification/inference/unified_inference/UnifiedRequest.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
index 67a48e73d0..6160e015e7 100644
--- a/specification/inference/unified_inference/UnifiedRequest.ts
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -17,11 +17,11 @@
  * under the License.
  */
 
+import { TaskType } from '@inference/_types/TaskType'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
 import { Duration } from '@_types/Time'
-import { TaskType } from '@inference/_types/TaskType'
 
 /**
  * An object style representation of a single portion of a conversation.

From 90f9fd26b7b5dc9499db0133f54fb24e0245fa83 Mon Sep 17 00:00:00 2001
From: Jonathan Buttner
Date: Fri, 10 Jan 2025 14:51:08 -0500
Subject: [PATCH 3/3] Addressing feedback and removing response

---
 specification/_types/Binary.ts                |   3 +
 specification/inference/_types/Results.ts     | 114 ---------------
 .../unified_inference/UnifiedRequest.ts       | 137 ++++++++++--------
 .../unified_inference/UnifiedResponse.ts      |   4 +-
 4 files changed, 79 insertions(+), 179 deletions(-)

diff --git a/specification/_types/Binary.ts b/specification/_types/Binary.ts
index 56b792ea38..f00d8ddcd4 100644
--- a/specification/_types/Binary.ts
+++ b/specification/_types/Binary.ts
@@ -22,3 +22,6 @@ export type MapboxVectorTiles = ArrayBuffer
 
 // ES|QL columns
 export type EsqlColumns = ArrayBuffer
+
+// Streaming endpoints response
+export type StreamResult = ArrayBuffer
diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts
index 83bfe8a442..1a35289bab 100644
--- a/specification/inference/_types/Results.ts
+++ b/specification/inference/_types/Results.ts
@@ -88,120 +88,6 @@ export class InferenceResult {
   rerank?: Array<RankedDocument>
 }
 
-/**
- * The function the model wants to call.
- */
-export class ResultFunctionCall {
-  /**
-   * The arguments to call the function with, which the model generated in JSON format.
-   */
-  arguments?: string
-  /**
-   * The name of the function to call.
-   */
-  name?: string
-}
-
-/**
- * The tool call made by the model.
- */
-export class ResultToolCall {
-  index: number
-  /**
-   * The identifier of the tool call.
-   */
-  id?: string
-  /**
-   * The function the model wants to call.
-   */
-  function?: ResultFunctionCall
-  /**
-   * The type of the tool.
-   */
-  type?: string
-}
-
-export class CompletionDelta {
-  /**
-   * The contents of the chunked message.
-   */
-  content?: string
-  /**
-   * The refusal message.
-   */
-  refusal?: string
-  /**
-   * The role of the author of the message.
-   */
-  role?: string
-  /**
-   * The tool calls made by the model.
-   */
-  tool_calls?: Array<ResultToolCall>
-}
-
-/**
- * Represent a completion choice returned from a model.
- */
-export class CompletionChoice {
-  /**
-   * The delta generated by the model.
-   */
-  delta: CompletionDelta
-  /**
-   * The reason the model stopped generating tokens.
-   */
-  finish_reason?: string
-  /**
-   * The index of the choice in the array of choices field.
-   */
-  index: number
-}
-
-/**
- * The token usage statistics for the entire request.
- */
-export class Usage {
-  /**
-   * The number of tokens in the generated completion.
-   */
-  completion_tokens: number
-  /**
-   * The number of tokens in the prompt.
-   */
-  prompt_tokens: number
-  /**
-   * The sum of completion_tokens and prompt_tokens.
-   */
-  total_tokens: number
-}
-
-/**
- * Represents the result format for a completion request using the Unified Inference API.
- */
-export class UnifiedInferenceResult {
-  /**
-   * A unique identifier for the chat completion.
-   */
-  id: string
-  /**
-   * A list of completion choices.
-   */
-  choices: Array<CompletionChoice>
-  /**
-   * The model that generated the completion.
-   */
-  model: string
-  /**
-   * The object type.
-   */
-  object: string
-  /**
-   * The token usage statistics for the entire request.
-   */
-  usage?: Usage
-}
-
 /**
  * Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
  */
diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
index 6160e015e7..b646bfa255 100644
--- a/specification/inference/unified_inference/UnifiedRequest.ts
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -21,8 +21,74 @@ import { TaskType } from '@inference/_types/TaskType'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { float, long } from '@_types/Numeric'
 import { Duration } from '@_types/Time'
 
+/**
+ * Perform inference on the service using the Unified Schema
+ * @rest_spec_name inference.unified_inference
+ * @availability stack since=8.18.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ */
+export interface Request extends RequestBase {
+  path_parts: {
+    /**
+     * The task type
+     */
+    task_type?: TaskType
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * A list of objects representing the conversation.
+     */
+    messages: Array<Message>
+    /**
+     * The ID of the model to use.
+     */
+    model?: string
+    /**
+     * The upper bound limit for the number of tokens that can be generated for a completion request.
+     */
+    max_completion_tokens?: long
+    /**
+     * A sequence of strings to control when the model should stop generating additional tokens.
+     */
+    stop?: Array<string>
+    /**
+     * The sampling temperature to use.
+     */
+    temperature?: float
+    /**
+     * Controls which tool is called by the model.
+     */
+    tool_choice?: CompletionToolType
+    /**
+     * A list of tools that the model can call.
+     */
+    tools?: Array<CompletionTool>
+    /**
+     * Nucleus sampling, an alternative to sampling with temperature.
+     */
+    top_p?: float
+  }
+}
+
+/**
+ * @codegen_names string, object
+ */
+export type CompletionToolType = string | CompletionToolChoice
+
 /**
  * An object style representation of a single portion of a conversation.
  */
@@ -58,7 +124,7 @@ export interface ToolCall {
   /**
    * The identifier of the tool call.
    */
-  id: string
+  id: Id
   /**
    * The function that the model called.
    */
@@ -69,6 +135,11 @@ export interface ToolCall {
   type: string
 }
 
+/**
+ * @codegen_names string, object
+ */
+export type MessageContent = string | Array<ContentObject>
+
 /**
  * An object representing part of the conversation.
  */
@@ -76,7 +147,7 @@ export interface Message {
   /**
    * The content of the message.
    */
-  content: string | Array<ContentObject>
+  content?: MessageContent
   /**
    * The role of the message author.
    */
@@ -84,7 +155,7 @@ export interface Message {
   /**
    * The tool call that this message is responding to.
    */
-  tool_call_id?: string
+  tool_call_id?: Id
   /**
    * The tool calls generated by the model.
    */
@@ -152,63 +223,3 @@ export interface CompletionTool {
    */
   function: CompletionToolFunction
 }
-
-/**
- * Perform inference on the service using the Unified Schema
- * @rest_spec_name inference.unified_inference
- * @availability stack since=8.18.0 stability=stable visibility=public
- * @availability serverless stability=stable visibility=public
- */
-export interface Request extends RequestBase {
-  path_parts: {
-    /**
-     * The task type
-     */
-    task_type?: TaskType
-    /**
-     * The inference Id
-     */
-    inference_id: Id
-  }
-  query_parameters: {
-    /**
-     * Specifies the amount of time to wait for the inference request to complete.
-     * @server_default 30s
-     */
-    timeout?: Duration
-  }
-  body: {
-    /**
-     * A list of objects representing the conversation.
-     */
-    messages: Array<Message>
-    /**
-     * The ID of the model to use.
-     */
-    model?: string
-    /**
-     * The upper bound limit for the number of tokens that can be generated for a completion request.
-     */
-    max_completion_tokens?: number
-    /**
-     * A sequence of strings to control when the model should stop generating additional tokens.
-     */
-    stop?: Array<string>
-    /**
-     * The sampling temperature to use.
-     */
-    temperature?: number
-    /**
-     * Controls which tool is called by the model.
-     */
-    tool_choice?: string | CompletionToolChoice
-    /**
-     * A list of tools that the model can call.
-     */
-    tools?: Array<CompletionTool>
-    /**
-     * Nucleus sampling, an alternative to sampling with temperature.
-     */
-    top_p?: number
-  }
-}
diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/unified_inference/UnifiedResponse.ts
index 6bcd6089b1..74b823bf40 100644
--- a/specification/inference/unified_inference/UnifiedResponse.ts
+++ b/specification/inference/unified_inference/UnifiedResponse.ts
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-import { UnifiedInferenceResult } from '@inference/_types/Results'
+import { StreamResult } from '@_types/Binary'
 
 export class Response {
-  body: UnifiedInferenceResult
+  body: StreamResult
 }
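
Note for reviewers: an illustrative request against the new endpoint, assembled from the spec above. The endpoint ID "openai-chat", the model name, and the message text are invented for this example and are not part of the patch; only the path shape, body fields, and media types come from the specification. Because the response media type is text/event-stream, the reply arrives as a stream of server-sent events whose payloads patch 3 deliberately leaves untyped (StreamResult); patch 1's schema suggests each chunk carries "id", "choices" (with a "delta"), "model", "object", and an optional final "usage".

POST /_inference/completion/openai-chat/_unified
{
  "model": "gpt-4o",
  "messages": [
    { "role": "user", "content": "What is Elastic?" }
  ],
  "temperature": 0.7,
  "max_completion_tokens": 300
}

event: message
data: {"id": "...", "choices": [{"delta": {"content": "Elastic is"}, "index": 0}], "model": "gpt-4o", "object": "..."}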