From 00d43cc0b2258f62990be162d103fad1c324ea34 Mon Sep 17 00:00:00 2001 From: lcawl Date: Wed, 19 Mar 2025 00:04:26 -0700 Subject: [PATCH 1/4] Add inference.put_elser.json --- .../_json_spec/inference.put_elser.json | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 specification/_json_spec/inference.put_elser.json diff --git a/specification/_json_spec/inference.put_elser.json b/specification/_json_spec/inference.put_elser.json new file mode 100644 index 0000000000..e6025e264b --- /dev/null +++ b/specification/_json_spec/inference.put_elser.json @@ -0,0 +1,35 @@ +{ + "inference.put_elser": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", + "description": "Configure an ELSER inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{elser_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "mistral_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} From edf3384abfbbb9be031cb89ab004d62d17c6460e Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Wed, 19 Mar 2025 00:28:25 -0700 Subject: [PATCH 2/4] Update specification/_json_spec/inference.put_elser.json --- specification/_json_spec/inference.put_elser.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/_json_spec/inference.put_elser.json b/specification/_json_spec/inference.put_elser.json index e6025e264b..de73112f88 100644 --- a/specification/_json_spec/inference.put_elser.json +++ b/specification/_json_spec/inference.put_elser.json @@ -20,7 +20,7 @@ "type": "string", "description": "The task type" }, - "mistral_inference_id": { + "elser_inference_id": { "type": "string", "description": "The inference Id" } From 2c31d8349eb28cd40a9971184f4ec09c1fabd90c Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 20 Mar 2025 14:23:54 -0700 Subject: [PATCH 3/4] Add deprecation --- specification/_json_spec/inference.put_elser.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/specification/_json_spec/inference.put_elser.json b/specification/_json_spec/inference.put_elser.json index de73112f88..e601e6c8c3 100644 --- a/specification/_json_spec/inference.put_elser.json +++ b/specification/_json_spec/inference.put_elser.json @@ -1,5 +1,9 @@ { "inference.put_elser": { + "deprecated" : { + "version" : "8.16.0", + "description" : "The elser service is deprecated. Use the Elasticsearch inference integration instead, with model_id included in the service_settings." 
+ }, "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", "description": "Configure an ELSER inference endpoint" }, From d5fc365ee076995890e244e8460f71d289e331ab Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 20 Mar 2025 23:31:45 -0700 Subject: [PATCH 4/4] Add Elser details and examples --- output/openapi/elasticsearch-openapi.json | 139 ++++++++ .../elasticsearch-serverless-openapi.json | 139 ++++++++ output/schema/schema-serverless.json | 296 ++++++++++++++++++ output/schema/schema.json | 296 ++++++++++++++++++ output/typescript/types.ts | 28 ++ specification/_doc_ids/table.csv | 1 + .../_json_spec/inference.put_elser.json | 6 +- .../inference/put_elser/PutElserRequest.ts | 137 ++++++++ .../inference/put_elser/PutElserResponse.ts | 24 ++ .../request/PutElserRequestExample1.yaml | 12 + .../request/PutElserRequestExample2.yaml | 16 + .../response/PutElserResponseExample1.yaml | 15 + 12 files changed, 1106 insertions(+), 3 deletions(-) create mode 100644 specification/inference/put_elser/PutElserRequest.ts create mode 100644 specification/inference/put_elser/PutElserResponse.ts create mode 100644 specification/inference/put_elser/examples/request/PutElserRequestExample1.yaml create mode 100644 specification/inference/put_elser/examples/request/PutElserRequestExample2.yaml create mode 100644 specification/inference/put_elser/examples/response/PutElserResponseExample1.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 03694b1857..f02cf58569 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17830,6 +17830,96 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{elser_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an ELSER inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-elser", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_elser:ElserTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "elser_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_elser:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_elser:ElserServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutElserRequestExample1": { + "summary": "A sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "summary": "Adaptive allocations", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. 
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + } + } + } + } + }, + "deprecated": true, + "x-state": "Added in 8.11.0" + } + }, "/_inference/{task_type}/{googleaistudio_inference_id}": { "put": { "tags": [ @@ -77366,6 +77456,55 @@ } } }, + "inference.put_elser:ElserTaskType": { + "type": "string", + "enum": [ + "sparse_embedding" + ] + }, + "inference.put_elser:ServiceType": { + "type": "string", + "enum": [ + "elser" + ] + }, + "inference.put_elser:ElserServiceSettings": { + "type": "object", + "properties": { + "adaptive_allocations": { + "$ref": "#/components/schemas/inference.put_elser:AdaptiveAllocations" + }, + "num_allocations": { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "type": "number" + }, + "num_threads": { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocation` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "type": "number" + } + }, + "required": [ + "num_allocations", + "num_threads" + ] + }, + "inference.put_elser:AdaptiveAllocations": { + "type": "object", + "properties": { + "enabled": { + "description": "Turn on `adaptive_allocations`.", + "type": "boolean" + }, + "max_number_of_allocations": { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "type": "number" + }, + "min_number_of_allocations": { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "type": "number" + } + } + }, "inference.put_googleaistudio:GoogleAiStudioTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 635ca82d14..2429111cd0 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9652,6 +9652,96 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{elser_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an ELSER inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-elser", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_elser:ElserTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "elser_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_elser:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_elser:ElserServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutElserRequestExample1": { + "summary": "A sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "summary": "Adaptive allocations", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. 
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + } + } + } + } + }, + "deprecated": true, + "x-state": "Added in 8.11.0" + } + }, "/_inference/{task_type}/{googleaistudio_inference_id}": { "put": { "tags": [ @@ -48558,6 +48648,55 @@ } } }, + "inference.put_elser:ElserTaskType": { + "type": "string", + "enum": [ + "sparse_embedding" + ] + }, + "inference.put_elser:ServiceType": { + "type": "string", + "enum": [ + "elser" + ] + }, + "inference.put_elser:ElserServiceSettings": { + "type": "object", + "properties": { + "adaptive_allocations": { + "$ref": "#/components/schemas/inference.put_elser:AdaptiveAllocations" + }, + "num_allocations": { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "type": "number" + }, + "num_threads": { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocation` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "type": "number" + } + }, + "required": [ + "num_allocations", + "num_threads" + ] + }, + "inference.put_elser:AdaptiveAllocations": { + "type": "object", + "properties": { + "enabled": { + "description": "Turn on `adaptive_allocations`.", + "type": "boolean" + }, + "max_number_of_allocations": { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "type": "number" + }, + "min_number_of_allocations": { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "type": "number" + } + } + }, "inference.put_googleaistudio:GoogleAiStudioTaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index cf60888bef..3d990342ee 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4636,6 +4636,55 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "deprecation": { + "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", + "version": "8.16.0" + }, + "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-elser", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", + "name": "inference.put_elser", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_elser" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_elser" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{elser_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27291,6 +27340,134 @@ }, "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `elser`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ElserServiceSettings", + "namespace": "inference.put_elser" + } + } + } + ] + }, + "deprecation": { + "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", + "version": "8.16.0" + }, + "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutElserRequestExample1": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "summary": "Adaptive allocations", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_elser" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ElserTaskType", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "elser_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -101291,6 +101468,32 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "enum", + "members": [ + { + "name": "sparse_embedding" + } + ], + "name": { + "name": "ElserTaskType", + "namespace": "inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserRequest.ts#L84-L86" + }, + { + "kind": "enum", + "members": [ + { + "name": "elser" + } + ], + "name": { + "name": "ServiceType", + "namespace": 
"inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserRequest.ts#L88-L90" + }, { "kind": "enum", "members": [ @@ -121971,6 +122174,99 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "name": { + "name": "ElserServiceSettings", + "namespace": "inference.put_elser" + }, + "properties": [ + { + "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocations", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "name": "num_allocations", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. 
If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "name": "num_threads", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elser/PutElserRequest.ts#L111-L137" + }, + { + "kind": "interface", + "name": { + "name": "AdaptiveAllocations", + "namespace": "inference.put_elser" + }, + "properties": [ + { + "description": "Turn on `adaptive_allocations`.", + "name": "enabled", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "name": "max_number_of_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "name": "min_number_of_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elser/PutElserRequest.ts#L92-L109" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 200c2ac618..f0ace9d16f 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9348,6 +9348,55 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "deprecation": { + "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", + "version": "8.16.0" + }, + "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-elser", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", + "name": "inference.put_elser", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_elser" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_elser" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{elser_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150796,6 +150845,253 @@ }, "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" }, + { + "kind": "interface", + "name": { + "name": "AdaptiveAllocations", + "namespace": "inference.put_elser" + }, + "properties": [ + { + "description": "Turn on `adaptive_allocations`.", + "name": "enabled", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "name": "max_number_of_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "name": "min_number_of_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elser/PutElserRequest.ts#L92-L109" + }, + { + "kind": "interface", + "name": { + "name": "ElserServiceSettings", + "namespace": "inference.put_elser" + }, + "properties": [ + { + "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocations", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "name": "num_allocations", + "required": true, + "type": { + "kind": 
"instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "name": "num_threads", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elser/PutElserRequest.ts#L111-L137" + }, + { + "kind": "enum", + "members": [ + { + "name": "sparse_embedding" + } + ], + "name": { + "name": "ElserTaskType", + "namespace": "inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserRequest.ts#L84-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `elser`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ElserServiceSettings", + "namespace": "inference.put_elser" + } + } + } + ] + }, + "deprecation": { + "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", + "version": "8.16.0" + }, + "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutElserRequestExample1": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "summary": "Adaptive allocations", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_elser" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ElserTaskType", + "namespace": "inference.put_elser" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "elser_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "elser" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_elser" + }, + "specLocation": "inference/put_elser/PutElserRequest.ts#L88-L90" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 846982c1cd..a9e4d10f6c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13272,6 +13272,34 @@ export type InferencePutEisResponse = InferenceInferenceEndpointInfo 
export type InferencePutEisServiceType = 'elastic' +export interface InferencePutElserAdaptiveAllocations { + enabled?: boolean + max_number_of_allocations?: integer + min_number_of_allocations?: integer +} + +export interface InferencePutElserElserServiceSettings { + adaptive_allocations?: InferencePutElserAdaptiveAllocations + num_allocations: integer + num_threads: integer +} + +export type InferencePutElserElserTaskType = 'sparse_embedding' + +export interface InferencePutElserRequest extends RequestBase { + task_type: InferencePutElserElserTaskType + elser_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutElserServiceType + service_settings: InferencePutElserElserServiceSettings + } +} + +export type InferencePutElserResponse = InferenceInferenceEndpointInfo + +export type InferencePutElserServiceType = 'elser' + export interface InferencePutGoogleaistudioGoogleAiStudioServiceSettings { api_key: string model_id: string diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index b11d2f41d2..986ea3ea98 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -323,6 +323,7 @@ inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html +inference-api-put-elser,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html inference-api-put-jinaai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-jinaai.html inference-api-put-googlevertexai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html diff --git a/specification/_json_spec/inference.put_elser.json b/specification/_json_spec/inference.put_elser.json index e601e6c8c3..b943b31a7d 100644 --- a/specification/_json_spec/inference.put_elser.json +++ b/specification/_json_spec/inference.put_elser.json @@ -1,8 +1,8 @@ { "inference.put_elser": { - "deprecated" : { - "version" : "8.16.0", - "description" : "The elser service is deprecated. Use the Elasticsearch inference integration instead, with model_id included in the service_settings." + "deprecated": { + "version": "8.16.0", + "description": "The elser service is deprecated. Use the Elasticsearch inference integration instead, with model_id included in the service_settings." }, "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", diff --git a/specification/inference/put_elser/PutElserRequest.ts b/specification/inference/put_elser/PutElserRequest.ts new file mode 100644 index 0000000000..3a21e01df3 --- /dev/null +++ b/specification/inference/put_elser/PutElserRequest.ts @@ -0,0 +1,137 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. 
licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceChunkingSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create an ELSER inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `elser` service. + * You can also deploy ELSER by using the Elasticsearch inference integration. + * + * > info + * > Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings. + * + * The API request will automatically download and deploy the ELSER model if it isn't already downloaded. + * + * > info + * > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value. + * + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_elser + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @deprecated 8.16.0 The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings. + * @cluster_privileges manage_inference + * @doc_id inference-api-put-elser + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{elser_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: ElserTaskType + /** + * The unique identifier of the inference endpoint. + */ + elser_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `elser`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `elser` service. + */ + service_settings: ElserServiceSettings + } +} + +export enum ElserTaskType { + sparse_embedding +} + +export enum ServiceType { + elser +} + +export class AdaptiveAllocations { + /** + * Turn on `adaptive_allocations`. + * @server_default false + */ + enabled?: boolean + /** + * The maximum number of allocations to scale to. + * If set, it must be greater than or equal to `min_number_of_allocations`. + */ + max_number_of_allocations?: integer + /** + * The minimum number of allocations to scale to. + * If set, it must be greater than or equal to 0. + * If not defined, the deployment scales to 0. + */ + min_number_of_allocations?: integer +} + +export class ElserServiceSettings { + /** + * Adaptive allocations configuration details. + * If `enabled` is true, the number of allocations of the model is set based on the current load the process gets. + * When the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set. + * When the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set. + * If `enabled` is true, do not set the number of allocations manually. + */ + adaptive_allocations?: AdaptiveAllocations + /** + * The total number of allocations this model is assigned across machine learning nodes. + * Increasing this value generally increases the throughput. + * If adaptive allocations is enabled, do not set this value because it's automatically set. + */ + num_allocations: integer + /** + * The number of threads used by each model allocation during inference. + * Increasing this value generally increases the speed per inference request. + * The inference process is a compute-bound process; `threads_per_allocation` must not exceed the number of available allocated processors per node. + * The value must be a power of 2. + * The maximum value is 32. + * + * > info + * > If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1. + */ + num_threads: integer +} diff --git a/specification/inference/put_elser/PutElserResponse.ts b/specification/inference/put_elser/PutElserResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_elser/PutElserResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_elser/examples/request/PutElserRequestExample1.yaml b/specification/inference/put_elser/examples/request/PutElserRequestExample1.yaml new file mode 100644 index 0000000000..85fd58f986 --- /dev/null +++ b/specification/inference/put_elser/examples/request/PutElserRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A sparse embedding task +description: Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model. +# method_request: "PUT _inference/sparse_embedding/my-elser-model" +# type: "request" +value: |- + { + "service": "elser", + "service_settings": { + "num_allocations": 1, + "num_threads": 1 + } + } diff --git a/specification/inference/put_elser/examples/request/PutElserRequestExample2.yaml b/specification/inference/put_elser/examples/request/PutElserRequestExample2.yaml new file mode 100644 index 0000000000..831115834c --- /dev/null +++ b/specification/inference/put_elser/examples/request/PutElserRequestExample2.yaml @@ -0,0 +1,16 @@ +summary: Adaptive allocations +description: Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load. +# method_request: "PUT _inference/sparse_embedding/my-elser-model" +# type: "request" +value: |- + { + "service": "elser", + "service_settings": { + "adaptive_allocations": { + "enabled": true, + "min_number_of_allocations": 3, + "max_number_of_allocations": 10 + }, + "num_threads": 1 + } + } diff --git a/specification/inference/put_elser/examples/response/PutElserResponseExample1.yaml b/specification/inference/put_elser/examples/response/PutElserResponseExample1.yaml new file mode 100644 index 0000000000..9d0746cbd1 --- /dev/null +++ b/specification/inference/put_elser/examples/response/PutElserResponseExample1.yaml @@ -0,0 +1,15 @@ +# summary: +description: A successful response when creating an ELSER inference endpoint. +# type: response +# response_code: +value: |- + { + "inference_id": "my-elser-model", + "task_type": "sparse_embedding", + "service": "elser", + "service_settings": { + "num_allocations": 1, + "num_threads": 1 + }, + "task_settings": {} + }
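---

For reviewers who want to exercise the new endpoint locally, here is a minimal Console sketch of the workflow the examples above document. It is not part of the patch itself: the endpoint name `my-elser-model` is arbitrary, the request body is PutElserRequestExample1 verbatim, and the verification call is the existing get trained model statistics API rather than anything added here.

# Create the endpoint; Elasticsearch downloads and deploys the ELSER model if needed.
PUT _inference/sparse_embedding/my-elser-model
{
  "service": "elser",
  "service_settings": {
    "num_allocations": 1,
    "num_threads": 1
  }
}

# Before using the endpoint, confirm the deployment completed: look for
# "state": "fully_allocated" and an "allocation_count" that matches
# "target_allocation_count" in the response.
GET _ml/trained_models/_stats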